diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,87810 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 14082, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010651896037494673, + "grad_norm": 127.65389567139255, + "learning_rate": 4.2553191489361695e-09, + "loss": 0.5528, + "step": 5, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 1.0, + "success_rate.epoch.env.math": 1.0, + "success_rate.epoch.env.science": 0.5, + "success_rate.epoch.env_macro_mean": 0.9, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982850609756098, + "tokens_p.mean_in_band": 0.67109375, + "tokens_rate.above_band": 0.9949443882709808, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005055611729019211 + }, + { + "epoch": 0.0021303792074989347, + "grad_norm": 182.45480532526113, + "learning_rate": 9.574468085106382e-09, + "loss": 0.839, + "step": 10, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.75, + "success_rate.epoch.env.math": 0.8333333333333334, + "success_rate.epoch.env.science": 0.3333333333333333, + "success_rate.epoch.env_macro_mean": 0.8194444444444443, + "success_rate.epoch.global": 0.8, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.55, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9984365348980853, + "tokens_p.mean_in_band": 0.5885416666666666, + "tokens_rate.above_band": 0.9729567307692307, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027043269230769232 + }, + { + "epoch": 0.0031955688112484024, + "grad_norm": 143.2525098677747, + "learning_rate": 1.4893617021276594e-08, + "loss": 1.1298, + "step": 15, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.75, + "success_rate.epoch.env.math": 0.9, + "success_rate.epoch.env.science": 0.25, + "success_rate.epoch.env_macro_mean": 0.8428571428571429, + "success_rate.epoch.global": 0.7333333333333333, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.2, + "success_rate.window.env_macro_mean": 0.7333333333333334, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9891447368421052, + "tokens_p.mean_below_band": 3.841705620288849e-09, + "tokens_p.mean_in_band": 0.3806046195652174, + "tokens_rate.above_band": 0.7983193277310925, + "tokens_rate.below_band": 0.008403361344537815, + "tokens_rate.in_band": 0.19327731092436976 + }, + { + "epoch": 0.004260758414997869, + "grad_norm": 102.603002394448, + "learning_rate": 2.0212765957446807e-08, + "loss": 0.867, + "step": 20, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.6, + "success_rate.epoch.env.math": 0.8461538461538461, + "success_rate.epoch.env.science": 0.36363636363636365, + "success_rate.epoch.env_macro_mean": 0.82997002997003, + "success_rate.epoch.global": 0.725, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9979813664596273, + "tokens_p.mean_in_band": 0.5777698863636364, + "tokens_rate.above_band": 0.973397823458283, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026602176541717048 + }, + { + "epoch": 0.005325948018747337, + "grad_norm": 134.87519657802602, + "learning_rate": 2.553191489361702e-08, + "loss": 0.8741, + "step": 25, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8, + "success_rate.epoch.env.science": 0.5, + "success_rate.epoch.env_macro_mean": 0.8285714285714285, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.4047619047619048, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9990748355263158, + "tokens_p.mean_in_band": 0.5151827830188679, + "tokens_rate.above_band": 0.9198184568835098, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08018154311649017 + }, + { + "epoch": 0.006391137622496805, + "grad_norm": 146.47774240723183, + "learning_rate": 3.085106382978723e-08, + "loss": 0.7981, + "step": 30, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8125, + "success_rate.epoch.env.science": 0.48, + "success_rate.epoch.env_macro_mean": 0.8490625, + "success_rate.epoch.global": 0.6833333333333333, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.42857142857142855, + "success_rate.window.env_macro_mean": 0.8571428571428572, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9987949871465296, + "tokens_p.mean_in_band": 0.38421875, + "tokens_rate.above_band": 0.9396135265700483, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06038647342995169 + }, + { + "epoch": 0.007456327226246272, + "grad_norm": 164.69253341969946, + "learning_rate": 3.617021276595745e-08, + "loss": 0.7385, + "step": 35, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8421052631578947, + "success_rate.epoch.env.science": 0.5172413793103449, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8732607380520265, + "success_rate.epoch.global": 0.7142857142857143, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988586956521739, + "tokens_p.mean_in_band": 0.671875, + "tokens_rate.above_band": 0.979557069846678, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020442930153321975 + }, + { + "epoch": 0.008521516829995739, + "grad_norm": 123.14828386632749, + "learning_rate": 4.1489361702127654e-08, + "loss": 0.7603, + "step": 40, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.42857142857142855, + "success_rate.epoch.env.math": 0.8333333333333334, + "success_rate.epoch.env.science": 0.45454545454545453, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8573833573833574, + "success_rate.epoch.global": 0.675, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.26666666666666666, + "success_rate.window.global": 0.4, + "tokens_p.mean_above_band": 0.993560606060606, + "tokens_p.mean_in_band": 0.516953125, + "tokens_rate.above_band": 0.7674418604651163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.23255813953488372 + }, + { + "epoch": 0.009586706433745207, + "grad_norm": 101.25030838586852, + "learning_rate": 4.680851063829787e-08, + "loss": 0.7417, + "step": 45, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.42857142857142855, + "success_rate.epoch.env.math": 0.8275862068965517, + "success_rate.epoch.env.science": 0.4864864864864865, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860293791328274, + "success_rate.epoch.global": 0.6888888888888889, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995207055214724, + "tokens_p.mean_in_band": 0.41552734375, + "tokens_rate.above_band": 0.9760479041916168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023952095808383235 + }, + { + "epoch": 0.010651896037494673, + "grad_norm": 185.78207769109625, + "learning_rate": 5.2127659574468084e-08, + "loss": 0.7104, + "step": 50, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.42857142857142855, + "success_rate.epoch.env.math": 0.84375, + "success_rate.epoch.env.science": 0.46153846153846156, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8733859890109891, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9963815789473685, + "tokens_p.mean_in_band": 0.4185267857142857, + "tokens_rate.above_band": 0.9895833333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010416666666666666 + }, + { + "epoch": 0.011717085641244141, + "grad_norm": 220.60635659314818, + "learning_rate": 5.74468085106383e-08, + "loss": 0.813, + "step": 55, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5555555555555556, + "success_rate.epoch.env.math": 0.8378378378378378, + "success_rate.epoch.env.science": 0.43902439024390244, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8832417783637296, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9987009237875288, + "tokens_p.mean_in_band": 0.5329241071428571, + "tokens_rate.above_band": 0.9686800894854586, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03131991051454139 + }, + { + "epoch": 0.01278227524499361, + "grad_norm": 280.9108909418842, + "learning_rate": 6.27659574468085e-08, + "loss": 0.8872, + "step": 60, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5454545454545454, + "success_rate.epoch.env.math": 0.8461538461538461, + "success_rate.epoch.env.science": 0.45652173913043476, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8848130130738827, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.775, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9949263649425287, + "tokens_p.mean_in_band": 0.4735243055555556, + "tokens_rate.above_band": 0.9508196721311475, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04918032786885246 + }, + { + "epoch": 0.013847464848743076, + "grad_norm": 122.85610672798597, + "learning_rate": 6.808510638297871e-08, + "loss": 0.7545, + "step": 65, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9166666666666666, + "success_rate.epoch.env.logic": 0.5454545454545454, + "success_rate.epoch.env.math": 0.85, + "success_rate.epoch.env.science": 0.47058823529411764, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.878270944741533, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.775, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0001908396946564, + "tokens_p.mean_in_band": 0.494140625, + "tokens_rate.above_band": 0.9899244332493703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010075566750629723 + }, + { + "epoch": 0.014912654452492544, + "grad_norm": 81.61402105739417, + "learning_rate": 7.340425531914894e-08, + "loss": 0.7344, + "step": 70, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9230769230769231, + "success_rate.epoch.env.logic": 0.46153846153846156, + "success_rate.epoch.env.math": 0.8666666666666667, + "success_rate.epoch.env.science": 0.46153846153846156, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712820512820514, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9975978490832158, + "tokens_p.mean_in_band": 0.47662259615384617, + "tokens_rate.above_band": 0.9561699258260283, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04383007417397168 + }, + { + "epoch": 0.015977844056242013, + "grad_norm": 153.62559665029062, + "learning_rate": 7.872340425531915e-08, + "loss": 0.5339, + "step": 75, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5333333333333333, + "success_rate.epoch.env.math": 0.8723404255319149, + "success_rate.epoch.env.science": 0.49122807017543857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8825473257612115, + "success_rate.epoch.global": 0.7133333333333334, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967935667752443, + "tokens_p.mean_in_band": 0.4296875, + "tokens_rate.above_band": 0.9699842022116903, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030015797788309637 + }, + { + "epoch": 0.017043033659991477, + "grad_norm": 171.79187123368592, + "learning_rate": 8.404255319148936e-08, + "loss": 0.6487, + "step": 80, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5625, + "success_rate.epoch.env.math": 0.8846153846153846, + "success_rate.epoch.env.science": 0.4915254237288136, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8867212236915627, + "success_rate.epoch.global": 0.725, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998422712933754, + "tokens_p.mean_in_band": 0.701904296875, + "tokens_rate.above_band": 0.9814241486068112, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018575851393188854 + }, + { + "epoch": 0.018108223263740945, + "grad_norm": 78.69450458066295, + "learning_rate": 8.936170212765957e-08, + "loss": 0.382, + "step": 85, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5555555555555556, + "success_rate.epoch.env.math": 0.8928571428571429, + "success_rate.epoch.env.science": 0.5079365079365079, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8884920634920637, + "success_rate.epoch.global": 0.7294117647058823, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967077759197325, + "tokens_p.mean_in_band": 0.49402573529411764, + "tokens_rate.above_band": 0.9462025316455697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05379746835443038 + }, + { + "epoch": 0.019173412867490414, + "grad_norm": 117.32732108960155, + "learning_rate": 9.468085106382978e-08, + "loss": 0.5551, + "step": 90, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6, + "success_rate.epoch.env.math": 0.9016393442622951, + "success_rate.epoch.env.science": 0.5076923076923077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8937903080526031, + "success_rate.epoch.global": 0.7388888888888889, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963165399239544, + "tokens_p.mean_in_band": 0.63046875, + "tokens_rate.above_band": 0.9813432835820896, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018656716417910446 + }, + { + "epoch": 0.020238602471239882, + "grad_norm": 125.13625422096041, + "learning_rate": 1e-07, + "loss": 0.4718, + "step": 95, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6363636363636364, + "success_rate.epoch.env.math": 0.9047619047619048, + "success_rate.epoch.env.science": 0.5223880597014925, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8992085029398462, + "success_rate.epoch.global": 0.7526315789473684, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9998063816604709, + "tokens_p.mean_in_band": 0.639453125, + "tokens_rate.above_band": 0.9938423645320197, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006157635467980296 + }, + { + "epoch": 0.021303792074989347, + "grad_norm": 191.69681285035233, + "learning_rate": 1.053191489361702e-07, + "loss": 0.5207, + "step": 100, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6666666666666666, + "success_rate.epoch.env.math": 0.9090909090909091, + "success_rate.epoch.env.science": 0.5285714285714286, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9032900432900434, + "success_rate.epoch.global": 0.76, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964622641509434, + "tokens_p.mean_in_band": 0.5245535714285714, + "tokens_rate.above_band": 0.9814814814814815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018518518518518517 + }, + { + "epoch": 0.022368981678738815, + "grad_norm": 334.618707129673, + "learning_rate": 1.1063829787234042e-07, + "loss": 0.6853, + "step": 105, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.68, + "success_rate.epoch.env.math": 0.9, + "success_rate.epoch.env.science": 0.5333333333333333, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9041904761904762, + "success_rate.epoch.global": 0.7571428571428571, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.7833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9907094594594594, + "tokens_p.mean_in_band": 0.3910590277777778, + "tokens_rate.above_band": 0.8043478260869565, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1956521739130435 + }, + { + "epoch": 0.023434171282488283, + "grad_norm": 223.77307793873968, + "learning_rate": 1.1595744680851063e-07, + "loss": 0.6475, + "step": 110, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.7037037037037037, + "success_rate.epoch.env.math": 0.9066666666666666, + "success_rate.epoch.env.science": 0.5256410256410257, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9064582824582825, + "success_rate.epoch.global": 0.759090909090909, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7777777777777778, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939903846153846, + "tokens_p.mean_in_band": 0.5901988636363636, + "tokens_rate.above_band": 0.8764044943820225, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12359550561797752 + }, + { + "epoch": 0.02449936088623775, + "grad_norm": 99.52150128216505, + "learning_rate": 1.2127659574468084e-07, + "loss": 0.5094, + "step": 115, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.7142857142857143, + "success_rate.epoch.env.math": 0.9113924050632911, + "success_rate.epoch.env.science": 0.525, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9079249547920434, + "success_rate.epoch.global": 0.7641921397379913, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9977941176470588, + "tokens_p.mean_in_band": 0.373046875, + "tokens_rate.above_band": 0.9550561797752809, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0449438202247191 + }, + { + "epoch": 0.02556455048998722, + "grad_norm": 121.46142275404097, + "learning_rate": 1.2659574468085107e-07, + "loss": 0.759, + "step": 120, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.7142857142857143, + "success_rate.epoch.env.math": 0.9024390243902439, + "success_rate.epoch.env.science": 0.5301204819277109, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9080178553937003, + "success_rate.epoch.global": 0.7656903765690377, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973714953271028, + "tokens_p.mean_in_band": 0.6136067708333334, + "tokens_rate.above_band": 0.963963963963964, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036036036036036036 + }, + { + "epoch": 0.026629740093736684, + "grad_norm": 38.88557519636338, + "learning_rate": 1.3191489361702127e-07, + "loss": 0.3261, + "step": 125, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.6896551724137931, + "success_rate.epoch.env.math": 0.9058823529411765, + "success_rate.epoch.env.science": 0.5232558139534884, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9056293339308457, + "success_rate.epoch.global": 0.7630522088353414, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7222222222222222, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9976340694006309, + "tokens_p.mean_in_band": 0.6704963235294118, + "tokens_rate.above_band": 0.9738863287250384, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026113671274961597 + }, + { + "epoch": 0.027694929697486152, + "grad_norm": 660.6599264924243, + "learning_rate": 1.372340425531915e-07, + "loss": 0.6337, + "step": 130, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.7096774193548387, + "success_rate.epoch.env.math": 0.8953488372093024, + "success_rate.epoch.env.science": 0.5393258426966292, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9088796543705214, + "success_rate.epoch.global": 0.7683397683397684, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999388966480447, + "tokens_p.mean_in_band": 0.637451171875, + "tokens_rate.above_band": 0.9944444444444445, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005555555555555556 + }, + { + "epoch": 0.02876011930123562, + "grad_norm": 200.43701752538294, + "learning_rate": 1.425531914893617e-07, + "loss": 0.6314, + "step": 135, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.71875, + "success_rate.epoch.env.math": 0.896551724137931, + "success_rate.epoch.env.science": 0.5416666666666666, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9101412835249043, + "success_rate.epoch.global": 0.7657992565055762, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5714285714285714, + "success_rate.window.env_macro_mean": 0.8928571428571428, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9987012987012988, + "tokens_p.mean_in_band": 0.4561244419642857, + "tokens_rate.above_band": 0.9649122807017544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03508771929824561 + }, + { + "epoch": 0.02982530890498509, + "grad_norm": 79.87488421431745, + "learning_rate": 1.4787234042553191e-07, + "loss": 0.4928, + "step": 140, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7352941176470589, + "success_rate.epoch.env.math": 0.9, + "success_rate.epoch.env.science": 0.54, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.912266253869969, + "success_rate.epoch.global": 0.7670250896057348, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975, + "tokens_p.mean_in_band": 0.5002297794117647, + "tokens_rate.above_band": 0.989321608040201, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010678391959798994 + }, + { + "epoch": 0.030890498508734553, + "grad_norm": 141.13739354995624, + "learning_rate": 1.531914893617021e-07, + "loss": 0.5645, + "step": 145, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7352941176470589, + "success_rate.epoch.env.math": 0.9032258064516129, + "success_rate.epoch.env.science": 0.5514018691588785, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9137290214310182, + "success_rate.epoch.global": 0.7681660899653979, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.8571428571428572, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9900265957446809, + "tokens_p.mean_in_band": 0.49947916666666664, + "tokens_rate.above_band": 0.8623853211009175, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13761467889908258 + }, + { + "epoch": 0.031955688112484025, + "grad_norm": 168.0282207866582, + "learning_rate": 1.585106382978723e-07, + "loss": 0.3307, + "step": 150, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7567567567567568, + "success_rate.epoch.env.math": 0.9072164948453608, + "success_rate.epoch.env.science": 0.5412844036697247, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9152626076324474, + "success_rate.epoch.global": 0.7692307692307693, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968354430379747, + "tokens_p.mean_in_band": 0.3268229166666667, + "tokens_rate.above_band": 0.8977272727272727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10227272727272728 + }, + { + "epoch": 0.033020877716233486, + "grad_norm": 242.28975794793152, + "learning_rate": 1.6382978723404256e-07, + "loss": 0.5423, + "step": 155, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7631578947368421, + "success_rate.epoch.env.math": 0.9090909090909091, + "success_rate.epoch.env.science": 0.5486725663716814, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9168289791252064, + "success_rate.epoch.global": 0.7734627831715211, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9932369402985075, + "tokens_p.mean_in_band": 0.4778645833333333, + "tokens_rate.above_band": 0.9370629370629371, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06293706293706294 + }, + { + "epoch": 0.034086067319982954, + "grad_norm": 98.24957404326659, + "learning_rate": 1.6914893617021276e-07, + "loss": 0.5934, + "step": 160, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7692307692307693, + "success_rate.epoch.env.math": 0.9117647058823529, + "success_rate.epoch.env.science": 0.559322033898305, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.918768593006406, + "success_rate.epoch.global": 0.7774294670846394, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982404279279279, + "tokens_p.mean_in_band": 0.4409722222222222, + "tokens_rate.above_band": 0.9801324503311258, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019867549668874173 + }, + { + "epoch": 0.03515125692373242, + "grad_norm": 207.29874757978172, + "learning_rate": 1.7446808510638299e-07, + "loss": 0.5853, + "step": 165, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.7804878048780488, + "success_rate.epoch.env.math": 0.9150943396226415, + "success_rate.epoch.env.science": 0.5702479338842975, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9215830078384988, + "success_rate.epoch.global": 0.78419452887538, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984124331550802, + "tokens_p.mean_in_band": 0.806640625, + "tokens_rate.above_band": 0.9946808510638298, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005319148936170213 + }, + { + "epoch": 0.03621644652748189, + "grad_norm": 215.72872373681392, + "learning_rate": 1.7978723404255318e-07, + "loss": 0.5016, + "step": 170, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.7954545454545454, + "success_rate.epoch.env.math": 0.9, + "success_rate.epoch.env.science": 0.5772357723577236, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9222690317812269, + "success_rate.epoch.global": 0.7846607669616519, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9991619809688581, + "tokens_p.mean_in_band": 0.5600961538461539, + "tokens_rate.above_band": 0.9780033840947546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021996615905245348 + }, + { + "epoch": 0.03728163613123136, + "grad_norm": 88.1798383756124, + "learning_rate": 1.8510638297872338e-07, + "loss": 0.4687, + "step": 175, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.7954545454545454, + "success_rate.epoch.env.math": 0.9017857142857143, + "success_rate.epoch.env.science": 0.578125, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9225365259740259, + "success_rate.epoch.global": 0.7851002865329513, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972988077496274, + "tokens_p.mean_in_band": 0.6089409722222222, + "tokens_rate.above_band": 0.986764705882353, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013235294117647059 + }, + { + "epoch": 0.03834682573498083, + "grad_norm": 117.02852695568869, + "learning_rate": 1.904255319148936e-07, + "loss": 0.3779, + "step": 180, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.7777777777777778, + "success_rate.epoch.env.math": 0.9051724137931034, + "success_rate.epoch.env.science": 0.5725190839694656, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9207850227921298, + "success_rate.epoch.global": 0.7827298050139275, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9985414866032843, + "tokens_p.mean_in_band": 0.544016768292683, + "tokens_rate.above_band": 0.9657762938230384, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034223706176961605 + }, + { + "epoch": 0.039412015338730295, + "grad_norm": 362.9432453883575, + "learning_rate": 1.957446808510638e-07, + "loss": 0.5635, + "step": 185, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.7872340425531915, + "success_rate.epoch.env.math": 0.9067796610169492, + "success_rate.epoch.env.science": 0.5808823529411765, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9229441511056772, + "success_rate.epoch.global": 0.7859078590785907, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973169191919192, + "tokens_p.mean_in_band": 0.7493489583333334, + "tokens_rate.above_band": 0.9565217391304348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043478260869565216 + }, + { + "epoch": 0.040477204942479764, + "grad_norm": 263.98176650817317, + "learning_rate": 2.0106382978723403e-07, + "loss": 0.5884, + "step": 190, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8, + "success_rate.epoch.env.math": 0.907563025210084, + "success_rate.epoch.env.science": 0.574468085106383, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9236576564861922, + "success_rate.epoch.global": 0.783641160949868, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9983277591973244, + "tokens_p.mean_in_band": 0.4421875, + "tokens_rate.above_band": 0.9522292993630573, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04777070063694268 + }, + { + "epoch": 0.04154239454622923, + "grad_norm": 99.82716689138132, + "learning_rate": 2.0638297872340426e-07, + "loss": 0.5266, + "step": 195, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.803921568627451, + "success_rate.epoch.env.math": 0.9090909090909091, + "success_rate.epoch.env.science": 0.5684931506849316, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9236051082948746, + "success_rate.epoch.global": 0.781491002570694, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.8800000000000001, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9971910112359551, + "tokens_p.mean_in_band": 0.5352076480263158, + "tokens_rate.above_band": 0.9035532994923858, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09644670050761421 + }, + { + "epoch": 0.04260758414997869, + "grad_norm": 477.27558166201226, + "learning_rate": 2.1170212765957448e-07, + "loss": 0.4063, + "step": 200, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8076923076923077, + "success_rate.epoch.env.math": 0.912, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.5666666666666667, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9309913117185844, + "success_rate.epoch.global": 0.7819548872180451, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9920910493827161, + "tokens_p.mean_below_band": 8.585629984736443e-10, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9050279329608939, + "tokens_rate.below_band": 0.00558659217877095, + "tokens_rate.in_band": 0.0893854748603352 + }, + { + "epoch": 0.04367277375372816, + "grad_norm": 143.40155203496522, + "learning_rate": 2.1702127659574465e-07, + "loss": 0.471, + "step": 205, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8113207547169812, + "success_rate.epoch.env.math": 0.9069767441860465, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.577922077922078, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9320673923595945, + "success_rate.epoch.global": 0.784841075794621, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952986725663717, + "tokens_p.mean_in_band": 0.6884765625, + "tokens_rate.above_band": 0.904, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.096 + }, + { + "epoch": 0.04473796335747763, + "grad_norm": 616.1292866345375, + "learning_rate": 2.2234042553191488e-07, + "loss": 0.7285, + "step": 210, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8, + "success_rate.epoch.env.math": 0.9083969465648855, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.5786163522012578, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.931230457990598, + "success_rate.epoch.global": 0.7828162291169452, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.775, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.999496644295302, + "tokens_p.mean_in_band": 0.5057444852941176, + "tokens_rate.above_band": 0.9563543003851092, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043645699614890884 + }, + { + "epoch": 0.0458031529612271, + "grad_norm": 386.42059521215043, + "learning_rate": 2.276595744680851e-07, + "loss": 0.5514, + "step": 215, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.7894736842105263, + "success_rate.epoch.env.math": 0.9097744360902256, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.5766871165644172, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9305395669877427, + "success_rate.epoch.global": 0.7808857808857809, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9971532091097308, + "tokens_p.mean_in_band": 0.634375, + "tokens_rate.above_band": 0.9698795180722891, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030120481927710843 + }, + { + "epoch": 0.046868342564976566, + "grad_norm": 145.68890202002504, + "learning_rate": 2.3297872340425533e-07, + "loss": 0.4387, + "step": 220, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.7903225806451613, + "success_rate.epoch.env.math": 0.9104477611940298, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.5843373493975904, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9313734264760711, + "success_rate.epoch.global": 0.7835990888382688, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.8, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998688352570829, + "tokens_p.mean_in_band": 0.49254261363636365, + "tokens_rate.above_band": 0.9665314401622718, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033468559837728194 + }, + { + "epoch": 0.047933532168726034, + "grad_norm": 71.69789068314002, + "learning_rate": 2.382978723404255e-07, + "loss": 0.4261, + "step": 225, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.796875, + "success_rate.epoch.env.math": 0.9117647058823529, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.5808383233532934, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9320400901998735, + "success_rate.epoch.global": 0.7861915367483296, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977207293666027, + "tokens_p.mean_in_band": 0.6015625, + "tokens_rate.above_band": 0.9961759082217974, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0038240917782026767 + }, + { + "epoch": 0.0489987217724755, + "grad_norm": 52.732008175907225, + "learning_rate": 2.4361702127659575e-07, + "loss": 0.5333, + "step": 230, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8, + "success_rate.epoch.env.math": 0.9136690647482014, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.5798816568047337, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8869557895014453, + "success_rate.epoch.global": 0.7864923747276689, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9962565104166666, + "tokens_p.mean_in_band": 0.501171875, + "tokens_rate.above_band": 0.9056603773584906, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09433962264150944 + }, + { + "epoch": 0.05006391137622497, + "grad_norm": 173.93139277894971, + "learning_rate": 2.48936170212766e-07, + "loss": 0.5164, + "step": 235, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.803030303030303, + "success_rate.epoch.env.math": 0.916083916083916, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.5862068965517241, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8880258253299005, + "success_rate.epoch.global": 0.7889125799573561, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9901889534883721, + "tokens_p.mean_in_band": 0.5385044642857143, + "tokens_rate.above_band": 0.9247311827956989, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07526881720430108 + }, + { + "epoch": 0.05112910097997444, + "grad_norm": 101.93574906798119, + "learning_rate": 2.5425531914893615e-07, + "loss": 0.421, + "step": 240, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8059701492537313, + "success_rate.epoch.env.math": 0.9183673469387755, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.5977653631284916, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.889551438389451, + "success_rate.epoch.global": 0.7933194154488518, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9941620879120879, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.0521942905837239, + "grad_norm": 73.72945167563957, + "learning_rate": 2.5957446808510637e-07, + "loss": 0.431, + "step": 245, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8088235294117647, + "success_rate.epoch.env.math": 0.9194630872483222, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6010928961748634, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8902129523452648, + "success_rate.epoch.global": 0.7955010224948875, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974038461538461, + "tokens_p.mean_in_band": 0.6399739583333334, + "tokens_rate.above_band": 0.9643916913946587, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03560830860534125 + }, + { + "epoch": 0.05325948018747337, + "grad_norm": 85.91349736103393, + "learning_rate": 2.648936170212766e-07, + "loss": 0.4619, + "step": 250, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8115942028985508, + "success_rate.epoch.env.math": 0.9144736842105263, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6063829787234043, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8904921662541313, + "success_rate.epoch.global": 0.7955911823647295, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9919084821428571, + "tokens_p.mean_in_band": 0.5946514423076923, + "tokens_rate.above_band": 0.896, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.104 + }, + { + "epoch": 0.054324669791222836, + "grad_norm": 286.98707907761883, + "learning_rate": 2.7021276595744677e-07, + "loss": 0.2553, + "step": 255, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8169014084507042, + "success_rate.epoch.env.math": 0.9161290322580645, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6020942408376964, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.890735240409948, + "success_rate.epoch.global": 0.7956777996070727, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9969723183391004, + "tokens_p.mean_in_band": 0.5823863636363636, + "tokens_rate.above_band": 0.9633333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03666666666666667 + }, + { + "epoch": 0.055389859394972304, + "grad_norm": 86.96736797423058, + "learning_rate": 2.75531914893617e-07, + "loss": 0.3657, + "step": 260, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8194444444444444, + "success_rate.epoch.env.math": 0.9171974522292994, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6071428571428571, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8763710045557179, + "success_rate.epoch.global": 0.7957610789980732, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967447916666666, + "tokens_p.mean_in_band": 0.6536959134615384, + "tokens_rate.above_band": 0.8470588235294118, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15294117647058825 + }, + { + "epoch": 0.05645504899872177, + "grad_norm": 137.87840566102082, + "learning_rate": 2.808510638297872e-07, + "loss": 0.5173, + "step": 265, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9642857142857143, + "success_rate.epoch.env.logic": 0.821917808219178, + "success_rate.epoch.env.math": 0.9182389937106918, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6119402985074627, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772469225505801, + "success_rate.epoch.global": 0.7977315689981096, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9943364395886889, + "tokens_p.mean_in_band": 0.7736280487804879, + "tokens_rate.above_band": 0.9499389499389499, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.050061050061050064 + }, + { + "epoch": 0.05752023860247124, + "grad_norm": 283.6396316509124, + "learning_rate": 2.8617021276595744e-07, + "loss": 0.4702, + "step": 270, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.8243243243243243, + "success_rate.epoch.env.math": 0.9125, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6201923076923077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8778061097026615, + "success_rate.epoch.global": 0.7977736549165121, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.7142857142857143, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9969907407407408, + "tokens_p.mean_in_band": 0.58447265625, + "tokens_rate.above_band": 0.9507042253521126, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04929577464788732 + }, + { + "epoch": 0.05858542820622071, + "grad_norm": 225.6895500803243, + "learning_rate": 2.914893617021276e-07, + "loss": 0.3096, + "step": 275, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.8243243243243243, + "success_rate.epoch.env.math": 0.9146341463414634, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6255924170616114, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8784910420400039, + "success_rate.epoch.global": 0.8014571948998178, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970737632508834, + "tokens_p.mean_in_band": 0.8645833333333334, + "tokens_rate.above_band": 0.9947275922671354, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005272407732864675 + }, + { + "epoch": 0.05965061780997018, + "grad_norm": 107.61547173808319, + "learning_rate": 2.9680851063829784e-07, + "loss": 0.5635, + "step": 280, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.8243243243243243, + "success_rate.epoch.env.math": 0.9156626506024096, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6267281105990783, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8786877872944051, + "success_rate.epoch.global": 0.8010752688172043, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.7777777777777778, + "tokens_p.mean_above_band": 0.9954252577319588, + "tokens_p.mean_in_band": 0.5911458333333334, + "tokens_rate.above_band": 0.9642147117296223, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03578528827037773 + }, + { + "epoch": 0.060715807413719645, + "grad_norm": 79.62456069953544, + "learning_rate": 3.0212765957446807e-07, + "loss": 0.3437, + "step": 285, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9666666666666667, + "success_rate.epoch.env.logic": 0.8289473684210527, + "success_rate.epoch.env.math": 0.9161676646706587, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6306306306306306, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8796132421565767, + "success_rate.epoch.global": 0.8028169014084507, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980742296918768, + "tokens_p.mean_in_band": 0.6998697916666666, + "tokens_rate.above_band": 0.967479674796748, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032520325203252036 + }, + { + "epoch": 0.06178099701746911, + "grad_norm": 184.29277848775067, + "learning_rate": 3.074468085106383e-07, + "loss": 0.4092, + "step": 290, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.967741935483871, + "success_rate.epoch.env.logic": 0.8289473684210527, + "success_rate.epoch.env.math": 0.9166666666666666, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6327433628318584, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8799484242487984, + "success_rate.epoch.global": 0.8044982698961938, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945987654320988, + "tokens_p.mean_in_band": 0.490234375, + "tokens_rate.above_band": 0.9878048780487805, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012195121951219513 + }, + { + "epoch": 0.06284618662121857, + "grad_norm": 192.1485208245573, + "learning_rate": 3.127659574468085e-07, + "loss": 0.5063, + "step": 295, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.967741935483871, + "success_rate.epoch.env.logic": 0.8289473684210527, + "success_rate.epoch.env.math": 0.9181286549707602, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6320346320346321, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8800169022039682, + "success_rate.epoch.global": 0.8044217687074829, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971289752650176, + "tokens_p.mean_in_band": 0.51220703125, + "tokens_rate.above_band": 0.9725085910652921, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027491408934707903 + }, + { + "epoch": 0.06391137622496805, + "grad_norm": 53.717681249975506, + "learning_rate": 3.1808510638297874e-07, + "loss": 0.3581, + "step": 300, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8311688311688312, + "success_rate.epoch.env.math": 0.9186046511627907, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6329113924050633, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8775925643700018, + "success_rate.epoch.global": 0.802675585284281, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9890836408364083, + "tokens_p.mean_below_band": 7.338821887969971e-07, + "tokens_p.mean_in_band": 0.4773232677902622, + "tokens_rate.above_band": 0.7520814061054579, + "tokens_rate.below_band": 0.0009250693802035153, + "tokens_rate.in_band": 0.24699352451433856 + }, + { + "epoch": 0.06497656582871751, + "grad_norm": 105.27737944046548, + "learning_rate": 3.234042553191489e-07, + "loss": 0.2076, + "step": 305, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8333333333333334, + "success_rate.epoch.env.math": 0.92, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6419753086419753, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8787401795735129, + "success_rate.epoch.global": 0.805921052631579, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.990320796460177, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9912280701754386, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008771929824561403 + }, + { + "epoch": 0.06604175543246697, + "grad_norm": 128.20518201903698, + "learning_rate": 3.2872340425531914e-07, + "loss": 0.3951, + "step": 310, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8375, + "success_rate.epoch.env.math": 0.9204545454545454, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6506024096385542, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8799445716751304, + "success_rate.epoch.global": 0.8090614886731392, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965659340659341, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.9680851063829787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031914893617021274 + }, + { + "epoch": 0.06710694503621645, + "grad_norm": 111.42614963644284, + "learning_rate": 3.3404255319148936e-07, + "loss": 0.198, + "step": 315, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8414634146341463, + "success_rate.epoch.env.math": 0.9213483146067416, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6535433070866141, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8806534881509851, + "success_rate.epoch.global": 0.810207336523126, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9887242268041238, + "tokens_p.mean_in_band": 0.55810546875, + "tokens_rate.above_band": 0.9238095238095239, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0761904761904762 + }, + { + "epoch": 0.06817213463996591, + "grad_norm": 80.68678770558238, + "learning_rate": 3.393617021276596e-07, + "loss": 0.3028, + "step": 320, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8414634146341463, + "success_rate.epoch.env.math": 0.9243243243243243, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.65234375, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808149838447096, + "success_rate.epoch.global": 0.8116169544740973, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99625, + "tokens_p.mean_in_band": 0.4539930555555556, + "tokens_rate.above_band": 0.9433962264150944, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05660377358490566 + }, + { + "epoch": 0.06923732424371538, + "grad_norm": 212.6150375396718, + "learning_rate": 3.4468085106382976e-07, + "loss": 0.5615, + "step": 325, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8452380952380952, + "success_rate.epoch.env.math": 0.9251336898395722, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6538461538461539, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8813682974779232, + "success_rate.epoch.global": 0.8129829984544049, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959656762295082, + "tokens_p.mean_in_band": 0.54296875, + "tokens_rate.above_band": 0.9838709677419355, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016129032258064516 + }, + { + "epoch": 0.07030251384746485, + "grad_norm": 274.6090402190955, + "learning_rate": 3.5e-07, + "loss": 0.3874, + "step": 330, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8470588235294118, + "success_rate.epoch.env.math": 0.9259259259259259, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6566037735849056, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8818565323975979, + "success_rate.epoch.global": 0.8143074581430746, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9955538617886179, + "tokens_p.mean_in_band": 0.5929129464285714, + "tokens_rate.above_band": 0.9213483146067416, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07865168539325842 + }, + { + "epoch": 0.07136770345121432, + "grad_norm": 78.24440459832996, + "learning_rate": 3.553191489361702e-07, + "loss": 0.407, + "step": 335, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8409090909090909, + "success_rate.epoch.env.math": 0.921875, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.654275092936803, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8807175015617479, + "success_rate.epoch.global": 0.8110944527736131, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.611111111111111, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9953568447412354, + "tokens_p.mean_in_band": 0.4287405303030303, + "tokens_rate.above_band": 0.9477848101265823, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05221518987341772 + }, + { + "epoch": 0.07243289305496378, + "grad_norm": 78.54678932890421, + "learning_rate": 3.606382978723404e-07, + "loss": 0.2786, + "step": 340, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8478260869565217, + "success_rate.epoch.env.math": 0.9226804123711341, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6568265682656826, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8816514909933338, + "success_rate.epoch.global": 0.8136094674556213, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973739495798319, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9916666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008333333333333333 + }, + { + "epoch": 0.07349808265871326, + "grad_norm": 270.3106115154008, + "learning_rate": 3.659574468085106e-07, + "loss": 0.442, + "step": 345, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.8494623655913979, + "success_rate.epoch.env.math": 0.9242424242424242, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.656934306569343, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8821242153754945, + "success_rate.epoch.global": 0.814868804664723, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949324324324325, + "tokens_p.mean_in_band": 0.6143973214285714, + "tokens_rate.above_band": 0.9814323607427056, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01856763925729443 + }, + { + "epoch": 0.07456327226246272, + "grad_norm": 47.5508422766998, + "learning_rate": 3.7127659574468083e-07, + "loss": 0.338, + "step": 350, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.851063829787234, + "success_rate.epoch.env.math": 0.925, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6571428571428571, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8823576326961241, + "success_rate.epoch.global": 0.8146551724137931, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967161016949152, + "tokens_p.mean_in_band": 0.6422991071428571, + "tokens_rate.above_band": 0.9546925566343042, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045307443365695796 + }, + { + "epoch": 0.07562846186621218, + "grad_norm": 112.27176807594125, + "learning_rate": 3.7659574468085106e-07, + "loss": 0.3081, + "step": 355, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8526315789473684, + "success_rate.epoch.env.math": 0.9261083743842364, + "success_rate.epoch.env.sat": 0.25, + "success_rate.epoch.env.science": 0.6584507042253521, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8753061025586539, + "success_rate.epoch.global": 0.8144475920679887, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988411125319693, + "tokens_p.mean_in_band": 0.6462296195652174, + "tokens_rate.above_band": 0.9714285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02857142857142857 + }, + { + "epoch": 0.07669365146996165, + "grad_norm": 72.5725592936309, + "learning_rate": 3.8191489361702123e-07, + "loss": 0.2553, + "step": 360, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8556701030927835, + "success_rate.epoch.env.math": 0.9261083743842364, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6608996539792388, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712595092767721, + "success_rate.epoch.global": 0.8142458100558659, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954044117647058, + "tokens_p.mean_in_band": 0.6996783088235294, + "tokens_rate.above_band": 0.9230769230769231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07692307692307693 + }, + { + "epoch": 0.07775884107371112, + "grad_norm": 76.05177626656429, + "learning_rate": 3.8723404255319145e-07, + "loss": 0.2523, + "step": 365, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8556701030927835, + "success_rate.epoch.env.math": 0.9271844660194175, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6632302405498282, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718662958278611, + "success_rate.epoch.global": 0.8168044077134986, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946409189580319, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9978339350180505, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0021660649819494585 + }, + { + "epoch": 0.07882403067746059, + "grad_norm": 38.66044484751237, + "learning_rate": 3.925531914893617e-07, + "loss": 0.1606, + "step": 370, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.9282296650717703, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6621160409556314, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719939097831548, + "success_rate.epoch.global": 0.8179347826086957, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984452736318408, + "tokens_p.mean_in_band": 0.4765625, + "tokens_rate.above_band": 0.9901477832512315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009852216748768473 + }, + { + "epoch": 0.07988922028121005, + "grad_norm": 31.630314299338544, + "learning_rate": 3.978723404255319e-07, + "loss": 0.3096, + "step": 375, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.9289099526066351, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6644067796610169, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8725298191330126, + "success_rate.epoch.global": 0.8203753351206434, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994218910585817, + "tokens_p.mean_in_band": 0.806640625, + "tokens_rate.above_band": 0.9959058341862845, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0040941658137154556 + }, + { + "epoch": 0.08095440988495953, + "grad_norm": 133.9275449390992, + "learning_rate": 3.9999999475269154e-07, + "loss": 0.4971, + "step": 380, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.85, + "success_rate.epoch.env.math": 0.9299065420560748, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6666666666666666, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721765117977611, + "success_rate.epoch.global": 0.8201058201058201, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9920164233576643, + "tokens_p.mean_in_band": 0.4291294642857143, + "tokens_rate.above_band": 0.9513888888888888, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04861111111111111 + }, + { + "epoch": 0.08201959948870899, + "grad_norm": 70.91299771339187, + "learning_rate": 3.9999996268581145e-07, + "loss": 0.3012, + "step": 385, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8543689320388349, + "success_rate.epoch.env.math": 0.9308755760368663, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6666666666666666, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8726617814359091, + "success_rate.epoch.global": 0.8211488250652742, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980053191489362, + "tokens_p.mean_in_band": 0.42578125, + "tokens_rate.above_band": 0.9791666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020833333333333332 + }, + { + "epoch": 0.08308478909245846, + "grad_norm": 196.63393882497718, + "learning_rate": 3.99999901467246e-07, + "loss": 0.3086, + "step": 390, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8543689320388349, + "success_rate.epoch.env.math": 0.9324324324324325, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.6644951140065146, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8726058999573102, + "success_rate.epoch.global": 0.8208762886597938, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9915780141843972, + "tokens_p.mean_in_band": 0.5732421875, + "tokens_rate.above_band": 0.9463087248322147, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.053691275167785234 + }, + { + "epoch": 0.08414997869620792, + "grad_norm": 54.23492780550172, + "learning_rate": 3.9999981109703984e-07, + "loss": 0.3664, + "step": 395, + "success_rate.epoch.env.abd": 0.9767441860465116, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8557692307692307, + "success_rate.epoch.env.math": 0.9288888888888889, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.667741935483871, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707147445369501, + "success_rate.epoch.global": 0.8206106870229007, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968155095184771, + "tokens_p.mean_below_band": 3.625949223836263e-07, + "tokens_p.mean_in_band": 0.10156926114236262, + "tokens_rate.above_band": 0.2779333955804544, + "tokens_rate.below_band": 0.0028011204481792717, + "tokens_rate.in_band": 0.7192654839713664 + }, + { + "epoch": 0.08521516829995739, + "grad_norm": 79.04932777844998, + "learning_rate": 3.999996915752588e-07, + "loss": 0.2101, + "step": 400, + "success_rate.epoch.env.abd": 0.9767441860465116, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.9298245614035088, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6719745222929936, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868279158388226, + "success_rate.epoch.global": 0.821608040201005, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981986215538847, + "tokens_p.mean_in_band": 0.7042100694444444, + "tokens_rate.above_band": 0.9568345323741008, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04316546762589928 + }, + { + "epoch": 0.08628035790370686, + "grad_norm": 151.24303411741707, + "learning_rate": 3.9999954290198994e-07, + "loss": 0.4614, + "step": 405, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8611111111111112, + "success_rate.epoch.env.math": 0.9301310043668122, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6729559748427673, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688050393616393, + "success_rate.epoch.global": 0.8225806451612904, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983974358974359, + "tokens_p.mean_in_band": 0.716015625, + "tokens_rate.above_band": 0.9722991689750693, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027700831024930747 + }, + { + "epoch": 0.08734554750745632, + "grad_norm": 83.87010578776845, + "learning_rate": 3.999993650773417e-07, + "loss": 0.5082, + "step": 410, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8611111111111112, + "success_rate.epoch.env.math": 0.9313304721030042, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6718266253869969, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688114137507685, + "success_rate.epoch.global": 0.8223039215686274, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968297101449275, + "tokens_p.mean_in_band": 0.5022321428571429, + "tokens_rate.above_band": 0.9672897196261683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03271028037383177 + }, + { + "epoch": 0.0884107371112058, + "grad_norm": 158.2557486788325, + "learning_rate": 3.999991581014437e-07, + "loss": 0.3502, + "step": 415, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8623853211009175, + "success_rate.epoch.env.math": 0.9279661016949152, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6717791411042945, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8646645132443549, + "success_rate.epoch.global": 0.8208232445520581, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7222222222222222, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9982092696629213, + "tokens_p.mean_in_band": 0.599365234375, + "tokens_rate.above_band": 0.9910913140311804, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008908685968819599 + }, + { + "epoch": 0.08947592671495526, + "grad_norm": 89.24530838881262, + "learning_rate": 3.9999892197444665e-07, + "loss": 0.3589, + "step": 420, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8623853211009175, + "success_rate.epoch.env.math": 0.9288702928870293, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6707317073170732, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649327316888861, + "success_rate.epoch.global": 0.8217703349282297, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998679577464789, + "tokens_p.mean_in_band": 0.359375, + "tokens_rate.above_band": 0.9989447766443897, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001055223355610271 + }, + { + "epoch": 0.09054111631870473, + "grad_norm": 774.5868925201706, + "learning_rate": 3.9999865669652263e-07, + "loss": 0.5007, + "step": 425, + "success_rate.epoch.env.abd": 0.9782608695652174, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8623853211009175, + "success_rate.epoch.env.math": 0.9291666666666667, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6746987951807228, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620361664886649, + "success_rate.epoch.global": 0.8226950354609929, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999501329787234, + "tokens_p.mean_in_band": 0.6555989583333334, + "tokens_rate.above_band": 0.9936575052854123, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006342494714587738 + }, + { + "epoch": 0.0916063059224542, + "grad_norm": 73.24465644591767, + "learning_rate": 3.9999836226786514e-07, + "loss": 0.3772, + "step": 430, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8623853211009175, + "success_rate.epoch.env.math": 0.9300411522633745, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6776119402985075, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626288051525282, + "success_rate.epoch.global": 0.8247663551401869, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974628712871287, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.09267149552620367, + "grad_norm": 78.03466765787174, + "learning_rate": 3.9999803868868867e-07, + "loss": 0.3244, + "step": 435, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8660714285714286, + "success_rate.epoch.env.math": 0.9317269076305221, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6776119402985075, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633968765993082, + "success_rate.epoch.global": 0.8267898383371824, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9952042079207921, + "tokens_p.mean_in_band": 0.7994791666666666, + "tokens_rate.above_band": 0.9711538461538461, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028846153846153848 + }, + { + "epoch": 0.09373668512995313, + "grad_norm": 51.250560278260515, + "learning_rate": 3.99997685959229e-07, + "loss": 0.3209, + "step": 440, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8660714285714286, + "success_rate.epoch.env.math": 0.932806324110672, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6794117647058824, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637547241756679, + "success_rate.epoch.global": 0.8276255707762558, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950850938967136, + "tokens_p.mean_in_band": 0.671875, + "tokens_rate.above_band": 0.9594594594594594, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04054054054054054 + }, + { + "epoch": 0.09480187473370259, + "grad_norm": 181.4886482421037, + "learning_rate": 3.999973040797433e-07, + "loss": 0.2428, + "step": 445, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8660714285714286, + "success_rate.epoch.env.math": 0.93359375, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6820809248554913, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640689592701167, + "success_rate.epoch.global": 0.8284424379232506, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949919871794872, + "tokens_p.mean_in_band": 0.5953480113636364, + "tokens_rate.above_band": 0.9341317365269461, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0658682634730539 + }, + { + "epoch": 0.09586706433745207, + "grad_norm": 110.38045176779245, + "learning_rate": 3.999968930505097e-07, + "loss": 0.505, + "step": 450, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.8660714285714286, + "success_rate.epoch.env.math": 0.9341085271317829, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6818181818181818, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624224970638761, + "success_rate.epoch.global": 0.8270089285714286, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9923349056603774, + "tokens_p.mean_in_band": 0.6490234375, + "tokens_rate.above_band": 0.8793363499245852, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12066365007541478 + }, + { + "epoch": 0.09693225394120153, + "grad_norm": 51.92290806746458, + "learning_rate": 3.999964528718279e-07, + "loss": 0.3702, + "step": 455, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.8660714285714286, + "success_rate.epoch.env.math": 0.9348659003831418, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6853932584269663, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630568563825712, + "success_rate.epoch.global": 0.8289183222958058, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983591885441527, + "tokens_p.mean_in_band": 0.7330729166666666, + "tokens_rate.above_band": 0.9858823529411764, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01411764705882353 + }, + { + "epoch": 0.097997443544951, + "grad_norm": 269.1855873337498, + "learning_rate": 3.9999598354401874e-07, + "loss": 0.583, + "step": 460, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8596491228070176, + "success_rate.epoch.env.math": 0.935361216730038, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6861111111111111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627150508033847, + "success_rate.epoch.global": 0.8286026200873362, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974735086342229, + "tokens_p.mean_in_band": 0.6817908653846154, + "tokens_rate.above_band": 0.98989898989899, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010101010101010102 + }, + { + "epoch": 0.09906263314870047, + "grad_norm": 202.27025054914114, + "learning_rate": 3.9999548506742416e-07, + "loss": 0.3018, + "step": 465, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8596491228070176, + "success_rate.epoch.env.math": 0.9360902255639098, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6866485013623979, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628301779929447, + "success_rate.epoch.global": 0.82829373650108, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.8571428571428572, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.990530303030303, + "tokens_p.mean_in_band": 0.6188616071428571, + "tokens_rate.above_band": 0.8761061946902655, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12389380530973451 + }, + { + "epoch": 0.10012782275244994, + "grad_norm": 114.40305866456586, + "learning_rate": 3.9999495744240743e-07, + "loss": 0.2159, + "step": 470, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8608695652173913, + "success_rate.epoch.env.math": 0.9363295880149812, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6916890080428955, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860823712808446, + "success_rate.epoch.global": 0.8290598290598291, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988591269841269, + "tokens_p.mean_in_band": 0.4375, + "tokens_rate.above_band": 0.9984152139461173, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001584786053882726 + }, + { + "epoch": 0.1011930123561994, + "grad_norm": 83.56720179963696, + "learning_rate": 3.9999440066935317e-07, + "loss": 0.4819, + "step": 475, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9361702127659575, + "success_rate.epoch.env.logic": 0.8620689655172413, + "success_rate.epoch.env.math": 0.937037037037037, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6904761904761905, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610129524289466, + "success_rate.epoch.global": 0.828752642706131, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9935416666666667, + "tokens_p.mean_in_band": 0.58203125, + "tokens_rate.above_band": 0.9463722397476341, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05362776025236593 + }, + { + "epoch": 0.10225820195994888, + "grad_norm": 74.71988528002478, + "learning_rate": 3.9999381474866716e-07, + "loss": 0.3764, + "step": 480, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.85, + "success_rate.epoch.env.math": 0.9375, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6894736842105263, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599876141029555, + "success_rate.epoch.global": 0.8274058577405857, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.999594395280236, + "tokens_p.mean_in_band": 0.47119140625, + "tokens_rate.above_band": 0.9814707585408222, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018529241459177764 + }, + { + "epoch": 0.10332339156369834, + "grad_norm": 94.28729333594796, + "learning_rate": 3.9999319968077624e-07, + "loss": 0.3541, + "step": 485, + "success_rate.epoch.env.abd": 0.9791666666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.85, + "success_rate.epoch.env.math": 0.9347826086956522, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6901041666666666, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8601314468315695, + "success_rate.epoch.global": 0.8271221532091098, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982839595375722, + "tokens_p.mean_in_band": 0.6754807692307693, + "tokens_rate.above_band": 0.9637883008356546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036211699164345405 + }, + { + "epoch": 0.1043885811674478, + "grad_norm": 51.53614996122157, + "learning_rate": 3.999925554661289e-07, + "loss": 0.2195, + "step": 490, + "success_rate.epoch.env.abd": 0.98, + "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8524590163934426, + "success_rate.epoch.env.math": 0.9357142857142857, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6909090909090909, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605886241941, + "success_rate.epoch.global": 0.8288934426229508, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999047256097561, + "tokens_p.mean_in_band": 0.7209821428571429, + "tokens_rate.above_band": 0.9590643274853801, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04093567251461988 + }, + { + "epoch": 0.10545377077119727, + "grad_norm": 546.5049464676177, + "learning_rate": 3.999918821051945e-07, + "loss": 0.5175, + "step": 495, + "success_rate.epoch.env.abd": 0.98, + "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.856, + "success_rate.epoch.env.math": 0.9361702127659575, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6889460154241646, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8608894737738619, + "success_rate.epoch.global": 0.8286004056795132, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961295871559633, + "tokens_p.mean_in_band": 0.3876201923076923, + "tokens_rate.above_band": 0.8934426229508197, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10655737704918032 + }, + { + "epoch": 0.10651896037494674, + "grad_norm": 259.1329333652082, + "learning_rate": 3.999911795984638e-07, + "loss": 0.3655, + "step": 500, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.8582677165354331, + "success_rate.epoch.env.math": 0.9366197183098591, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6895674300254453, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612629162693026, + "success_rate.epoch.global": 0.8293172690763052, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987847222222223, + "tokens_p.mean_in_band": 0.772265625, + "tokens_rate.above_band": 0.9473684210526315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05263157894736842 + }, + { + "epoch": 0.10758414997869621, + "grad_norm": 125.78633647258418, + "learning_rate": 3.999904479464488e-07, + "loss": 0.367, + "step": 505, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.8515625, + "success_rate.epoch.env.math": 0.9372822299651568, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6917293233082706, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609101152150017, + "success_rate.epoch.global": 0.8290258449304175, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.6111111111111112, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9924395161290323, + "tokens_p.mean_in_band": 0.6408420138888888, + "tokens_rate.above_band": 0.9117647058823529, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08823529411764706 + }, + { + "epoch": 0.10864933958244567, + "grad_norm": 83.79745537983887, + "learning_rate": 3.999896871496827e-07, + "loss": 0.3141, + "step": 510, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.90625, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8527131782945736, + "success_rate.epoch.env.math": 0.9377162629757786, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6940298507462687, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617565145491594, + "success_rate.epoch.global": 0.8307086614173228, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994205809641533, + "tokens_p.mean_in_band": 0.5953125, + "tokens_rate.above_band": 0.9938574938574939, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006142506142506142 + }, + { + "epoch": 0.10971452918619515, + "grad_norm": 92.02197236937367, + "learning_rate": 3.9998889720872003e-07, + "loss": 0.4128, + "step": 515, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.90625, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8549618320610687, + "success_rate.epoch.env.math": 0.9379310344827586, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6921182266009852, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8621037668204685, + "success_rate.epoch.global": 0.8304093567251462, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975524664647338, + "tokens_p.mean_in_band": 0.451171875, + "tokens_rate.above_band": 0.9948342660352992, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005165733964700818 + }, + { + "epoch": 0.11077971878994461, + "grad_norm": 66.75270647588353, + "learning_rate": 3.9998807812413637e-07, + "loss": 0.2742, + "step": 520, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9454545454545454, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.9383561643835616, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6919315403422983, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627067962320488, + "success_rate.epoch.global": 0.831081081081081, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973300970873786, + "tokens_p.mean_in_band": 0.7044270833333334, + "tokens_rate.above_band": 0.9942084942084942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005791505791505791 + }, + { + "epoch": 0.11184490839369408, + "grad_norm": 274.2701795228874, + "learning_rate": 3.9998722989652877e-07, + "loss": 0.5266, + "step": 525, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8582089552238806, + "success_rate.epoch.env.math": 0.9387755102040817, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6934306569343066, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635389227468108, + "success_rate.epoch.global": 0.8326959847036329, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997073126801153, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.11291009799744355, + "grad_norm": 84.57627879830797, + "learning_rate": 3.999863525265154e-07, + "loss": 0.3129, + "step": 530, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8592592592592593, + "success_rate.epoch.env.math": 0.9389830508474576, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6968973747016707, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639684284237308, + "success_rate.epoch.global": 0.8333333333333334, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933035714285714, + "tokens_p.mean_in_band": 0.5524553571428571, + "tokens_rate.above_band": 0.9565217391304348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043478260869565216 + }, + { + "epoch": 0.113975287601193, + "grad_norm": 85.09942168277358, + "learning_rate": 3.9998544601473564e-07, + "loss": 0.4208, + "step": 535, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8602941176470589, + "success_rate.epoch.env.math": 0.9389830508474576, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6901408450704225, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635106497967339, + "success_rate.epoch.global": 0.8302063789868668, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.2857142857142857, + "success_rate.window.env_macro_mean": 0.7619047619047619, + "success_rate.window.global": 0.5, + "tokens_p.mean_above_band": 0.9979304635761589, + "tokens_p.mean_in_band": 0.4636314655172414, + "tokens_rate.above_band": 0.8388888888888889, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16111111111111112 + }, + { + "epoch": 0.11504047720494248, + "grad_norm": 73.6428983207297, + "learning_rate": 3.9998451036185016e-07, + "loss": 0.494, + "step": 540, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.8613138686131386, + "success_rate.epoch.env.math": 0.9391891891891891, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6906976744186046, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641367990677572, + "success_rate.epoch.global": 0.8308550185873605, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984691722972973, + "tokens_p.mean_in_band": 0.6423611111111112, + "tokens_rate.above_band": 0.9924559932942163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0075440067057837385 + }, + { + "epoch": 0.11610566680869194, + "grad_norm": 87.36311953159915, + "learning_rate": 3.9998354556854086e-07, + "loss": 0.3509, + "step": 545, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8633093525179856, + "success_rate.epoch.env.math": 0.9403973509933775, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.6906976744186046, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8645050811892255, + "success_rate.epoch.global": 0.8324125230202578, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983552631578947, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9988814317673378, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0011185682326621924 + }, + { + "epoch": 0.11717085641244142, + "grad_norm": 99.39985706178769, + "learning_rate": 3.999825516355109e-07, + "loss": 0.4542, + "step": 550, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8671328671328671, + "success_rate.epoch.env.math": 0.9407894736842105, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6921296296296297, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628539965262454, + "success_rate.epoch.global": 0.833029197080292, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972222222222222, + "tokens_p.mean_in_band": 0.5642361111111112, + "tokens_rate.above_band": 0.967741935483871, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03225806451612903 + }, + { + "epoch": 0.11823604601619088, + "grad_norm": 608.1477838662232, + "learning_rate": 3.9998152856348464e-07, + "loss": 0.6797, + "step": 555, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8698630136986302, + "success_rate.epoch.env.math": 0.9413680781758957, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6942528735632184, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633773301913101, + "success_rate.epoch.global": 0.8345388788426763, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948529411764706, + "tokens_p.mean_in_band": 0.865234375, + "tokens_rate.above_band": 0.9770114942528736, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022988505747126436 + }, + { + "epoch": 0.11930123561994035, + "grad_norm": 132.18093575713627, + "learning_rate": 3.999804763532076e-07, + "loss": 0.4695, + "step": 560, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8648648648648649, + "success_rate.epoch.env.math": 0.9419354838709677, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6933638443935927, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630242086794578, + "success_rate.epoch.global": 0.8342293906810035, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9990693573667712, + "tokens_p.mean_in_band": 0.6654829545454546, + "tokens_rate.above_band": 0.9830508474576272, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01694915254237288 + }, + { + "epoch": 0.12036642522368982, + "grad_norm": 410.0190897977076, + "learning_rate": 3.999793950054468e-07, + "loss": 0.5749, + "step": 565, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8657718120805369, + "success_rate.epoch.env.math": 0.9421221864951769, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6968325791855203, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636437211234635, + "success_rate.epoch.global": 0.8357015985790408, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980134474327629, + "tokens_p.mean_in_band": 0.7724609375, + "tokens_rate.above_band": 0.9951338199513382, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004866180048661801 + }, + { + "epoch": 0.12143161482743929, + "grad_norm": 109.44767367759921, + "learning_rate": 3.9997828452099015e-07, + "loss": 0.4051, + "step": 570, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8618421052631579, + "success_rate.epoch.env.math": 0.9430379746835443, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6952595936794582, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632267298384568, + "success_rate.epoch.global": 0.835387323943662, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9953088914549654, + "tokens_p.mean_in_band": 0.6181640625, + "tokens_rate.above_band": 0.9474835886214442, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0525164113785558 + }, + { + "epoch": 0.12249680443118875, + "grad_norm": 223.9658910833248, + "learning_rate": 3.9997714490064704e-07, + "loss": 0.4545, + "step": 575, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8618421052631579, + "success_rate.epoch.env.math": 0.943217665615142, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.697986577181208, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639212199507322, + "success_rate.epoch.global": 0.8368237347294939, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981589147286821, + "tokens_p.mean_in_band": 0.7375, + "tokens_rate.above_band": 0.9923076923076923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007692307692307693 + }, + { + "epoch": 0.12356199403493821, + "grad_norm": 94.30895427673707, + "learning_rate": 3.9997597614524807e-07, + "loss": 0.3034, + "step": 580, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8653846153846154, + "success_rate.epoch.env.math": 0.9435736677115988, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6962305986696231, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641159957422168, + "success_rate.epoch.global": 0.8365051903114187, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942781690140845, + "tokens_p.mean_in_band": 0.6307091346153846, + "tokens_rate.above_band": 0.8452380952380952, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15476190476190477 + }, + { + "epoch": 0.12462718363868769, + "grad_norm": 34.76815656056043, + "learning_rate": 3.999747782556449e-07, + "loss": 0.119, + "step": 585, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8662420382165605, + "success_rate.epoch.env.math": 0.9444444444444444, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.698237885462555, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644555854111006, + "success_rate.epoch.global": 0.8379073756432247, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0006167763157894, + "tokens_p.mean_in_band": 0.8385416666666666, + "tokens_rate.above_band": 0.9960681520314548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003931847968545216 + }, + { + "epoch": 0.12569237324243715, + "grad_norm": 113.48498229136952, + "learning_rate": 3.999735512327106e-07, + "loss": 0.4068, + "step": 590, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.8670886075949367, + "success_rate.epoch.env.math": 0.9420731707317073, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6980306345733042, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8643702468539232, + "success_rate.epoch.global": 0.8375850340136054, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980517456359103, + "tokens_p.mean_in_band": 0.6691080729166666, + "tokens_rate.above_band": 0.9852579852579852, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014742014742014743 + }, + { + "epoch": 0.1267575628461866, + "grad_norm": 144.78465893134438, + "learning_rate": 3.9997229507733947e-07, + "loss": 0.4542, + "step": 595, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.8695652173913043, + "success_rate.epoch.env.math": 0.9427710843373494, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.6993464052287581, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623214529475992, + "success_rate.epoch.global": 0.8381112984822934, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990511133603239, + "tokens_p.mean_below_band": 2.455635694786906e-10, + "tokens_p.mean_in_band": 0.857421875, + "tokens_rate.above_band": 0.993963782696177, + "tokens_rate.below_band": 0.002012072434607646, + "tokens_rate.in_band": 0.004024144869215292 + }, + { + "epoch": 0.1278227524499361, + "grad_norm": 63.654772111912095, + "learning_rate": 3.999710097904469e-07, + "loss": 0.4511, + "step": 600, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8703703703703703, + "success_rate.epoch.env.math": 0.9427710843373494, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7044967880085653, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629326879397826, + "success_rate.epoch.global": 0.8394648829431438, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976092896174863, + "tokens_p.mean_in_band": 0.783203125, + "tokens_rate.above_band": 0.9682539682539683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031746031746031744 + }, + { + "epoch": 0.12888794205368556, + "grad_norm": 70.54572223671359, + "learning_rate": 3.9996969537296963e-07, + "loss": 0.243, + "step": 605, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8727272727272727, + "success_rate.epoch.env.math": 0.9436201780415431, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7029914529914529, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631121320386644, + "success_rate.epoch.global": 0.8399668325041459, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998282967032967, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9680851063829787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031914893617021274 + }, + { + "epoch": 0.12995313165743502, + "grad_norm": 34.321238508916935, + "learning_rate": 3.9996835182586565e-07, + "loss": 0.2804, + "step": 610, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8734939759036144, + "success_rate.epoch.env.math": 0.9441176470588235, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.701271186440678, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631623469138264, + "success_rate.epoch.global": 0.8396381578947368, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968603971962616, + "tokens_p.mean_in_band": 0.435546875, + "tokens_rate.above_band": 0.9553571428571429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044642857142857144 + }, + { + "epoch": 0.13101832126118448, + "grad_norm": 99.86068617393661, + "learning_rate": 3.9996697915011404e-07, + "loss": 0.5902, + "step": 615, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.874251497005988, + "success_rate.epoch.env.math": 0.9446064139941691, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7044025157232704, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635835863415864, + "success_rate.epoch.global": 0.8409461663947798, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9907945736434108, + "tokens_p.mean_in_band": 0.85, + "tokens_rate.above_band": 0.9626865671641791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03731343283582089 + }, + { + "epoch": 0.13208351086493395, + "grad_norm": 59.76441666171027, + "learning_rate": 3.999655773467152e-07, + "loss": 0.254, + "step": 620, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.875, + "success_rate.epoch.env.math": 0.9455587392550143, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.702928870292887, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636267862231765, + "success_rate.epoch.global": 0.8414239482200647, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993489583333334, + "tokens_p.mean_in_band": 0.4973958333333333, + "tokens_rate.above_band": 0.9896907216494846, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010309278350515464 + }, + { + "epoch": 0.13314870046868343, + "grad_norm": 109.9478775600475, + "learning_rate": 3.9996414641669086e-07, + "loss": 0.5104, + "step": 625, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8757396449704142, + "success_rate.epoch.env.math": 0.9455587392550143, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.702258726899384, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636331045483501, + "success_rate.epoch.global": 0.8402889245585875, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9866504854368932, + "tokens_p.mean_in_band": 0.5562160326086957, + "tokens_rate.above_band": 0.8174603174603174, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.18253968253968253 + }, + { + "epoch": 0.1342138900724329, + "grad_norm": 260.60151656044053, + "learning_rate": 3.999626863610838e-07, + "loss": 0.3138, + "step": 630, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.8771929824561403, + "success_rate.epoch.env.math": 0.9458689458689459, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7014314928425358, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642832835832109, + "success_rate.epoch.global": 0.8407643312101911, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977854330708661, + "tokens_p.mean_in_band": 0.3307291666666667, + "tokens_rate.above_band": 0.9970559371933267, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002944062806673209 + }, + { + "epoch": 0.13527907967618236, + "grad_norm": 217.6153973733924, + "learning_rate": 3.9996119718095804e-07, + "loss": 0.4998, + "step": 635, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.8786127167630058, + "success_rate.epoch.env.math": 0.9463276836158192, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.6997971602434077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862682101455708, + "success_rate.epoch.global": 0.839652448657188, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9925271739130435, + "tokens_p.mean_in_band": 0.4596354166666667, + "tokens_rate.above_band": 0.8518518518518519, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14814814814814814 + }, + { + "epoch": 0.13634426927993182, + "grad_norm": 909.8461633131893, + "learning_rate": 3.99959678877399e-07, + "loss": 0.3338, + "step": 640, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8806818181818182, + "success_rate.epoch.env.math": 0.9466292134831461, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.6995967741935484, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629429692680336, + "success_rate.epoch.global": 0.8401253918495298, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993470149253731, + "tokens_p.mean_in_band": 0.5807291666666666, + "tokens_rate.above_band": 0.9955423476968797, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004457652303120356 + }, + { + "epoch": 0.1374094588836813, + "grad_norm": 54.04995690793635, + "learning_rate": 3.999581314515131e-07, + "loss": 0.3359, + "step": 645, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8820224719101124, + "success_rate.epoch.env.math": 0.947075208913649, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.702, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633457201390002, + "success_rate.epoch.global": 0.8413685847589425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949252136752137, + "tokens_p.mean_in_band": 0.673828125, + "tokens_rate.above_band": 0.9831932773109243, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01680672268907563 + }, + { + "epoch": 0.13847464848743077, + "grad_norm": 122.77730185305981, + "learning_rate": 3.999565549044282e-07, + "loss": 0.4627, + "step": 650, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.8820224719101124, + "success_rate.epoch.env.math": 0.9475138121546961, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7015810276679841, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634091798626904, + "success_rate.epoch.global": 0.8410493827160493, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9874174917491749, + "tokens_p.mean_in_band": 0.6307291666666667, + "tokens_rate.above_band": 0.9528301886792453, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04716981132075472 + }, + { + "epoch": 0.13953983809118023, + "grad_norm": 241.3829237230056, + "learning_rate": 3.9995494923729314e-07, + "loss": 0.693, + "step": 655, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.8833333333333333, + "success_rate.epoch.env.math": 0.9480874316939891, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7033398821218074, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863800253295463, + "success_rate.epoch.global": 0.8422664624808576, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967350746268657, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.1406050276949297, + "grad_norm": 60.80686084518257, + "learning_rate": 3.999533144512783e-07, + "loss": 0.3258, + "step": 660, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8852459016393442, + "success_rate.epoch.env.math": 0.9483695652173914, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.703125, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641030208818453, + "success_rate.epoch.global": 0.8427051671732523, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980867346938775, + "tokens_p.mean_in_band": 0.5651041666666666, + "tokens_rate.above_band": 0.9879032258064516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012096774193548387 + }, + { + "epoch": 0.14167021729867915, + "grad_norm": 91.79210271315232, + "learning_rate": 3.9995165054757497e-07, + "loss": 0.4259, + "step": 665, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9571428571428572, + "success_rate.epoch.env.logic": 0.8852459016393442, + "success_rate.epoch.env.math": 0.9487870619946092, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7001934235976789, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639309334625108, + "success_rate.epoch.global": 0.8416289592760181, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9983747044917257, + "tokens_p.mean_below_band": 3.655441105365753e-08, + "tokens_p.mean_in_band": 0.44345238095238093, + "tokens_rate.above_band": 0.950561797752809, + "tokens_rate.below_band": 0.0022471910112359553, + "tokens_rate.in_band": 0.04719101123595506 + }, + { + "epoch": 0.14273540690242864, + "grad_norm": 246.6225688094241, + "learning_rate": 3.9994995752739583e-07, + "loss": 0.4846, + "step": 670, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9571428571428572, + "success_rate.epoch.env.logic": 0.8858695652173914, + "success_rate.epoch.env.math": 0.9493333333333334, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7005758157389635, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640932450344341, + "success_rate.epoch.global": 0.8420658682634731, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915303738317757, + "tokens_p.mean_in_band": 0.6967329545454546, + "tokens_rate.above_band": 0.9067796610169492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09322033898305085 + }, + { + "epoch": 0.1438005965061781, + "grad_norm": 184.56099259093554, + "learning_rate": 3.9994823539197464e-07, + "loss": 0.316, + "step": 675, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8864864864864865, + "success_rate.epoch.env.math": 0.9498680738786279, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.6984732824427481, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640616768752118, + "success_rate.epoch.global": 0.8417533432392273, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666668, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9989472517730497, + "tokens_p.mean_in_band": 0.5361328125, + "tokens_rate.above_band": 0.986013986013986, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013986013986013986 + }, + { + "epoch": 0.14486578610992756, + "grad_norm": 199.511712911517, + "learning_rate": 3.999464841425667e-07, + "loss": 0.4566, + "step": 680, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8823529411764706, + "success_rate.epoch.env.math": 0.9502617801047121, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.6996197718631179, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863927772054091, + "success_rate.epoch.global": 0.8421828908554573, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982244318181818, + "tokens_p.mean_in_band": 0.5126953125, + "tokens_rate.above_band": 0.9777777777777777, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022222222222222223 + }, + { + "epoch": 0.14593097571367702, + "grad_norm": 92.45131206056814, + "learning_rate": 3.999447037804481e-07, + "loss": 0.3569, + "step": 685, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8829787234042553, + "success_rate.epoch.env.math": 0.9509043927648578, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7013232514177694, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642562175162373, + "success_rate.epoch.global": 0.8433382137628112, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996149289099526, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.1469961653174265, + "grad_norm": 113.64849552894992, + "learning_rate": 3.9994289430691644e-07, + "loss": 0.3519, + "step": 690, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8829787234042553, + "success_rate.epoch.env.math": 0.9516539440203562, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.701688555347092, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8643575679875846, + "success_rate.epoch.global": 0.84375, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9941860465116279, + "tokens_p.mean_in_band": 0.646484375, + "tokens_rate.above_band": 0.9347826086956522, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06521739130434782 + }, + { + "epoch": 0.14806135492117597, + "grad_norm": 185.34855306142984, + "learning_rate": 3.9994105572329047e-07, + "loss": 0.4525, + "step": 695, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8842105263157894, + "success_rate.epoch.env.math": 0.9517766497461929, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7050092764378478, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8736209727103436, + "success_rate.epoch.global": 0.8448773448773449, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967570754716981, + "tokens_p.mean_in_band": 0.7512019230769231, + "tokens_rate.above_band": 0.9760589318600368, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02394106813996317 + }, + { + "epoch": 0.14912654452492544, + "grad_norm": 185.19581163224188, + "learning_rate": 3.999391880309101e-07, + "loss": 0.5116, + "step": 700, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8802083333333334, + "success_rate.epoch.env.math": 0.952020202020202, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7058823529411765, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8733780240307648, + "success_rate.epoch.global": 0.8445558739255015, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968861209964412, + "tokens_p.mean_in_band": 0.6357421875, + "tokens_rate.above_band": 0.939799331103679, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06020066889632107 + }, + { + "epoch": 0.1501917341286749, + "grad_norm": 39.156166978674044, + "learning_rate": 3.999372912311365e-07, + "loss": 0.3961, + "step": 705, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8808290155440415, + "success_rate.epoch.env.math": 0.9526184538653366, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7074954296160878, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.873690911924248, + "success_rate.epoch.global": 0.8456614509246089, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999633072407045, + "tokens_p.mean_in_band": 0.7099609375, + "tokens_rate.above_band": 0.9845857418111753, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015414258188824663 + }, + { + "epoch": 0.15125692373242436, + "grad_norm": 41.85091522492022, + "learning_rate": 3.9993536532535207e-07, + "loss": 0.3028, + "step": 710, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.882051282051282, + "success_rate.epoch.env.math": 0.9528535980148883, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7065217391304348, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717146844650586, + "success_rate.epoch.global": 0.844632768361582, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.65, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9896537162162162, + "tokens_p.mean_in_band": 0.5136088709677419, + "tokens_rate.above_band": 0.8268156424581006, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.17318435754189945 + }, + { + "epoch": 0.15232211333617385, + "grad_norm": 116.37390393970315, + "learning_rate": 3.9993341031496035e-07, + "loss": 0.253, + "step": 715, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8838383838383839, + "success_rate.epoch.env.math": 0.9530864197530864, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7091561938958707, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721378097642165, + "success_rate.epoch.global": 0.8457223001402524, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9926339285714286, + "tokens_p.mean_in_band": 0.8098958333333334, + "tokens_rate.above_band": 0.9790209790209791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02097902097902098 + }, + { + "epoch": 0.1533873029399233, + "grad_norm": 116.36650017890712, + "learning_rate": 3.999314262013862e-07, + "loss": 0.5568, + "step": 720, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8855721393034826, + "success_rate.epoch.env.math": 0.9535452322738386, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7107142857142857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8724787788373315, + "success_rate.epoch.global": 0.8467966573816156, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9876543209876543, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012345679012345678 + }, + { + "epoch": 0.15445249254367277, + "grad_norm": 198.86925981719995, + "learning_rate": 3.9992941298607557e-07, + "loss": 0.3691, + "step": 725, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8861386138613861, + "success_rate.epoch.env.math": 0.9536585365853658, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7107583774250441, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8725634070116645, + "success_rate.epoch.global": 0.8464730290456431, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9926571038251366, + "tokens_p.mean_in_band": 0.4909855769230769, + "tokens_rate.above_band": 0.9336734693877551, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0663265306122449 + }, + { + "epoch": 0.15551768214742223, + "grad_norm": 74.23956330436344, + "learning_rate": 3.9992737067049566e-07, + "loss": 0.3195, + "step": 730, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9302325581395349, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.8872549019607843, + "success_rate.epoch.env.math": 0.9537712895377128, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.712280701754386, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714177934953359, + "success_rate.epoch.global": 0.8468406593406593, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984515765765766, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.9833887043189369, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016611295681063124 + }, + { + "epoch": 0.15658287175117172, + "grad_norm": 100.75482028353807, + "learning_rate": 3.99925299256135e-07, + "loss": 0.4015, + "step": 735, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.958904109589041, + "success_rate.epoch.env.logic": 0.8878048780487805, + "success_rate.epoch.env.math": 0.9538834951456311, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7137870855148342, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720987570873039, + "success_rate.epoch.global": 0.8478854024556617, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978782013103038, + "tokens_p.mean_in_band": 0.666015625, + "tokens_rate.above_band": 0.9982164090368609, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017835909631391202 + }, + { + "epoch": 0.15764806135492118, + "grad_norm": 41.415504626263875, + "learning_rate": 3.999231987445031e-07, + "loss": 0.4562, + "step": 740, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.958904109589041, + "success_rate.epoch.env.logic": 0.8894230769230769, + "success_rate.epoch.env.math": 0.9538834951456311, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7115716753022453, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720444651474591, + "success_rate.epoch.global": 0.8468834688346883, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9969370860927153, + "tokens_p.mean_in_band": 0.5588541666666667, + "tokens_rate.above_band": 0.9617834394904459, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03821656050955414 + }, + { + "epoch": 0.15871325095867064, + "grad_norm": 109.74259053385872, + "learning_rate": 3.9992106913713083e-07, + "loss": 0.5705, + "step": 745, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9594594594594594, + "success_rate.epoch.env.logic": 0.8899521531100478, + "success_rate.epoch.env.math": 0.9543269230769231, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7101200686106347, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720513967199203, + "success_rate.epoch.global": 0.8465679676985195, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.00049882629108, + "tokens_p.mean_in_band": 0.490234375, + "tokens_rate.above_band": 0.9906976744186047, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009302325581395349 + }, + { + "epoch": 0.1597784405624201, + "grad_norm": 320.24658443520934, + "learning_rate": 3.999189104355703e-07, + "loss": 0.3239, + "step": 750, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.892018779342723, + "success_rate.epoch.env.math": 0.9544364508393285, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7103918228279387, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8723230738792769, + "success_rate.epoch.global": 0.8469251336898396, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981677524429967, + "tokens_p.mean_in_band": 0.425537109375, + "tokens_rate.above_band": 0.9746031746031746, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025396825396825397 + }, + { + "epoch": 0.16084363016616957, + "grad_norm": 78.83909682586267, + "learning_rate": 3.999167226413947e-07, + "loss": 0.4456, + "step": 755, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.8930232558139535, + "success_rate.epoch.env.math": 0.9547619047619048, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7108843537414966, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708205221675906, + "success_rate.epoch.global": 0.8472775564409031, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998900615655233, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.9973684210526316, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002631578947368421 + }, + { + "epoch": 0.16190881976991905, + "grad_norm": 108.22896960316253, + "learning_rate": 3.999145057561985e-07, + "loss": 0.4478, + "step": 760, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.8944954128440367, + "success_rate.epoch.env.math": 0.9549763033175356, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7099494097807757, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708888504971356, + "success_rate.epoch.global": 0.8469656992084432, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9947289156626506, + "tokens_p.mean_in_band": 0.59765625, + "tokens_rate.above_band": 0.8924731182795699, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10752688172043011 + }, + { + "epoch": 0.16297400937366852, + "grad_norm": 179.91587218705112, + "learning_rate": 3.9991225978159735e-07, + "loss": 0.3552, + "step": 765, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.8944954128440367, + "success_rate.epoch.env.math": 0.9557109557109557, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7114093959731543, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710883630958446, + "success_rate.epoch.global": 0.8479685452162516, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9927591463414634, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.8723404255319149, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1276595744680851 + }, + { + "epoch": 0.16403919897741798, + "grad_norm": 80.3672189314749, + "learning_rate": 3.9990998471922804e-07, + "loss": 0.3091, + "step": 770, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.8944954128440367, + "success_rate.epoch.env.math": 0.9517241379310345, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7123745819397993, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8709082660922077, + "success_rate.epoch.global": 0.84765625, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973363774733638, + "tokens_p.mean_in_band": 0.71484375, + "tokens_rate.above_band": 0.9850074962518741, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014992503748125937 + }, + { + "epoch": 0.16510438858116744, + "grad_norm": 54.40434870976095, + "learning_rate": 3.999076805707487e-07, + "loss": 0.5035, + "step": 775, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.8963963963963963, + "success_rate.epoch.env.math": 0.952054794520548, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7133333333333334, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712463508887187, + "success_rate.epoch.global": 0.8486416558861578, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965861344537815, + "tokens_p.mean_in_band": 0.6171875, + "tokens_rate.above_band": 0.9966499162479062, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0033500837520938024 + }, + { + "epoch": 0.16616957818491693, + "grad_norm": 77.1810737306313, + "learning_rate": 3.999053473378385e-07, + "loss": 0.5249, + "step": 780, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.8968609865470852, + "success_rate.epoch.env.math": 0.9522727272727273, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7128712871287128, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712841776934108, + "success_rate.epoch.global": 0.8483290488431876, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942434210526315, + "tokens_p.mean_in_band": 0.4144965277777778, + "tokens_rate.above_band": 0.926829268292683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07317073170731707 + }, + { + "epoch": 0.1672347677886664, + "grad_norm": 110.1358637045039, + "learning_rate": 3.99902985022198e-07, + "loss": 0.5139, + "step": 785, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.8973214285714286, + "success_rate.epoch.env.math": 0.952808988764045, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7131147540983607, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713969204648027, + "success_rate.epoch.global": 0.8486590038314177, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.993452380952381, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.963302752293578, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03669724770642202 + }, + { + "epoch": 0.16829995739241585, + "grad_norm": 130.90584190579202, + "learning_rate": 3.9990059362554866e-07, + "loss": 0.5549, + "step": 790, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9625, + "success_rate.epoch.env.logic": 0.8986784140969163, + "success_rate.epoch.env.math": 0.952808988764045, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7138211382113822, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702502421040113, + "success_rate.epoch.global": 0.8483502538071066, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9989742888402626, + "tokens_p.mean_in_band": 0.6667798913043478, + "tokens_rate.above_band": 0.9520833333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04791666666666667 + }, + { + "epoch": 0.1693651469961653, + "grad_norm": 151.1400154810627, + "learning_rate": 3.998981731496335e-07, + "loss": 0.2527, + "step": 795, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9625, + "success_rate.epoch.env.logic": 0.8986784140969163, + "success_rate.epoch.env.math": 0.953125, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.714516129032258, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703762764160302, + "success_rate.epoch.global": 0.8486759142496847, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933562992125984, + "tokens_p.mean_in_band": 0.608154296875, + "tokens_rate.above_band": 0.9407407407407408, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05925925925925926 + }, + { + "epoch": 0.17043033659991477, + "grad_norm": 67.54128195155032, + "learning_rate": 3.9989572359621646e-07, + "loss": 0.3927, + "step": 800, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8995633187772926, + "success_rate.epoch.env.math": 0.9534368070953437, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7154340836012861, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706269863694747, + "success_rate.epoch.global": 0.849624060150376, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999609375, + "tokens_p.mean_in_band": 0.6276041666666666, + "tokens_rate.above_band": 0.9946714031971581, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0053285968028419185 + }, + { + "epoch": 0.17149552620366426, + "grad_norm": 84.23383582672564, + "learning_rate": 3.9989324496708275e-07, + "loss": 0.4299, + "step": 805, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9004329004329005, + "success_rate.epoch.env.math": 0.9534368070953437, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7165605095541401, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871015541620284, + "success_rate.epoch.global": 0.8499377334993773, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979570217917676, + "tokens_p.mean_in_band": 0.55322265625, + "tokens_rate.above_band": 0.9627039627039627, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.037296037296037296 + }, + { + "epoch": 0.17256071580741372, + "grad_norm": 152.75715314646231, + "learning_rate": 3.998907372640388e-07, + "loss": 0.3903, + "step": 810, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9008620689655172, + "success_rate.epoch.env.math": 0.9537444933920705, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7176656151419558, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8711829925672986, + "success_rate.epoch.global": 0.8502475247524752, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970238095238095, + "tokens_p.mean_in_band": 0.669921875, + "tokens_rate.above_band": 0.9797160243407708, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02028397565922921 + }, + { + "epoch": 0.17362590541116318, + "grad_norm": 707.8209272033347, + "learning_rate": 3.998882004889122e-07, + "loss": 0.3435, + "step": 815, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9017094017094017, + "success_rate.epoch.env.math": 0.9540481400437637, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7183098591549296, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713461946953487, + "success_rate.epoch.global": 0.8505535055350554, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9920594262295082, + "tokens_p.mean_in_band": 0.6953125, + "tokens_rate.above_band": 0.9242424242424242, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07575757575757576 + }, + { + "epoch": 0.17469109501491265, + "grad_norm": 75.77018405407335, + "learning_rate": 3.998856346435517e-07, + "loss": 0.3434, + "step": 820, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8991596638655462, + "success_rate.epoch.env.math": 0.9543478260869566, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.719626168224299, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712613089925039, + "success_rate.epoch.global": 0.8508557457212714, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995291095890411, + "tokens_p.mean_in_band": 0.7314453125, + "tokens_rate.above_band": 0.8795180722891566, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12048192771084337 + }, + { + "epoch": 0.17575628461866213, + "grad_norm": 72.17329635538552, + "learning_rate": 3.998830397298273e-07, + "loss": 0.4809, + "step": 825, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9634146341463414, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9546436285097192, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7204968944099379, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716553685193067, + "success_rate.epoch.global": 0.8517618469015796, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0015916149068322, + "tokens_p.mean_in_band": 0.681640625, + "tokens_rate.above_band": 0.9987593052109182, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012406947890818859 + }, + { + "epoch": 0.1768214742224116, + "grad_norm": 74.68546642048564, + "learning_rate": 3.9988041574963017e-07, + "loss": 0.2879, + "step": 830, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9634146341463414, + "success_rate.epoch.env.logic": 0.9008264462809917, + "success_rate.epoch.env.math": 0.9550321199143469, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7217928902627512, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718991699091488, + "success_rate.epoch.global": 0.8526570048309179, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965909090909091, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.990990990990991, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009009009009009009 + }, + { + "epoch": 0.17788666382616106, + "grad_norm": 0.0, + "learning_rate": 3.998777627048726e-07, + "loss": 0.2862, + "step": 835, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.963855421686747, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9552238805970149, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7211093990755008, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720422455403741, + "success_rate.epoch.global": 0.8529411764705882, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979674796747967, + "tokens_p.mean_in_band": 0.380859375, + "tokens_rate.above_band": 0.9935379644588045, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006462035541195477 + }, + { + "epoch": 0.17895185342991052, + "grad_norm": 211.98391817922078, + "learning_rate": 3.998750805974882e-07, + "loss": 0.5927, + "step": 840, + "success_rate.epoch.env.abd": 0.9875, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9556025369978859, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7212863705972435, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710640079462759, + "success_rate.epoch.global": 0.8526252983293556, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9847134476534296, + "tokens_p.mean_in_band": 0.6323162141393442, + "tokens_rate.above_band": 0.8195266272189349, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1804733727810651 + }, + { + "epoch": 0.18001704303365998, + "grad_norm": 115.45759197346109, + "learning_rate": 3.998723694294316e-07, + "loss": 0.3578, + "step": 845, + "success_rate.epoch.env.abd": 0.9875, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9024390243902439, + "success_rate.epoch.env.math": 0.9556962025316456, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.723823975720789, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713759125631172, + "success_rate.epoch.global": 0.8534994068801898, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989600665557404, + "tokens_p.mean_in_band": 0.74921875, + "tokens_rate.above_band": 0.9917491749174917, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00825082508250825 + }, + { + "epoch": 0.18108223263740947, + "grad_norm": 103.01654660605729, + "learning_rate": 3.9986962920267865e-07, + "loss": 0.3262, + "step": 850, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9558823529411765, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7239819004524887, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716411699496153, + "success_rate.epoch.global": 0.8537735849056604, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996828007518797, + "tokens_p.mean_in_band": 0.705078125, + "tokens_rate.above_band": 0.981549815498155, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01845018450184502 + }, + { + "epoch": 0.18214742224115893, + "grad_norm": 43.53353342667868, + "learning_rate": 3.9986685991922645e-07, + "loss": 0.4394, + "step": 855, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9036144578313253, + "success_rate.epoch.env.math": 0.9560669456066946, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7256371814092953, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719397953282164, + "success_rate.epoch.global": 0.854630715123095, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974061264822134, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.9921568627450981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00784313725490196 + }, + { + "epoch": 0.1832126118449084, + "grad_norm": 56.361183853925176, + "learning_rate": 3.998640615810933e-07, + "loss": 0.2373, + "step": 860, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.904, + "success_rate.epoch.env.math": 0.9544513457556936, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.726457399103139, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719411889718114, + "success_rate.epoch.global": 0.8548951048951049, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971289752650176, + "tokens_p.mean_below_band": 6.344635039567947e-09, + "tokens_p.mean_in_band": 0.8447265625, + "tokens_rate.above_band": 0.9826388888888888, + "tokens_rate.below_band": 0.003472222222222222, + "tokens_rate.in_band": 0.013888888888888888 + }, + { + "epoch": 0.18427780144865785, + "grad_norm": 149.68786296746322, + "learning_rate": 3.998612341903184e-07, + "loss": 0.366, + "step": 865, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9545454545454546, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7265973254086181, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720821919084102, + "success_rate.epoch.global": 0.8551564310544612, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979628422425033, + "tokens_p.mean_in_band": 0.5279947916666666, + "tokens_rate.above_band": 0.9922380336351876, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007761966364812419 + }, + { + "epoch": 0.18534299105240734, + "grad_norm": 241.04134938089493, + "learning_rate": 3.998583777489626e-07, + "loss": 0.5057, + "step": 870, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9015748031496063, + "success_rate.epoch.env.math": 0.9548254620123203, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7263313609467456, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718944065542978, + "success_rate.epoch.global": 0.8548387096774194, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997590782122905, + "tokens_p.mean_in_band": 0.4397786458333333, + "tokens_rate.above_band": 0.9738846572361263, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026115342763873776 + }, + { + "epoch": 0.1864081806561568, + "grad_norm": 81.37827634038625, + "learning_rate": 3.9985549225910747e-07, + "loss": 0.4182, + "step": 875, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9015748031496063, + "success_rate.epoch.env.math": 0.955193482688391, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.727540500736377, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871063487600561, + "success_rate.epoch.global": 0.8550973654066437, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973714953271028, + "tokens_p.mean_in_band": 0.7319711538461539, + "tokens_rate.above_band": 0.9427312775330396, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05726872246696035 + }, + { + "epoch": 0.18747337025990626, + "grad_norm": 57.99123672846491, + "learning_rate": 3.99852577722856e-07, + "loss": 0.5053, + "step": 880, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8984375, + "success_rate.epoch.env.math": 0.9553752535496958, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.726207906295754, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707211548106322, + "success_rate.epoch.global": 0.8541310541310542, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.6666666666666666, + "tokens_p.mean_above_band": 0.9952400662251656, + "tokens_p.mean_in_band": 0.4934895833333333, + "tokens_rate.above_band": 0.9096385542168675, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09036144578313253 + }, + { + "epoch": 0.18853855986365572, + "grad_norm": 135.74567562821284, + "learning_rate": 3.998496341423323e-07, + "loss": 0.5965, + "step": 885, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8984375, + "success_rate.epoch.env.math": 0.9516129032258065, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7260869565217392, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703681275290045, + "success_rate.epoch.global": 0.8526912181303116, + "success_rate.window.env.math": 0.3333333333333333, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.5238095238095238, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9849901574803149, + "tokens_p.mean_in_band": 0.59375, + "tokens_rate.above_band": 0.8141025641025641, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1858974358974359 + }, + { + "epoch": 0.18960374946740519, + "grad_norm": 76.23163641437583, + "learning_rate": 3.9984666151968154e-07, + "loss": 0.3778, + "step": 890, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8984375, + "success_rate.epoch.env.math": 0.9519038076152304, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7241379310344828, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702304284170491, + "success_rate.epoch.global": 0.851830985915493, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9923611111111111, + "tokens_p.mean_in_band": 0.45390625, + "tokens_rate.above_band": 0.9, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1 + }, + { + "epoch": 0.19066893907115467, + "grad_norm": 70.31600878436407, + "learning_rate": 3.998436598570703e-07, + "loss": 0.3689, + "step": 895, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8992248062015504, + "success_rate.epoch.env.math": 0.9524752475247524, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7249283667621776, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704258085842098, + "success_rate.epoch.global": 0.8526610644257703, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9876179245283019, + "tokens_p.mean_in_band": 0.82421875, + "tokens_rate.above_band": 0.9636363636363636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03636363636363636 + }, + { + "epoch": 0.19173412867490414, + "grad_norm": 198.68709370146894, + "learning_rate": 3.99840629156686e-07, + "loss": 0.3285, + "step": 900, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.952755905511811, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7254623044096729, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705703348963009, + "success_rate.epoch.global": 0.852924791086351, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981231231231231, + "tokens_p.mean_in_band": 0.63916015625, + "tokens_rate.above_band": 0.9765395894428153, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02346041055718475 + }, + { + "epoch": 0.1927993182786536, + "grad_norm": 115.64974806030389, + "learning_rate": 3.998375694207375e-07, + "loss": 0.3953, + "step": 905, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9003831417624522, + "success_rate.epoch.env.math": 0.9528487229862476, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7257383966244726, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870638703210091, + "success_rate.epoch.global": 0.8526315789473684, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9913194444444444, + "tokens_p.mean_in_band": 0.3875, + "tokens_rate.above_band": 0.9152542372881356, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0847457627118644 + }, + { + "epoch": 0.19386450788240306, + "grad_norm": 196.30656246782092, + "learning_rate": 3.9983448065145473e-07, + "loss": 0.348, + "step": 910, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9003831417624522, + "success_rate.epoch.env.math": 0.9529411764705882, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7284122562674095, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8709722664118953, + "success_rate.epoch.global": 0.8534435261707989, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997235254691689, + "tokens_p.mean_in_band": 0.709375, + "tokens_rate.above_band": 0.9933422103861518, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006657789613848202 + }, + { + "epoch": 0.19492969748615255, + "grad_norm": 63.38646157867469, + "learning_rate": 3.998313628510887e-07, + "loss": 0.4932, + "step": 915, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9007633587786259, + "success_rate.epoch.env.math": 0.953307392996109, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7285318559556787, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710963945037352, + "success_rate.epoch.global": 0.8536986301369863, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982255520504731, + "tokens_p.mean_in_band": 0.478515625, + "tokens_rate.above_band": 0.9875389408099688, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012461059190031152 + }, + { + "epoch": 0.195994887089902, + "grad_norm": 38.91201492250749, + "learning_rate": 3.9982821602191167e-07, + "loss": 0.3058, + "step": 920, + "success_rate.epoch.env.abd": 0.9882352941176471, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.8973384030418251, + "success_rate.epoch.env.math": 0.9534883720930233, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7286501377410468, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710064906203017, + "success_rate.epoch.global": 0.8534059945504087, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975149105367793, + "tokens_p.mean_in_band": 0.4923177083333333, + "tokens_rate.above_band": 0.971042471042471, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02895752895752896 + }, + { + "epoch": 0.19706007669365147, + "grad_norm": 56.016960219651835, + "learning_rate": 3.99825040166217e-07, + "loss": 0.2913, + "step": 925, + "success_rate.epoch.env.abd": 0.9882352941176471, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.8977272727272727, + "success_rate.epoch.env.math": 0.953757225433526, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7291381668946648, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8711449293553977, + "success_rate.epoch.global": 0.8536585365853658, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978146853146853, + "tokens_p.mean_in_band": 0.51640625, + "tokens_rate.above_band": 0.9828178694158075, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01718213058419244 + }, + { + "epoch": 0.19812526629740093, + "grad_norm": 94.65933006220571, + "learning_rate": 3.998218352863192e-07, + "loss": 0.2486, + "step": 930, + "success_rate.epoch.env.abd": 0.9883720930232558, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8981132075471698, + "success_rate.epoch.env.math": 0.9540229885057471, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7302452316076294, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713606879551405, + "success_rate.epoch.global": 0.8544474393530997, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966364970645792, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9922330097087378, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007766990291262136 + }, + { + "epoch": 0.1991904559011504, + "grad_norm": 141.17268123939456, + "learning_rate": 3.9981860138455407e-07, + "loss": 0.3464, + "step": 935, + "success_rate.epoch.env.abd": 0.9883720930232558, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.8981132075471698, + "success_rate.epoch.env.math": 0.9542857142857143, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7303523035230353, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8694932744477022, + "success_rate.epoch.global": 0.8536193029490616, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.55, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9878554502369669, + "tokens_p.mean_in_band": 0.6954296875, + "tokens_rate.above_band": 0.8635743519781719, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1364256480218281 + }, + { + "epoch": 0.20025564550489988, + "grad_norm": 38.756388962269526, + "learning_rate": 3.9981533846327834e-07, + "loss": 0.3103, + "step": 940, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.8984962406015038, + "success_rate.epoch.env.math": 0.9546313799621928, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7314439946018894, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696827890315908, + "success_rate.epoch.global": 0.8544, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972278225806451, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.992, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008 + }, + { + "epoch": 0.20132083510864934, + "grad_norm": 39.895267565989755, + "learning_rate": 3.998120465248701e-07, + "loss": 0.3391, + "step": 945, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.8955223880597015, + "success_rate.epoch.env.math": 0.9548022598870056, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7295850066934404, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869258974438369, + "success_rate.epoch.global": 0.8530503978779841, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9918376865671642, + "tokens_p.mean_in_band": 0.6015625, + "tokens_rate.above_band": 0.8993288590604027, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10067114093959731 + }, + { + "epoch": 0.2023860247123988, + "grad_norm": 84.84923260523423, + "learning_rate": 3.9980872557172846e-07, + "loss": 0.4443, + "step": 950, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9468085106382979, + "success_rate.epoch.env.logic": 0.895910780669145, + "success_rate.epoch.env.math": 0.9552238805970149, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7299465240641712, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8694492370018672, + "success_rate.epoch.global": 0.8538258575197889, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990684281842819, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.20345121431614827, + "grad_norm": 91.18217686793365, + "learning_rate": 3.9980537560627366e-07, + "loss": 0.2901, + "step": 955, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9468085106382979, + "success_rate.epoch.env.logic": 0.895910780669145, + "success_rate.epoch.env.math": 0.9555555555555556, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7317397078353254, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696730150988123, + "success_rate.epoch.global": 0.8545931758530184, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960069444444445, + "tokens_p.mean_in_band": 0.7375, + "tokens_rate.above_band": 0.972972972972973, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02702702702702703 + }, + { + "epoch": 0.20451640391989775, + "grad_norm": 293.4925344821206, + "learning_rate": 3.9980199663094723e-07, + "loss": 0.4795, + "step": 960, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8962962962962963, + "success_rate.epoch.env.math": 0.9558823529411765, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7318361955085865, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697974433806638, + "success_rate.epoch.global": 0.8548302872062663, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963329081632653, + "tokens_p.mean_in_band": 0.6484375, + "tokens_rate.above_band": 0.9849246231155779, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01507537688442211 + }, + { + "epoch": 0.20558159352364722, + "grad_norm": 71.09037694143568, + "learning_rate": 3.997985886482116e-07, + "loss": 0.3162, + "step": 965, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9479166666666666, + "success_rate.epoch.env.logic": 0.8974358974358975, + "success_rate.epoch.env.math": 0.9560439560439561, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7315789473684211, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699717049298389, + "success_rate.epoch.global": 0.8550649350649351, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993512110726643, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9942660550458715, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005733944954128441 + }, + { + "epoch": 0.20664678312739668, + "grad_norm": 107.20650782072696, + "learning_rate": 3.997951516605506e-07, + "loss": 0.319, + "step": 970, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9479166666666666, + "success_rate.epoch.env.logic": 0.8978102189781022, + "success_rate.epoch.env.math": 0.9561243144424132, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7323759791122716, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870097104292745, + "success_rate.epoch.global": 0.8552971576227391, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9934573002754821, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.989100817438692, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010899182561307902 + }, + { + "epoch": 0.20771197273114614, + "grad_norm": 150.3110438257762, + "learning_rate": 3.99791685670469e-07, + "loss": 0.6073, + "step": 975, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9484536082474226, + "success_rate.epoch.env.logic": 0.8985507246376812, + "success_rate.epoch.env.math": 0.956442831215971, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7321196358907672, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702188880011441, + "success_rate.epoch.global": 0.8555269922879177, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9951746323529411, + "tokens_p.mean_in_band": 0.5915798611111112, + "tokens_rate.above_band": 0.9379310344827586, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06206896551724138 + }, + { + "epoch": 0.2087771623348956, + "grad_norm": 65.41919357601023, + "learning_rate": 3.9978819068049294e-07, + "loss": 0.2164, + "step": 980, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.8985507246376812, + "success_rate.epoch.env.math": 0.9566787003610109, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7325581395348837, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704599548364474, + "success_rate.epoch.global": 0.8557544757033249, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983700539568345, + "tokens_p.mean_below_band": 2.9976945370435715e-09, + "tokens_p.mean_in_band": 0.78515625, + "tokens_rate.above_band": 0.9946332737030411, + "tokens_rate.below_band": 0.0017889087656529517, + "tokens_rate.in_band": 0.0035778175313059034 + }, + { + "epoch": 0.2098423519386451, + "grad_norm": 125.19739449705256, + "learning_rate": 3.997846666931694e-07, + "loss": 0.4152, + "step": 985, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9494949494949495, + "success_rate.epoch.env.logic": 0.8992805755395683, + "success_rate.epoch.env.math": 0.9568345323741008, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7339331619537275, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707236736489115, + "success_rate.epoch.global": 0.8564885496183207, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996510152284264, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9949494949494949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005050505050505051 + }, + { + "epoch": 0.21090754154239455, + "grad_norm": 50.78380193167762, + "learning_rate": 3.997811137110666e-07, + "loss": 0.4005, + "step": 990, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9405940594059405, + "success_rate.epoch.env.logic": 0.899641577060932, + "success_rate.epoch.env.math": 0.956989247311828, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7343550446998723, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699997381140231, + "success_rate.epoch.global": 0.8562025316455696, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9927991548042705, + "tokens_p.mean_in_band": 0.5703828828828829, + "tokens_rate.above_band": 0.8350668647845468, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1649331352154532 + }, + { + "epoch": 0.211972731146144, + "grad_norm": 336.6614214411064, + "learning_rate": 3.997775317367741e-07, + "loss": 0.2875, + "step": 995, + "success_rate.epoch.env.abd": 0.989010989010989, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.900709219858156, + "success_rate.epoch.env.math": 0.9571428571428572, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7350318471337579, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702648153480733, + "success_rate.epoch.global": 0.856926952141058, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989367219917012, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.21303792074989347, + "grad_norm": 101.31429936007149, + "learning_rate": 3.9977392077290223e-07, + "loss": 0.3308, + "step": 1000, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.901060070671378, + "success_rate.epoch.env.math": 0.9555160142348754, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7341772151898734, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700926061461028, + "success_rate.epoch.global": 0.856140350877193, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.775, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9930862831858407, + "tokens_p.mean_in_band": 0.300101902173913, + "tokens_rate.above_band": 0.8308823529411765, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16911764705882354 + }, + { + "epoch": 0.21410311035364296, + "grad_norm": 77.68227255786232, + "learning_rate": 3.997702808220828e-07, + "loss": 0.2753, + "step": 1005, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.941747572815534, + "success_rate.epoch.env.logic": 0.901060070671378, + "success_rate.epoch.env.math": 0.9558303886925795, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7342569269521411, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703074072812303, + "success_rate.epoch.global": 0.856359102244389, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997750946969697, + "tokens_p.mean_in_band": 0.6380208333333334, + "tokens_rate.above_band": 0.9887640449438202, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011235955056179775 + }, + { + "epoch": 0.21516829995739242, + "grad_norm": 992.9681403865821, + "learning_rate": 3.997666118869684e-07, + "loss": 0.6206, + "step": 1010, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9423076923076923, + "success_rate.epoch.env.logic": 0.9020979020979021, + "success_rate.epoch.env.math": 0.9543859649122807, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7349246231155779, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703820639450321, + "success_rate.epoch.global": 0.856575682382134, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9935787671232876, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9776785714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022321428571428572 + }, + { + "epoch": 0.21623348956114188, + "grad_norm": 136.27811542989434, + "learning_rate": 3.9976291397023315e-07, + "loss": 0.3294, + "step": 1015, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9024390243902439, + "success_rate.epoch.env.math": 0.9546247818499127, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7346683354192741, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705379426704731, + "success_rate.epoch.global": 0.8567901234567902, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981009224091155, + "tokens_p.mean_in_band": 0.5725446428571429, + "tokens_rate.above_band": 0.9962162162162163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0037837837837837837 + }, + { + "epoch": 0.21729867916489135, + "grad_norm": 70.46154311145497, + "learning_rate": 3.9975918707457187e-07, + "loss": 0.3256, + "step": 1020, + "success_rate.epoch.env.abd": 0.9893617021276596, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.8996539792387543, + "success_rate.epoch.env.math": 0.9547826086956521, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7325870646766169, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701202973296819, + "success_rate.epoch.global": 0.8555282555282555, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.725, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9952256944444444, + "tokens_p.mean_in_band": 0.6244480298913043, + "tokens_rate.above_band": 0.9616026711185309, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038397328881469114 + }, + { + "epoch": 0.2183638687686408, + "grad_norm": 70.3106937233737, + "learning_rate": 3.9975543120270083e-07, + "loss": 0.2602, + "step": 1025, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.8996539792387543, + "success_rate.epoch.env.math": 0.9549393414211439, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7336621454993835, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702424605688751, + "success_rate.epoch.global": 0.8557457212713936, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9926321138211383, + "tokens_p.mean_in_band": 0.6272321428571429, + "tokens_rate.above_band": 0.9461538461538461, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05384615384615385 + }, + { + "epoch": 0.2194290583723903, + "grad_norm": 45.41832513907574, + "learning_rate": 3.997516463573571e-07, + "loss": 0.4279, + "step": 1030, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.8979591836734694, + "success_rate.epoch.env.math": 0.9535283993115319, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7339901477832512, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699899391696906, + "success_rate.epoch.global": 0.8554744525547445, + "success_rate.window.env.logic": 0.8, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0006443298969072, + "tokens_p.mean_in_band": 0.58549072265625, + "tokens_rate.above_band": 0.9644886363636364, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03551136363636364 + }, + { + "epoch": 0.22049424797613976, + "grad_norm": 72.21395780256603, + "learning_rate": 3.997478325412993e-07, + "loss": 0.2947, + "step": 1035, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.8993288590604027, + "success_rate.epoch.env.math": 0.952054794520548, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7334152334152334, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700506625855755, + "success_rate.epoch.global": 0.8552058111380145, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977072010869565, + "tokens_p.mean_in_band": 0.47380514705882354, + "tokens_rate.above_band": 0.9558441558441558, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04415584415584416 + }, + { + "epoch": 0.22155943757988922, + "grad_norm": 106.13585676785776, + "learning_rate": 3.997439897573067e-07, + "loss": 0.5819, + "step": 1040, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.8996655518394648, + "success_rate.epoch.env.math": 0.9507640067911715, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7347188264058679, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700824351346955, + "success_rate.epoch.global": 0.8554216867469879, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.991504854368932, + "tokens_p.mean_in_band": 0.4294704861111111, + "tokens_rate.above_band": 0.9196428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08035714285714286 + }, + { + "epoch": 0.22262462718363868, + "grad_norm": 69.45850710137455, + "learning_rate": 3.9974011800818e-07, + "loss": 0.4525, + "step": 1045, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.8996655518394648, + "success_rate.epoch.env.math": 0.9509306260575296, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7345454545454545, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701083873323242, + "success_rate.epoch.global": 0.8551558752997602, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9047619047619048, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9937150837988827, + "tokens_p.mean_in_band": 0.4783380681818182, + "tokens_rate.above_band": 0.9421052631578948, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05789473684210526 + }, + { + "epoch": 0.22368981678738817, + "grad_norm": 41.56704083564087, + "learning_rate": 3.997362172967409e-07, + "loss": 0.301, + "step": 1050, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.900990099009901, + "success_rate.epoch.env.math": 0.9511784511784511, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7355072463768116, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703387658889348, + "success_rate.epoch.global": 0.8558472553699285, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974226804123711, + "tokens_p.mean_in_band": 0.625, + "tokens_rate.above_band": 0.9797979797979798, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020202020202020204 + }, + { + "epoch": 0.22475500639113763, + "grad_norm": 42.018910693897, + "learning_rate": 3.997322876258321e-07, + "loss": 0.173, + "step": 1055, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9013157894736842, + "success_rate.epoch.env.math": 0.9513422818791947, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7365269461077845, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704759677885255, + "success_rate.epoch.global": 0.8560570071258907, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9925, + "tokens_p.mean_in_band": 0.6135817307692307, + "tokens_rate.above_band": 0.8849557522123894, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11504424778761062 + }, + { + "epoch": 0.2258201959948871, + "grad_norm": 50.53699077668399, + "learning_rate": 3.997283289983177e-07, + "loss": 0.292, + "step": 1060, + "success_rate.epoch.env.abd": 0.9895833333333334, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9515050167224081, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7384066587395958, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707010270054748, + "success_rate.epoch.global": 0.8567375886524823, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954108391608392, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.9794520547945206, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02054794520547945 + }, + { + "epoch": 0.22688538559863655, + "grad_norm": 153.33174535054349, + "learning_rate": 3.997243414170826e-07, + "loss": 0.4433, + "step": 1065, + "success_rate.epoch.env.abd": 0.9896907216494846, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9019607843137255, + "success_rate.epoch.env.math": 0.9516666666666667, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7384615384615385, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86930247461503, + "success_rate.epoch.global": 0.8564705882352941, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973072562358276, + "tokens_p.mean_in_band": 0.45458984375, + "tokens_rate.above_band": 0.9910112359550561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008988764044943821 + }, + { + "epoch": 0.227950575202386, + "grad_norm": 58.014062366038246, + "learning_rate": 3.9972032488503296e-07, + "loss": 0.2734, + "step": 1070, + "success_rate.epoch.env.abd": 0.9896907216494846, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9022801302931596, + "success_rate.epoch.env.math": 0.9517470881863561, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.74, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869553233644293, + "success_rate.epoch.global": 0.8571428571428571, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988473360655737, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.2290157648061355, + "grad_norm": 590.3894529474009, + "learning_rate": 3.997162794050959e-07, + "loss": 0.2279, + "step": 1075, + "success_rate.epoch.env.abd": 0.9897959183673469, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9025974025974026, + "success_rate.epoch.env.math": 0.9518272425249169, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7406542056074766, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696816741309467, + "success_rate.epoch.global": 0.8573426573426574, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992505081300813, + "tokens_p.mean_below_band": 5.617039278149605e-09, + "tokens_p.mean_in_band": 0.7265625, + "tokens_rate.above_band": 0.9879518072289156, + "tokens_rate.below_band": 0.004016064257028112, + "tokens_rate.in_band": 0.008032128514056224 + }, + { + "epoch": 0.23008095440988496, + "grad_norm": 61.649239684523124, + "learning_rate": 3.9971220498021985e-07, + "loss": 0.2757, + "step": 1080, + "success_rate.epoch.env.abd": 0.9897959183673469, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9520661157024793, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7418604651162791, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8698701774528171, + "success_rate.epoch.global": 0.8580046403712297, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982348326359832, + "tokens_p.mean_in_band": 0.81015625, + "tokens_rate.above_band": 0.989648033126294, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010351966873706004 + }, + { + "epoch": 0.23114614401363442, + "grad_norm": 22.105312534211336, + "learning_rate": 3.9970810161337427e-07, + "loss": 0.3216, + "step": 1085, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9038461538461539, + "success_rate.epoch.env.math": 0.9521452145214522, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7410404624277457, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699157888942909, + "success_rate.epoch.global": 0.8577367205542725, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.998477564102564, + "tokens_p.mean_in_band": 0.4984375, + "tokens_rate.above_band": 0.975, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025 + }, + { + "epoch": 0.23221133361738389, + "grad_norm": 44.231131899712054, + "learning_rate": 3.997039693075495e-07, + "loss": 0.2264, + "step": 1090, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9454545454545454, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9522240527182867, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7416378316032295, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701748866371258, + "success_rate.epoch.global": 0.8583908045977011, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999488543371522, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9967373572593801, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0032626427406199023 + }, + { + "epoch": 0.23327652322113338, + "grad_norm": 99.58092276063034, + "learning_rate": 3.9969980806575724e-07, + "loss": 0.2318, + "step": 1095, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9454545454545454, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9525368248772504, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7434135166093929, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703647463794101, + "success_rate.epoch.global": 0.8590389016018307, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9928463855421686, + "tokens_p.mean_in_band": 0.81015625, + "tokens_rate.above_band": 0.9431818181818182, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.056818181818181816 + }, + { + "epoch": 0.23434171282488284, + "grad_norm": 202.14236062783922, + "learning_rate": 3.9969561789103016e-07, + "loss": 0.3987, + "step": 1100, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9056603773584906, + "success_rate.epoch.env.math": 0.9526916802610114, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7431506849315068, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705031355984325, + "success_rate.epoch.global": 0.8592255125284738, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.993421052631579, + "tokens_p.mean_in_band": 0.6255580357142857, + "tokens_rate.above_band": 0.9809782608695652, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019021739130434784 + }, + { + "epoch": 0.2354069024286323, + "grad_norm": 107.93231017247012, + "learning_rate": 3.99691398786422e-07, + "loss": 0.3863, + "step": 1105, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9065420560747663, + "success_rate.epoch.env.math": 0.9529983792544571, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7440273037542662, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706908625559305, + "success_rate.epoch.global": 0.8598639455782313, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9914383561643836, + "tokens_p.mean_in_band": 0.7700892857142857, + "tokens_rate.above_band": 0.9125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0875 + }, + { + "epoch": 0.23647209203238176, + "grad_norm": 303.380145269383, + "learning_rate": 3.996871507550077e-07, + "loss": 0.5499, + "step": 1110, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9122807017543859, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.906832298136646, + "success_rate.epoch.env.math": 0.9532258064516129, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7429218573046432, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708237047679432, + "success_rate.epoch.global": 0.8595936794582393, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977777777777778, + "tokens_p.mean_in_band": 0.5796342329545454, + "tokens_rate.above_band": 0.9684361549497847, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03156384505021521 + }, + { + "epoch": 0.23753728163613122, + "grad_norm": 172.28347523897557, + "learning_rate": 3.9968287379988305e-07, + "loss": 0.325, + "step": 1115, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9385964912280702, + "success_rate.epoch.env.logic": 0.9074074074074074, + "success_rate.epoch.env.math": 0.9533011272141707, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7440811724915445, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704137108328169, + "success_rate.epoch.global": 0.8597752808988764, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9930718570683359, + "tokens_p.mean_in_band": 0.5292215616966581, + "tokens_rate.above_band": 0.907885389533507, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09211461046649301 + }, + { + "epoch": 0.2386024712398807, + "grad_norm": 96.41067664182437, + "learning_rate": 3.996785679241652e-07, + "loss": 0.3093, + "step": 1120, + "success_rate.epoch.env.abd": 0.99, + "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9049079754601227, + "success_rate.epoch.env.math": 0.9535256410256411, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7446569178852643, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703381545734128, + "success_rate.epoch.global": 0.8599552572706936, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987399193548387, + "tokens_p.mean_in_band": 0.74921875, + "tokens_rate.above_band": 0.986737400530504, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013262599469496022 + }, + { + "epoch": 0.23966766084363017, + "grad_norm": 94.36686829498194, + "learning_rate": 3.996742331309921e-07, + "loss": 0.2574, + "step": 1125, + "success_rate.epoch.env.abd": 0.9901960784313726, + "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9054878048780488, + "success_rate.epoch.env.math": 0.9535256410256411, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7449664429530202, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704368302931449, + "success_rate.epoch.global": 0.8601336302895323, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9925595238095238, + "tokens_p.mean_in_band": 0.7265625, + "tokens_rate.above_band": 0.9130434782608695, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08695652173913043 + }, + { + "epoch": 0.24073285044737963, + "grad_norm": 134.93265771483846, + "learning_rate": 3.9966986942352307e-07, + "loss": 0.3389, + "step": 1130, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9057750759878419, + "success_rate.epoch.env.math": 0.9538950715421304, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7452513966480447, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697981178696821, + "success_rate.epoch.global": 0.8603104212860311, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975806451612903, + "tokens_p.mean_in_band": 0.65390625, + "tokens_rate.above_band": 0.9393939393939394, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06060606060606061 + }, + { + "epoch": 0.2417980400511291, + "grad_norm": 326.84204609725606, + "learning_rate": 3.9966547680493825e-07, + "loss": 0.4822, + "step": 1135, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.906060606060606, + "success_rate.epoch.env.math": 0.9542586750788643, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.744988864142539, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652878088791357, + "success_rate.epoch.global": 0.8600441501103753, + "success_rate.window.env.agentgym:textcraft": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9962797619047619, + "tokens_p.mean_in_band": 0.6243265086206896, + "tokens_rate.above_band": 0.9559939301972686, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04400606980273141 + }, + { + "epoch": 0.24286322965487858, + "grad_norm": 156.44290078317317, + "learning_rate": 3.99661055278439e-07, + "loss": 0.4755, + "step": 1140, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9063444108761329, + "success_rate.epoch.env.math": 0.95141065830721, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7439024390243902, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650843354522499, + "success_rate.epoch.global": 0.8589010989010989, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9955119680851063, + "tokens_p.mean_in_band": 0.49148995535714285, + "tokens_rate.above_band": 0.9641025641025641, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035897435897435895 + }, + { + "epoch": 0.24392841925862804, + "grad_norm": 76.93919193952442, + "learning_rate": 3.996566048472477e-07, + "loss": 0.355, + "step": 1145, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9396551724137931, + "success_rate.epoch.env.logic": 0.9066265060240963, + "success_rate.epoch.env.math": 0.9516380655226209, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7450331125827815, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8645235700176696, + "success_rate.epoch.global": 0.8590809628008753, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963118654822335, + "tokens_p.mean_in_band": 0.6280048076923077, + "tokens_rate.above_band": 0.9680589680589681, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03194103194103194 + }, + { + "epoch": 0.2449936088623775, + "grad_norm": 696.2384141161228, + "learning_rate": 3.996521255146077e-07, + "loss": 0.4063, + "step": 1150, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9193548387096774, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9396551724137931, + "success_rate.epoch.env.logic": 0.9039039039039038, + "success_rate.epoch.env.math": 0.951937984496124, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7447744774477447, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8645241930868773, + "success_rate.epoch.global": 0.8588235294117647, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982822410147991, + "tokens_p.mean_in_band": 0.6394675925925926, + "tokens_rate.above_band": 0.946, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.054 + }, + { + "epoch": 0.24605879846612697, + "grad_norm": 71.07902201431295, + "learning_rate": 3.996476172837836e-07, + "loss": 0.39, + "step": 1155, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9401709401709402, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9521604938271605, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7442371020856202, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8647368280422686, + "success_rate.epoch.global": 0.8590021691973969, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978060787671232, + "tokens_p.mean_in_band": 0.2152777777777778, + "tokens_rate.above_band": 0.984822934232715, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01517706576728499 + }, + { + "epoch": 0.24712398806987643, + "grad_norm": 95.64706189143268, + "learning_rate": 3.99643080158061e-07, + "loss": 0.3741, + "step": 1160, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9401709401709402, + "success_rate.epoch.env.logic": 0.9050445103857567, + "success_rate.epoch.env.math": 0.9523809523809523, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7456331877729258, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642410288321302, + "success_rate.epoch.global": 0.8591792656587472, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973958333333334, + "tokens_p.mean_in_band": 0.6473524305555556, + "tokens_rate.above_band": 0.8888888888888888, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1111111111111111 + }, + { + "epoch": 0.24818917767362592, + "grad_norm": 64.57519426962992, + "learning_rate": 3.996385141407464e-07, + "loss": 0.3125, + "step": 1165, + "success_rate.epoch.env.abd": 0.9903846153846154, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.940677966101695, + "success_rate.epoch.env.logic": 0.9029411764705882, + "success_rate.epoch.env.math": 0.9526717557251908, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7459105779716467, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641560505418792, + "success_rate.epoch.global": 0.8593548387096774, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9955180921052632, + "tokens_p.mean_in_band": 0.5962171052631579, + "tokens_rate.above_band": 0.975609756097561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024390243902439025 + }, + { + "epoch": 0.24925436727737538, + "grad_norm": 162.48890252307692, + "learning_rate": 3.9963391923516754e-07, + "loss": 0.4745, + "step": 1170, + "success_rate.epoch.env.abd": 0.9903846153846154, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9008746355685131, + "success_rate.epoch.env.math": 0.952887537993921, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7456521739130435, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632456851282477, + "success_rate.epoch.global": 0.8586723768736617, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.5833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9827567911714771, + "tokens_p.mean_below_band": 5.066394805908203e-07, + "tokens_p.mean_in_band": 0.4876067895683453, + "tokens_rate.above_band": 0.6282666666666666, + "tokens_rate.below_band": 0.0010666666666666667, + "tokens_rate.in_band": 0.37066666666666664 + }, + { + "epoch": 0.25031955688112484, + "grad_norm": 34.11008594390763, + "learning_rate": 3.9962929544467316e-07, + "loss": 0.2818, + "step": 1175, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9017341040462428, + "success_rate.epoch.env.math": 0.9532428355957768, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.745928338762215, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633895493119144, + "success_rate.epoch.global": 0.8592750533049041, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9963662790697675, + "tokens_p.mean_in_band": 0.8681640625, + "tokens_rate.above_band": 0.9699248120300752, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03007518796992481 + }, + { + "epoch": 0.2513847464848743, + "grad_norm": 147.7128524768551, + "learning_rate": 3.996246427726331e-07, + "loss": 0.3735, + "step": 1180, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9020172910662824, + "success_rate.epoch.env.math": 0.9533834586466166, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7464940668824164, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634795073292852, + "success_rate.epoch.global": 0.8593882752761257, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9893465909090909, + "tokens_p.mean_in_band": 0.6871744791666666, + "tokens_rate.above_band": 0.9361702127659575, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06382978723404255 + }, + { + "epoch": 0.25244993608862376, + "grad_norm": 485.1256599952441, + "learning_rate": 3.9961996122243804e-07, + "loss": 0.8177, + "step": 1185, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9025787965616046, + "success_rate.epoch.env.math": 0.9520958083832335, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7446351931330472, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632445056458825, + "success_rate.epoch.global": 0.8582910321489001, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.6888888888888888, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9884105960264901, + "tokens_p.mean_in_band": 0.5148050462877428, + "tokens_rate.above_band": 0.8031914893617021, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.19680851063829788 + }, + { + "epoch": 0.2535151256923732, + "grad_norm": 30.907029672898215, + "learning_rate": 3.9961525079750005e-07, + "loss": 0.178, + "step": 1190, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.921875, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9036827195467422, + "success_rate.epoch.env.math": 0.9522388059701492, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7451820128479657, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635712368951267, + "success_rate.epoch.global": 0.858887952822241, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966584158415842, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9980237154150198, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001976284584980237 + }, + { + "epoch": 0.2545803152961227, + "grad_norm": 62.61177680183389, + "learning_rate": 3.9961051150125193e-07, + "loss": 0.2963, + "step": 1195, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9042253521126761, + "success_rate.epoch.env.math": 0.9524517087667161, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7449306296691569, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637345025787883, + "success_rate.epoch.global": 0.8590604026845637, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980182926829269, + "tokens_p.mean_in_band": 0.7434895833333334, + "tokens_rate.above_band": 0.9715639810426541, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02843601895734597 + }, + { + "epoch": 0.2556455048998722, + "grad_norm": 232.10678464266607, + "learning_rate": 3.996057433371477e-07, + "loss": 0.403, + "step": 1200, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9242424242424242, + "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.901685393258427, + "success_rate.epoch.env.math": 0.9525925925925925, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7457446808510638, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637562515649119, + "success_rate.epoch.global": 0.8592314118629908, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982069672131147, + "tokens_p.mean_in_band": 0.7373798076923077, + "tokens_rate.above_band": 0.9740518962075848, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02594810379241517 + }, + { + "epoch": 0.25671069450362166, + "grad_norm": 60.46428905885166, + "learning_rate": 3.996009463086623e-07, + "loss": 0.1888, + "step": 1205, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9242424242424242, + "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9022346368715084, + "success_rate.epoch.env.math": 0.9514705882352941, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7465535524920467, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637777161918906, + "success_rate.epoch.global": 0.8594009983361065, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9929347826086956, + "tokens_p.mean_in_band": 0.4879557291666667, + "tokens_rate.above_band": 0.9745762711864406, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025423728813559324 + }, + { + "epoch": 0.2577758841073711, + "grad_norm": 84.49690322959322, + "learning_rate": 3.995961204192918e-07, + "loss": 0.234, + "step": 1210, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9253731343283582, + "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9025069637883009, + "success_rate.epoch.env.math": 0.9518248175182482, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7463002114164905, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639144366655384, + "success_rate.epoch.global": 0.8595691797845899, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977678571428571, + "tokens_p.mean_in_band": 0.66484375, + "tokens_rate.above_band": 0.9685534591194969, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031446540880503145 + }, + { + "epoch": 0.2588410737111206, + "grad_norm": 88.24454046746219, + "learning_rate": 3.995912656725533e-07, + "loss": 0.4847, + "step": 1215, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9033149171270718, + "success_rate.epoch.env.math": 0.9520348837209303, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7457805907172996, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627796346474436, + "success_rate.epoch.global": 0.8593234323432343, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974489795918368, + "tokens_p.mean_in_band": 0.675537109375, + "tokens_rate.above_band": 0.9821826280623608, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017817371937639197 + }, + { + "epoch": 0.25990626331487005, + "grad_norm": 34.44953619460801, + "learning_rate": 3.9958638207198493e-07, + "loss": 0.2793, + "step": 1220, + "success_rate.epoch.env.abd": 0.9907407407407407, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9033149171270718, + "success_rate.epoch.env.math": 0.9523121387283237, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7465825446898002, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862930209781235, + "success_rate.epoch.global": 0.8599013968775678, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956647398843931, + "tokens_p.mean_in_band": 0.746875, + "tokens_rate.above_band": 0.9719101123595506, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028089887640449437 + }, + { + "epoch": 0.2609714529186195, + "grad_norm": 131.53498135573238, + "learning_rate": 3.9958146962114574e-07, + "loss": 0.4549, + "step": 1225, + "success_rate.epoch.env.abd": 0.990909090909091, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9033149171270718, + "success_rate.epoch.env.math": 0.9512195121951219, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7460650577124869, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627991403864929, + "success_rate.epoch.global": 0.8596563011456628, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9987610132158591, + "tokens_p.mean_in_band": 0.416015625, + "tokens_rate.above_band": 0.9659574468085106, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03404255319148936 + }, + { + "epoch": 0.26203664252236897, + "grad_norm": 37.297637371452424, + "learning_rate": 3.995765283236159e-07, + "loss": 0.4461, + "step": 1230, + "success_rate.epoch.env.abd": 0.9910714285714286, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9338842975206612, + "success_rate.epoch.env.logic": 0.9038461538461539, + "success_rate.epoch.env.math": 0.9513590844062947, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.74581589958159, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629023179000406, + "success_rate.epoch.global": 0.8598207008964955, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968993190661478, + "tokens_p.mean_in_band": 0.3782552083333333, + "tokens_rate.above_band": 0.9884615384615385, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011538461538461539 + }, + { + "epoch": 0.26310183212611843, + "grad_norm": 63.362707045834405, + "learning_rate": 3.9957155818299666e-07, + "loss": 0.2566, + "step": 1235, + "success_rate.epoch.env.abd": 0.9910714285714286, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9349593495934959, + "success_rate.epoch.env.logic": 0.9038461538461539, + "success_rate.epoch.env.math": 0.9514978601997147, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7460978147762747, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630738614789195, + "success_rate.epoch.global": 0.8599837662337663, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974290780141843, + "tokens_p.mean_in_band": 0.5837053571428571, + "tokens_rate.above_band": 0.9901685393258427, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009831460674157303 + }, + { + "epoch": 0.2641670217298679, + "grad_norm": 24.107363541833237, + "learning_rate": 3.995665592029102e-07, + "loss": 0.2622, + "step": 1240, + "success_rate.epoch.env.abd": 0.9911504424778761, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9365079365079365, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9516358463726885, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7455867082035307, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629872861004884, + "success_rate.epoch.global": 0.8597413096200485, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957075315195754, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9960343688037012, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0039656311962987445 + }, + { + "epoch": 0.2652322113336174, + "grad_norm": 63.68066761821555, + "learning_rate": 3.995615313869997e-07, + "loss": 0.1878, + "step": 1245, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9365079365079365, + "success_rate.epoch.env.logic": 0.9019073569482289, + "success_rate.epoch.env.math": 0.9518413597733711, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7453416149068323, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631189208083877, + "success_rate.epoch.global": 0.8599033816425121, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997631195335277, + "tokens_p.mean_in_band": 0.5546875, + "tokens_rate.above_band": 0.9884726224783862, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011527377521613832 + }, + { + "epoch": 0.26629740093736687, + "grad_norm": 122.19408019516926, + "learning_rate": 3.9955647473892945e-07, + "loss": 0.3302, + "step": 1250, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9027027027027027, + "success_rate.epoch.env.math": 0.9520451339915373, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7461300309597523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633268729477851, + "success_rate.epoch.global": 0.8604651162790697, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9935747663551402, + "tokens_p.mean_in_band": 0.724365234375, + "tokens_rate.above_band": 0.963963963963964, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036036036036036036 + }, + { + "epoch": 0.26736259054111633, + "grad_norm": 47.3893792152576, + "learning_rate": 3.9955138926238467e-07, + "loss": 0.3884, + "step": 1255, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9005376344086021, + "success_rate.epoch.env.math": 0.952247191011236, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7458847736625515, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631943713277078, + "success_rate.epoch.global": 0.860223642172524, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955501618122977, + "tokens_p.mean_in_band": 0.6724076704545454, + "tokens_rate.above_band": 0.9335347432024169, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06646525679758308 + }, + { + "epoch": 0.2684277801448658, + "grad_norm": 82.82508494278193, + "learning_rate": 3.9954627496107157e-07, + "loss": 0.3856, + "step": 1260, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9013333333333333, + "success_rate.epoch.env.math": 0.9523809523809523, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7448770491803278, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632200164343063, + "success_rate.epoch.global": 0.8599840891010342, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974563953488372, + "tokens_p.mean_in_band": 0.49360795454545453, + "tokens_rate.above_band": 0.8865979381443299, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1134020618556701 + }, + { + "epoch": 0.26949296974861525, + "grad_norm": 325.699270538047, + "learning_rate": 3.9954113183871753e-07, + "loss": 0.3237, + "step": 1265, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9018567639257294, + "success_rate.epoch.env.math": 0.9525801952580195, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.744138634046891, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632185853739451, + "success_rate.epoch.global": 0.8597464342313788, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9934129901960784, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9272727272727272, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07272727272727272 + }, + { + "epoch": 0.2705581593523647, + "grad_norm": 294.7283271922979, + "learning_rate": 3.9953595989907073e-07, + "loss": 0.3592, + "step": 1270, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9021164021164021, + "success_rate.epoch.env.math": 0.9525801952580195, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7439271255060729, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632616734977248, + "success_rate.epoch.global": 0.8595106550907656, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961148648648649, + "tokens_p.mean_in_band": 0.6058708639705882, + "tokens_rate.above_band": 0.9158415841584159, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08415841584158416 + }, + { + "epoch": 0.2716233489561142, + "grad_norm": 115.02366969359464, + "learning_rate": 3.9953075914590045e-07, + "loss": 0.2804, + "step": 1275, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.899736147757256, + "success_rate.epoch.env.math": 0.952712100139082, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7449596774193549, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620917261733244, + "success_rate.epoch.global": 0.8592767295597484, + "success_rate.window.env.agentgym:sciworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9987203663793104, + "tokens_p.mean_in_band": 0.7512019230769231, + "tokens_rate.above_band": 0.9861849096705633, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01381509032943677 + }, + { + "epoch": 0.27268853855986364, + "grad_norm": 55.520837167877254, + "learning_rate": 3.99525529582997e-07, + "loss": 0.2886, + "step": 1280, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9517906336088154, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7449596774193549, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620759881584615, + "success_rate.epoch.global": 0.8594361785434612, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959802904564315, + "tokens_p.mean_in_band": 0.69140625, + "tokens_rate.above_band": 0.9836734693877551, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0163265306122449 + }, + { + "epoch": 0.2737537281636131, + "grad_norm": 63.00918193490182, + "learning_rate": 3.995202712141716e-07, + "loss": 0.4008, + "step": 1285, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9002624671916011, + "success_rate.epoch.env.math": 0.9519230769230769, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7444889779559118, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862163207042988, + "success_rate.epoch.global": 0.859204368174727, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9951785714285715, + "tokens_p.mean_in_band": 0.4497327302631579, + "tokens_rate.above_band": 0.9020618556701031, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0979381443298969 + }, + { + "epoch": 0.2748189177673626, + "grad_norm": 144.61545721448763, + "learning_rate": 3.995149840432566e-07, + "loss": 0.3814, + "step": 1290, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9002624671916011, + "success_rate.epoch.env.math": 0.9521857923497268, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7457627118644068, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623471437932043, + "success_rate.epoch.global": 0.8597513597513597, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9945469798657718, + "tokens_p.mean_in_band": 0.7829241071428571, + "tokens_rate.above_band": 0.9551282051282052, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04487179487179487 + }, + { + "epoch": 0.2758841073711121, + "grad_norm": 58.87006540030209, + "learning_rate": 3.9950966807410513e-07, + "loss": 0.1866, + "step": 1295, + "success_rate.epoch.env.abd": 0.9915254237288136, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9002624671916011, + "success_rate.epoch.env.math": 0.952316076294278, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7462834489593657, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624196105589766, + "success_rate.epoch.global": 0.8599071207430341, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9864864864864865, + "tokens_p.mean_in_band": 0.6261160714285714, + "tokens_rate.above_band": 0.940677966101695, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.059322033898305086 + }, + { + "epoch": 0.27694929697486154, + "grad_norm": 157.23310614908453, + "learning_rate": 3.9950432331059153e-07, + "loss": 0.3177, + "step": 1300, + "success_rate.epoch.env.abd": 0.9915966386554622, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9007832898172323, + "success_rate.epoch.env.math": 0.9525745257452575, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7460474308300395, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624754713656979, + "success_rate.epoch.global": 0.8600616808018504, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9935247747747747, + "tokens_p.mean_in_band": 0.56796875, + "tokens_rate.above_band": 0.9173553719008265, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08264462809917356 + }, + { + "epoch": 0.278014486578611, + "grad_norm": 127.29586473619848, + "learning_rate": 3.9949894975661096e-07, + "loss": 0.3397, + "step": 1305, + "success_rate.epoch.env.abd": 0.9916666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9018087855297158, + "success_rate.epoch.env.math": 0.9527027027027027, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7458128078817734, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625653875232313, + "success_rate.epoch.global": 0.8602150537634409, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_p.mean_in_band": 0.5600961538461539, + "tokens_rate.above_band": 0.8898305084745762, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11016949152542373 + }, + { + "epoch": 0.27907967618236046, + "grad_norm": 71.98095137110536, + "learning_rate": 3.9949354741607967e-07, + "loss": 0.4332, + "step": 1310, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.8994845360824743, + "success_rate.epoch.env.math": 0.9528301886792453, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7450980392156863, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615988019887783, + "success_rate.epoch.global": 0.8592195868400918, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.52, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9870084269662921, + "tokens_p.mean_in_band": 0.7301111355633803, + "tokens_rate.above_band": 0.7899408284023669, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.21005917159763313 + }, + { + "epoch": 0.2801448657861099, + "grad_norm": 141.9478287586167, + "learning_rate": 3.9948811629293484e-07, + "loss": 0.3183, + "step": 1315, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9295774647887324, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9529569892473119, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7458455522971652, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618722873632115, + "success_rate.epoch.global": 0.8597560975609756, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965930451127819, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.99812382739212, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001876172607879925 + }, + { + "epoch": 0.2812100553898594, + "grad_norm": 79.47000679132687, + "learning_rate": 3.994826563911346e-07, + "loss": 0.4147, + "step": 1320, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.8976982097186701, + "success_rate.epoch.env.math": 0.9530201342281879, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7461089494163424, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617937530547451, + "success_rate.epoch.global": 0.8595292331055429, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7999999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988977597712106, + "tokens_p.mean_in_band": 0.5001148897058824, + "tokens_rate.above_band": 0.9686057248384118, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03139427516158818 + }, + { + "epoch": 0.28227524499360884, + "grad_norm": 67.74721412021016, + "learning_rate": 3.9947716771465813e-07, + "loss": 0.3213, + "step": 1325, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.8954081632653061, + "success_rate.epoch.env.math": 0.9532085561497327, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.746615087040619, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616487087904232, + "success_rate.epoch.global": 0.859304084720121, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.6111111111111112, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9923573369565217, + "tokens_p.mean_in_band": 0.5833834134615384, + "tokens_rate.above_band": 0.8761904761904762, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12380952380952381 + }, + { + "epoch": 0.2833404345973583, + "grad_norm": 36.412287187478704, + "learning_rate": 3.994716502675055e-07, + "loss": 0.3005, + "step": 1330, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.8931297709923665, + "success_rate.epoch.env.math": 0.9533333333333334, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7456647398843931, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86145129412559, + "success_rate.epoch.global": 0.8587038432554635, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9974502487562189, + "tokens_p.mean_in_band": 0.47042410714285715, + "tokens_rate.above_band": 0.9598853868194842, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04011461318051576 + }, + { + "epoch": 0.2844056242011078, + "grad_norm": 164.6989623658681, + "learning_rate": 3.9946610405369783e-07, + "loss": 0.368, + "step": 1335, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.8931297709923665, + "success_rate.epoch.env.math": 0.9537648612945839, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7454370797310279, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614698275626705, + "success_rate.epoch.global": 0.8588588588588588, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9913366336633663, + "tokens_p.mean_in_band": 0.654296875, + "tokens_rate.above_band": 0.9619047619047619, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0380952380952381 + }, + { + "epoch": 0.2854708138048573, + "grad_norm": 155.86441965563125, + "learning_rate": 3.9946052907727716e-07, + "loss": 0.2241, + "step": 1340, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8936708860759494, + "success_rate.epoch.env.math": 0.9538866930171278, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7464114832535885, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605798565577174, + "success_rate.epoch.global": 0.8590127150336574, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990981240981242, + "tokens_p.mean_below_band": 3.841705620288849e-09, + "tokens_p.mean_in_band": 0.8029513888888888, + "tokens_rate.above_band": 0.9857752489331437, + "tokens_rate.below_band": 0.001422475106685633, + "tokens_rate.in_band": 0.012802275960170697 + }, + { + "epoch": 0.28653600340860674, + "grad_norm": 63.62741452945286, + "learning_rate": 3.994549253423064e-07, + "loss": 0.2889, + "step": 1345, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8947368421052632, + "success_rate.epoch.env.math": 0.9540078843626807, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7468958930276982, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860830218489722, + "success_rate.epoch.global": 0.8595380029806259, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983974358974359, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.2876011930123562, + "grad_norm": 117.86006513451086, + "learning_rate": 3.9944929285286966e-07, + "loss": 0.2065, + "step": 1350, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8927680798004988, + "success_rate.epoch.env.math": 0.9541284403669725, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7480988593155894, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609880105232104, + "success_rate.epoch.global": 0.8596881959910914, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000401246223565, + "tokens_p.mean_in_band": 0.49874441964285715, + "tokens_rate.above_band": 0.9792899408284024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020710059171597635 + }, + { + "epoch": 0.28866638261610567, + "grad_norm": 0.0, + "learning_rate": 3.994436316130717e-07, + "loss": 0.328, + "step": 1355, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8930348258706468, + "success_rate.epoch.env.math": 0.954367666232073, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7492904446546831, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861142333911787, + "success_rate.epoch.global": 0.860207100591716, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9925595238095238, + "tokens_p.mean_in_band": 0.86640625, + "tokens_rate.above_band": 0.9438202247191011, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.056179775280898875 + }, + { + "epoch": 0.2897315722198551, + "grad_norm": 43.621090843298276, + "learning_rate": 3.9943794162703856e-07, + "loss": 0.2098, + "step": 1360, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8938271604938272, + "success_rate.epoch.env.math": 0.9546044098573282, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7490566037735849, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612146282179086, + "success_rate.epoch.global": 0.8603537214443626, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9925742574257426, + "tokens_p.mean_in_band": 0.4270833333333333, + "tokens_rate.above_band": 0.8487394957983193, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15126050420168066 + }, + { + "epoch": 0.2907967618236046, + "grad_norm": 43.90269283165277, + "learning_rate": 3.994322228989169e-07, + "loss": 0.3167, + "step": 1365, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8938271604938272, + "success_rate.epoch.env.math": 0.9547218628719275, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7509363295880149, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613961899296385, + "success_rate.epoch.global": 0.8608663729809104, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9920164233576643, + "tokens_p.mean_in_band": 0.7171875, + "tokens_rate.above_band": 0.9647887323943662, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035211267605633804 + }, + { + "epoch": 0.29186195142735405, + "grad_norm": 108.55430406989052, + "learning_rate": 3.9942647543287454e-07, + "loss": 0.2433, + "step": 1370, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8940886699507389, + "success_rate.epoch.env.math": 0.9548387096774194, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7516279069767442, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861493456625196, + "success_rate.epoch.global": 0.8610095098756401, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933286516853933, + "tokens_p.mean_below_band": 3.655441105365753e-08, + "tokens_p.mean_in_band": 0.693359375, + "tokens_rate.above_band": 0.9468085106382979, + "tokens_rate.below_band": 0.010638297872340425, + "tokens_rate.in_band": 0.0425531914893617 + }, + { + "epoch": 0.2929271410311035, + "grad_norm": 84.60491200856944, + "learning_rate": 3.9942069923310024e-07, + "loss": 0.1952, + "step": 1375, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8946078431372549, + "success_rate.epoch.env.math": 0.954954954954955, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7527777777777778, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617105036715224, + "success_rate.epoch.global": 0.8615160349854227, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9952651515151515, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9880239520958084, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011976047904191617 + }, + { + "epoch": 0.293992330634853, + "grad_norm": 0.0, + "learning_rate": 3.994148943038037e-07, + "loss": 0.2139, + "step": 1380, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8948655256723717, + "success_rate.epoch.env.math": 0.9551856594110115, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.753690036900369, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618495653416887, + "success_rate.epoch.global": 0.8620188816267248, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955778301886793, + "tokens_p.mean_in_band": 0.7643229166666666, + "tokens_rate.above_band": 0.9217391304347826, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0782608695652174 + }, + { + "epoch": 0.2950575202386025, + "grad_norm": 30.945606159980354, + "learning_rate": 3.994090606492153e-07, + "loss": 0.3553, + "step": 1385, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8951219512195122, + "success_rate.epoch.env.math": 0.9554140127388535, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7548209366391184, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619964452156643, + "success_rate.epoch.global": 0.8625180897250362, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9922680412371134, + "tokens_p.mean_in_band": 0.759765625, + "tokens_rate.above_band": 0.9603960396039604, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039603960396039604 + }, + { + "epoch": 0.29612270984235195, + "grad_norm": 547.090465167263, + "learning_rate": 3.994031982735868e-07, + "loss": 0.2379, + "step": 1390, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.8956310679611651, + "success_rate.epoch.env.math": 0.9555837563451777, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7557182067703568, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8621931585974602, + "success_rate.epoch.global": 0.863013698630137, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976415094339622, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.99375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00625 + }, + { + "epoch": 0.2971878994461014, + "grad_norm": 72.9360345656743, + "learning_rate": 3.9939730718119053e-07, + "loss": 0.3989, + "step": 1395, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8956310679611651, + "success_rate.epoch.env.math": 0.9555837563451777, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7563636363636363, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623724564266751, + "success_rate.epoch.global": 0.8631465517241379, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969446163366337, + "tokens_p.mean_in_band": 0.7836441532258065, + "tokens_rate.above_band": 0.9811778992106861, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018822100789313904 + }, + { + "epoch": 0.2982530890498509, + "grad_norm": 113.12336818495687, + "learning_rate": 3.993913873763199e-07, + "loss": 0.3762, + "step": 1400, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.8956310679611651, + "success_rate.epoch.env.math": 0.95448798988622, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7565610859728507, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611854674795779, + "success_rate.epoch.global": 0.8625626342161775, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.6166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9983119235836627, + "tokens_p.mean_in_band": 0.6292146381578947, + "tokens_rate.above_band": 0.9755784061696658, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02442159383033419 + }, + { + "epoch": 0.29931827865360033, + "grad_norm": 215.39438754700254, + "learning_rate": 3.993854388632892e-07, + "loss": 0.3486, + "step": 1405, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.8958837772397095, + "success_rate.epoch.env.math": 0.9547169811320755, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7567567567567568, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861247046689424, + "success_rate.epoch.global": 0.8626961483594865, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9893465909090909, + "tokens_p.mean_in_band": 0.7084517045454546, + "tokens_rate.above_band": 0.8888888888888888, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1111111111111111 + }, + { + "epoch": 0.3003834682573498, + "grad_norm": 45.96687000680754, + "learning_rate": 3.993794616464337e-07, + "loss": 0.2201, + "step": 1410, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.8961352657004831, + "success_rate.epoch.env.math": 0.9549436795994993, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7571942446043165, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616368868847261, + "success_rate.epoch.global": 0.8631840796019901, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989940987124464, + "tokens_p.mean_in_band": 0.869140625, + "tokens_rate.above_band": 0.9957264957264957, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004273504273504274 + }, + { + "epoch": 0.30144865786109926, + "grad_norm": 110.98541362905216, + "learning_rate": 3.9937345573010957e-07, + "loss": 0.3345, + "step": 1415, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9420289855072463, + "success_rate.epoch.env.logic": 0.8966346153846154, + "success_rate.epoch.env.math": 0.9549436795994993, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7573858549686661, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617495314771464, + "success_rate.epoch.global": 0.863314447592068, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966707021791767, + "tokens_p.mean_in_band": 0.447265625, + "tokens_rate.above_band": 0.9809976247030879, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019002375296912115 + }, + { + "epoch": 0.3025138474648487, + "grad_norm": 38.80479025769866, + "learning_rate": 3.9936742111869385e-07, + "loss": 0.386, + "step": 1420, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9420289855072463, + "success_rate.epoch.env.logic": 0.8968824940047961, + "success_rate.epoch.env.math": 0.9551681195516812, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7571428571428571, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861873464959099, + "success_rate.epoch.global": 0.8634438955539873, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967485549132948, + "tokens_p.mean_in_band": 0.7042410714285714, + "tokens_rate.above_band": 0.9866920152091255, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013307984790874524 + }, + { + "epoch": 0.30357903706859823, + "grad_norm": 63.77814085868929, + "learning_rate": 3.993613578165845e-07, + "loss": 0.1822, + "step": 1425, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.8973747016706444, + "success_rate.epoch.env.math": 0.9553349875930521, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7577916295636687, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620800196326162, + "success_rate.epoch.global": 0.8639240506329114, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985062141491395, + "tokens_p.mean_in_band": 0.7063802083333334, + "tokens_rate.above_band": 0.9942965779467681, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005703422053231939 + }, + { + "epoch": 0.3046442266723477, + "grad_norm": 81.32942040954173, + "learning_rate": 3.993552658282004e-07, + "loss": 0.2428, + "step": 1430, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.8981042654028436, + "success_rate.epoch.env.math": 0.9555555555555556, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7582222222222222, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622167247098269, + "success_rate.epoch.global": 0.8644008409250176, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965034965034965, + "tokens_p.mean_in_band": 0.814453125, + "tokens_rate.above_band": 0.9533333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04666666666666667 + }, + { + "epoch": 0.30570941627609716, + "grad_norm": 137.3063989115003, + "learning_rate": 3.993491451579814e-07, + "loss": 0.3279, + "step": 1435, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.8983451536643026, + "success_rate.epoch.env.math": 0.9556650246305419, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7586206896551724, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622958109644091, + "success_rate.epoch.global": 0.8645251396648045, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957157258064516, + "tokens_p.mean_in_band": 0.6741071428571429, + "tokens_rate.above_band": 0.9465648854961832, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05343511450381679 + }, + { + "epoch": 0.3067746058798466, + "grad_norm": 122.5322727047395, + "learning_rate": 3.993429958103882e-07, + "loss": 0.3459, + "step": 1440, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9428571428571428, + "success_rate.epoch.env.logic": 0.8985849056603774, + "success_rate.epoch.env.math": 0.9557739557739557, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7594713656387665, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862501659674578, + "success_rate.epoch.global": 0.8649965205288797, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987139917695473, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.9759036144578314, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024096385542168676 + }, + { + "epoch": 0.3078397954835961, + "grad_norm": 201.48160381375968, + "learning_rate": 3.9933681778990234e-07, + "loss": 0.3875, + "step": 1445, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9432624113475178, + "success_rate.epoch.env.logic": 0.8988235294117647, + "success_rate.epoch.env.math": 0.9547677261613692, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7601054481546573, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862537040125229, + "success_rate.epoch.global": 0.8651178918169209, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9943311737804879, + "tokens_p.mean_in_band": 0.5791015625, + "tokens_rate.above_band": 0.9879518072289156, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012048192771084338 + }, + { + "epoch": 0.30890498508734554, + "grad_norm": 139.77347577911482, + "learning_rate": 3.993306111010264e-07, + "loss": 0.2607, + "step": 1450, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.8992974238875878, + "success_rate.epoch.env.math": 0.9549878345498783, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7605263157894737, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627105313259178, + "success_rate.epoch.global": 0.8655839668279198, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976095699277742, + "tokens_p.mean_in_band": 0.83984375, + "tokens_rate.above_band": 0.9996718083360683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0003281916639317361 + }, + { + "epoch": 0.309970174691095, + "grad_norm": 51.08228919367456, + "learning_rate": 3.993243757482837e-07, + "loss": 0.1899, + "step": 1455, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9002320185614849, + "success_rate.epoch.env.math": 0.9550970873786407, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7602799650043744, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627830310275141, + "success_rate.epoch.global": 0.8657024793388429, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9960585585585585, + "tokens_p.mean_in_band": 0.5130208333333334, + "tokens_rate.above_band": 0.9487179487179487, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05128205128205128 + }, + { + "epoch": 0.31103536429484446, + "grad_norm": 86.8509214605948, + "learning_rate": 3.9931811173621857e-07, + "loss": 0.2136, + "step": 1460, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9210526315789473, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.8986175115207373, + "success_rate.epoch.env.math": 0.9552599758162031, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7609075043630017, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628038085124587, + "success_rate.epoch.global": 0.8658201784488675, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958125, + "tokens_p.mean_in_band": 0.7569444444444444, + "tokens_rate.above_band": 0.9823182711198428, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01768172888015717 + }, + { + "epoch": 0.3121005538985939, + "grad_norm": 173.6463091852164, + "learning_rate": 3.9931181906939617e-07, + "loss": 0.3204, + "step": 1465, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9210526315789473, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.8986175115207373, + "success_rate.epoch.env.math": 0.9554753309265944, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7610773240660296, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628863213172884, + "success_rate.epoch.global": 0.86593707250342, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981884057971014, + "tokens_p.mean_in_band": 0.6276041666666666, + "tokens_rate.above_band": 0.9387755102040817, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.061224489795918366 + }, + { + "epoch": 0.31316574350234344, + "grad_norm": 88.2591646850361, + "learning_rate": 3.993054977524025e-07, + "loss": 0.3412, + "step": 1470, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.8990825688073395, + "success_rate.epoch.env.math": 0.9543817527010804, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7610389610389611, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630097220020151, + "success_rate.epoch.global": 0.8657123381049762, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9990781710914455, + "tokens_p.mean_in_band": 0.5185546875, + "tokens_rate.above_band": 0.9883381924198251, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011661807580174927 + }, + { + "epoch": 0.3142309331060929, + "grad_norm": 66.02744915693027, + "learning_rate": 3.992991477898445e-07, + "loss": 0.4142, + "step": 1475, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.8997722095671982, + "success_rate.epoch.env.math": 0.954653937947494, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7612456747404844, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86316238245089, + "success_rate.epoch.global": 0.8661684782608695, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992806905370843, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.31529612270984236, + "grad_norm": 49.1632575748, + "learning_rate": 3.9929276918635006e-07, + "loss": 0.3243, + "step": 1480, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9004524886877828, + "success_rate.epoch.env.math": 0.9548156956004756, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.7601380500431406, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625440610294177, + "success_rate.epoch.global": 0.8656059580230197, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.5833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9969758064516129, + "tokens_p.mean_in_band": 0.5425646551724138, + "tokens_rate.above_band": 0.8104575163398693, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1895424836601307 + }, + { + "epoch": 0.3163613123135918, + "grad_norm": 84.44931247002147, + "learning_rate": 3.992863619465678e-07, + "loss": 0.3398, + "step": 1485, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9009009009009009, + "success_rate.epoch.env.math": 0.9549228944246738, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.7592433361994841, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625586312997752, + "success_rate.epoch.global": 0.8653846153846154, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972426470588235, + "tokens_p.mean_in_band": 0.5478515625, + "tokens_rate.above_band": 0.9324894514767933, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06751054852320675 + }, + { + "epoch": 0.3174265019173413, + "grad_norm": 110.02614841180721, + "learning_rate": 3.9927992607516725e-07, + "loss": 0.2173, + "step": 1490, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9013452914798207, + "success_rate.epoch.env.math": 0.9551886792452831, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.7579399141630901, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625152143710607, + "success_rate.epoch.global": 0.8651647612642905, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939903846153846, + "tokens_p.mean_in_band": 0.574951171875, + "tokens_rate.above_band": 0.9069767441860465, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09302325581395349 + }, + { + "epoch": 0.31849169152109075, + "grad_norm": 117.81862692191989, + "learning_rate": 3.9927346157683887e-07, + "loss": 0.2136, + "step": 1495, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9240506329113924, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9013452914798207, + "success_rate.epoch.env.math": 0.9553466509988249, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.7587681779298546, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627731094830312, + "success_rate.epoch.global": 0.8656166219839142, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974279835390947, + "tokens_p.mean_in_band": 0.7125, + "tokens_rate.above_band": 0.9931880108991825, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006811989100817439 + }, + { + "epoch": 0.3195568811248402, + "grad_norm": 120.70531930200049, + "learning_rate": 3.992669684562939e-07, + "loss": 0.3861, + "step": 1500, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9240506329113924, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9017857142857143, + "success_rate.epoch.env.math": 0.9554513481828839, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.75809199318569, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627960255746893, + "success_rate.epoch.global": 0.8653974615898463, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972245065789473, + "tokens_p.mean_in_band": 0.43828125, + "tokens_rate.above_band": 0.9681528662420382, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03184713375796178 + }, + { + "epoch": 0.32062207072858967, + "grad_norm": 51.607289914165435, + "learning_rate": 3.992604467182645e-07, + "loss": 0.2148, + "step": 1505, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9240506329113924, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9020044543429844, + "success_rate.epoch.env.math": 0.955607476635514, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7589134125636672, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623731469084622, + "success_rate.epoch.global": 0.8655126498002663, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988532110091743, + "tokens_p.mean_in_band": 0.63037109375, + "tokens_rate.above_band": 0.9533527696793003, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04664723032069971 + }, + { + "epoch": 0.32168726033233913, + "grad_norm": 65.40469120827466, + "learning_rate": 3.9925389636750364e-07, + "loss": 0.1952, + "step": 1510, + "success_rate.epoch.env.abd": 0.9849624060150376, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9020044543429844, + "success_rate.epoch.env.math": 0.9558139534883721, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7578323454699407, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624337360994416, + "success_rate.epoch.global": 0.8652952886529529, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666668, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983078917050692, + "tokens_p.mean_in_band": 0.5868055555555556, + "tokens_rate.above_band": 0.9897377423033067, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010262257696693273 + }, + { + "epoch": 0.32275244993608865, + "grad_norm": 164.19980498880886, + "learning_rate": 3.9924731740878523e-07, + "loss": 0.5151, + "step": 1515, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.956989247311828, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9559164733178654, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7580101180438449, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862329696744209, + "success_rate.epoch.global": 0.8650793650793651, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999433192261185, + "tokens_p.mean_in_band": 0.568359375, + "tokens_rate.above_band": 0.9560693641618497, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04393063583815029 + }, + { + "epoch": 0.3238176395398381, + "grad_norm": 61.94851402954748, + "learning_rate": 3.992407098469039e-07, + "loss": 0.2012, + "step": 1520, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9004424778761062, + "success_rate.epoch.env.math": 0.9560185185185185, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7590260285474392, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862513150815042, + "success_rate.epoch.global": 0.8655240606460118, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971751412429378, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.32488282914358757, + "grad_norm": 186.84805360085605, + "learning_rate": 3.992340736866753e-07, + "loss": 0.5019, + "step": 1525, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9006622516556292, + "success_rate.epoch.env.math": 0.9560693641618497, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7577276524644946, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624704899051228, + "success_rate.epoch.global": 0.8649802890932983, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9961890243902439, + "tokens_p.mean_in_band": 0.28076171875, + "tokens_rate.above_band": 0.9318181818181818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06818181818181818 + }, + { + "epoch": 0.32594801874733703, + "grad_norm": 351.9884742490813, + "learning_rate": 3.992274089329356e-07, + "loss": 0.3237, + "step": 1530, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9010989010989011, + "success_rate.epoch.env.math": 0.956221198156682, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7587354409317804, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626156055328855, + "success_rate.epoch.global": 0.8654223968565815, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9935963114754098, + "tokens_p.mean_in_band": 0.8697916666666666, + "tokens_rate.above_band": 0.976, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024 + }, + { + "epoch": 0.3270132083510865, + "grad_norm": 74.54036807916403, + "learning_rate": 3.992207155905423e-07, + "loss": 0.1336, + "step": 1535, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9452054794520548, + "success_rate.epoch.env.logic": 0.9019607843137255, + "success_rate.epoch.env.math": 0.956271576524741, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7595356550580431, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628056390741858, + "success_rate.epoch.global": 0.8658616187989556, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969866071428571, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9982174688057041, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017825311942959 + }, + { + "epoch": 0.32807839795483595, + "grad_norm": 100.67878856565883, + "learning_rate": 3.992139936643733e-07, + "loss": 0.3956, + "step": 1540, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9455782312925171, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9563719862227325, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7603305785123967, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629622010519395, + "success_rate.epoch.global": 0.8659726740403383, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970596280087527, + "tokens_p.mean_in_band": 0.6303267045454546, + "tokens_rate.above_band": 0.9764957264957265, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023504273504273504 + }, + { + "epoch": 0.3291435875585854, + "grad_norm": 77.36439900993793, + "learning_rate": 3.992072431593275e-07, + "loss": 0.2907, + "step": 1545, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9455782312925171, + "success_rate.epoch.env.logic": 0.9004329004329005, + "success_rate.epoch.env.math": 0.9564220183486238, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7615131578947368, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631526615251883, + "success_rate.epoch.global": 0.8664072632944229, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960656474820144, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.996415770609319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0035842293906810036 + }, + { + "epoch": 0.3302087771623349, + "grad_norm": 96.53931145528712, + "learning_rate": 3.992004640803246e-07, + "loss": 0.2384, + "step": 1550, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9012875536480687, + "success_rate.epoch.env.math": 0.9565714285714285, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.7619047619047619, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633129689889544, + "success_rate.epoch.global": 0.8668390433096316, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996141975308642, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9918367346938776, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00816326530612245 + }, + { + "epoch": 0.33127396676608434, + "grad_norm": 686.4017677934039, + "learning_rate": 3.991936564323052e-07, + "loss": 0.3249, + "step": 1555, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9463087248322147, + "success_rate.epoch.env.logic": 0.8993576017130621, + "success_rate.epoch.env.math": 0.95662100456621, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.761437908496732, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631424673525807, + "success_rate.epoch.global": 0.8663015463917526, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9989543114543115, + "tokens_p.mean_in_band": 0.6637834821428571, + "tokens_rate.above_band": 0.9823008849557522, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017699115044247787 + }, + { + "epoch": 0.33233915636983385, + "grad_norm": 224.07292739092742, + "learning_rate": 3.9918682022023065e-07, + "loss": 0.5372, + "step": 1560, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.8995726495726496, + "success_rate.epoch.env.math": 0.9555808656036446, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7610114192495921, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86266825087936, + "success_rate.epoch.global": 0.8657675016056519, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7142857142857143, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9950471698113208, + "tokens_p.mean_in_band": 0.5339439655172413, + "tokens_rate.above_band": 0.9481216457960644, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0518783542039356 + }, + { + "epoch": 0.3334043459735833, + "grad_norm": 47.40386491265625, + "learning_rate": 3.9917995544908316e-07, + "loss": 0.2637, + "step": 1565, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.8997867803837953, + "success_rate.epoch.env.math": 0.95578231292517, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7607811228641171, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627218256203816, + "success_rate.epoch.global": 0.865877080665813, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981475515463918, + "tokens_p.mean_in_band": 0.55625, + "tokens_rate.above_band": 0.9872773536895675, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01272264631043257 + }, + { + "epoch": 0.3344695355773328, + "grad_norm": 38.540465202751186, + "learning_rate": 3.9917306212386564e-07, + "loss": 0.3496, + "step": 1570, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.925, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9002123142250531, + "success_rate.epoch.env.math": 0.9560315670800451, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.760551948051948, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862762335909833, + "success_rate.epoch.global": 0.8659859604339503, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968327702702703, + "tokens_p.mean_in_band": 0.5712890625, + "tokens_rate.above_band": 0.9736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02631578947368421 + }, + { + "epoch": 0.33553472518108224, + "grad_norm": 79.44511342691798, + "learning_rate": 3.99166140249602e-07, + "loss": 0.318, + "step": 1575, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9006342494714588, + "success_rate.epoch.env.math": 0.956081081081081, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.761326860841424, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630054371907057, + "success_rate.epoch.global": 0.8664122137404581, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976156655844156, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9983792544570502, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016207455429497568 + }, + { + "epoch": 0.3365999147848317, + "grad_norm": 81.54056074429866, + "learning_rate": 3.9915918983133674e-07, + "loss": 0.2979, + "step": 1580, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.8987341772151899, + "success_rate.epoch.env.math": 0.9562780269058296, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7612903225806451, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628567644381835, + "success_rate.epoch.global": 0.8662016487000634, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.99734375, + "tokens_p.mean_in_band": 0.48270089285714285, + "tokens_rate.above_band": 0.9448818897637795, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05511811023622047 + }, + { + "epoch": 0.33766510438858116, + "grad_norm": 79.58485444247916, + "learning_rate": 3.991522108741354e-07, + "loss": 0.2972, + "step": 1585, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.8991596638655462, + "success_rate.epoch.env.math": 0.9563758389261745, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7614457831325301, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629278129968657, + "success_rate.epoch.global": 0.8663084702907712, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980540293040293, + "tokens_p.mean_in_band": 0.65, + "tokens_rate.above_band": 0.9820143884892086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017985611510791366 + }, + { + "epoch": 0.3387302939923306, + "grad_norm": 94.54094001351449, + "learning_rate": 3.991452033830841e-07, + "loss": 0.4793, + "step": 1590, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.899581589958159, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7612179487179487, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629908305209835, + "success_rate.epoch.global": 0.8664146187775678, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9944196428571429, + "tokens_p.mean_below_band": 4.602043190971017e-10, + "tokens_p.mean_in_band": 0.7946428571428571, + "tokens_rate.above_band": 0.9871794871794872, + "tokens_rate.below_band": 0.0016025641025641025, + "tokens_rate.in_band": 0.011217948717948718 + }, + { + "epoch": 0.3397954835960801, + "grad_norm": 130.93538291608567, + "learning_rate": 3.991381673632899e-07, + "loss": 0.3931, + "step": 1595, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9002079002079002, + "success_rate.epoch.env.math": 0.9567147613762487, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7617905675459632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631173715503988, + "success_rate.epoch.global": 0.8668341708542714, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957561728395061, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.9759036144578314, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024096385542168676 + }, + { + "epoch": 0.34086067319982954, + "grad_norm": 131.1681464740304, + "learning_rate": 3.9913110281988054e-07, + "loss": 0.4012, + "step": 1600, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9146341463414634, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9006211180124224, + "success_rate.epoch.env.math": 0.9568106312292359, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7627388535031847, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622233346440598, + "success_rate.epoch.global": 0.8669380087664371, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968826034063261, + "tokens_p.mean_in_band": 0.70068359375, + "tokens_rate.above_band": 0.9903614457831326, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00963855421686747 + }, + { + "epoch": 0.34192586280357906, + "grad_norm": 155.88479947828057, + "learning_rate": 3.991240097580047e-07, + "loss": 0.2929, + "step": 1605, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9006211180124224, + "success_rate.epoch.env.math": 0.9569060773480663, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7626984126984127, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624540070317703, + "success_rate.epoch.global": 0.8670411985018727, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986590485074627, + "tokens_p.mean_in_band": 0.5579427083333334, + "tokens_rate.above_band": 0.9944341372912802, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0055658627087198514 + }, + { + "epoch": 0.3429910524073285, + "grad_norm": 56.745914811429785, + "learning_rate": 3.9911688818283167e-07, + "loss": 0.3288, + "step": 1610, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9008264462809917, + "success_rate.epoch.env.math": 0.9570011025358324, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7626582278481012, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625566495469177, + "success_rate.epoch.global": 0.8671437461107654, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972587719298246, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.991304347826087, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008695652173913044 + }, + { + "epoch": 0.344056242011078, + "grad_norm": 58.56698539334295, + "learning_rate": 3.991097380995516e-07, + "loss": 0.297, + "step": 1615, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9176470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9477124183006536, + "success_rate.epoch.env.logic": 0.9012345679012346, + "success_rate.epoch.env.math": 0.9570484581497798, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7632202052091555, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862787327924508, + "success_rate.epoch.global": 0.8675558312655087, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980961134453782, + "tokens_p.mean_in_band": 0.7682291666666666, + "tokens_rate.above_band": 0.9968586387434555, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0031413612565445027 + }, + { + "epoch": 0.34512143161482745, + "grad_norm": 67.4285831649213, + "learning_rate": 3.991025595133755e-07, + "loss": 0.2021, + "step": 1620, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9176470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.948051948051948, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9572368421052632, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7622047244094489, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627798014943381, + "success_rate.epoch.global": 0.8673469387755102, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972178060413355, + "tokens_p.mean_in_band": 0.5549879807692307, + "tokens_rate.above_band": 0.9797507788161994, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020249221183800622 + }, + { + "epoch": 0.3461866212185769, + "grad_norm": 90.80392114340461, + "learning_rate": 3.99095352429535e-07, + "loss": 0.2578, + "step": 1625, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9186046511627907, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9573304157549234, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7629513343799058, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630377278234428, + "success_rate.epoch.global": 0.8677558569667078, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991514513108615, + "tokens_p.mean_in_band": 0.771484375, + "tokens_rate.above_band": 0.9925650557620818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007434944237918215 + }, + { + "epoch": 0.34725181082232637, + "grad_norm": 87.63793387690875, + "learning_rate": 3.990881168532827e-07, + "loss": 0.3226, + "step": 1630, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9080459770114943, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9020408163265307, + "success_rate.epoch.env.math": 0.9574235807860262, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7636932707355243, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862223564156712, + "success_rate.epoch.global": 0.8678549477566072, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980300859598854, + "tokens_p.mean_in_band": 0.76611328125, + "tokens_rate.above_band": 0.9886685552407932, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0113314447592068 + }, + { + "epoch": 0.34831700042607583, + "grad_norm": 83.51143383530129, + "learning_rate": 3.990808527898916e-07, + "loss": 0.337, + "step": 1635, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9080459770114943, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9026369168356998, + "success_rate.epoch.env.math": 0.9575625680087051, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.76328125, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623119466008909, + "success_rate.epoch.global": 0.867953431372549, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979940878378378, + "tokens_p.mean_in_band": 0.6865234375, + "tokens_rate.above_band": 0.961038961038961, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03896103896103896 + }, + { + "epoch": 0.3493821900298253, + "grad_norm": 60.8269867052626, + "learning_rate": 3.9907356024465587e-07, + "loss": 0.3943, + "step": 1640, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.902834008097166, + "success_rate.epoch.env.math": 0.9566630552546046, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7632398753894081, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613062647474868, + "success_rate.epoch.global": 0.8674404398289554, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9965717299578059, + "tokens_p.mean_in_band": 0.614453125, + "tokens_rate.above_band": 0.9595141700404858, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04048582995951417 + }, + { + "epoch": 0.35044737963357475, + "grad_norm": 112.00326917942903, + "learning_rate": 3.990662392228902e-07, + "loss": 0.3135, + "step": 1645, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9567099567099567, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7636080870917574, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614871257197677, + "success_rate.epoch.global": 0.8678440925700366, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982135668276972, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.35151256923732427, + "grad_norm": 265.864720672112, + "learning_rate": 3.990588897299302e-07, + "loss": 0.2984, + "step": 1650, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9036144578313253, + "success_rate.epoch.env.math": 0.9568500539374326, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7641582622187743, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616219139652707, + "success_rate.epoch.global": 0.8682452944748027, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983333333333333, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.35257775884107373, + "grad_norm": 175.9035112668465, + "learning_rate": 3.99051511771132e-07, + "loss": 0.4593, + "step": 1655, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9018036072144289, + "success_rate.epoch.env.math": 0.9570354457572503, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.7633410672853829, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613998545352277, + "success_rate.epoch.global": 0.8677360774818402, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9975378787878788, + "tokens_p.mean_in_band": 0.5579427083333334, + "tokens_rate.above_band": 0.9322033898305084, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06779661016949153 + }, + { + "epoch": 0.3536429484448232, + "grad_norm": 48.11765350577683, + "learning_rate": 3.9904410535187265e-07, + "loss": 0.2187, + "step": 1660, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9005964214711729, + "success_rate.epoch.env.math": 0.9571275455519829, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.764070932922128, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613648345068843, + "success_rate.epoch.global": 0.867833433916717, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9928728070175439, + "tokens_p.mean_in_band": 0.6769386574074074, + "tokens_rate.above_band": 0.8941176470588236, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10588235294117647 + }, + { + "epoch": 0.35470813804857265, + "grad_norm": 30.89964757892007, + "learning_rate": 3.990366704775499e-07, + "loss": 0.5268, + "step": 1665, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9011857707509882, + "success_rate.epoch.env.math": 0.9572192513368984, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.7624903920061491, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8608501626329612, + "success_rate.epoch.global": 0.8670276774969916, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.5625, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9954594017094017, + "tokens_p.mean_in_band": 0.4809027777777778, + "tokens_rate.above_band": 0.8125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1875 + }, + { + "epoch": 0.3557733276523221, + "grad_norm": 62.64945174130081, + "learning_rate": 3.990292071535822e-07, + "loss": 0.2773, + "step": 1670, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9506172839506173, + "success_rate.epoch.env.logic": 0.9017681728880157, + "success_rate.epoch.env.math": 0.9573560767590619, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.7630368098159509, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609931053887249, + "success_rate.epoch.global": 0.8674265146970606, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9895833333333334, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.3568385172560716, + "grad_norm": 74.75262038442307, + "learning_rate": 3.9902171538540884e-07, + "loss": 0.375, + "step": 1675, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.898876404494382, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.90234375, + "success_rate.epoch.env.math": 0.9574468085106383, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.7626339969372129, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611574251944927, + "success_rate.epoch.global": 0.8675239234449761, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991347719869706, + "tokens_p.mean_in_band": 0.3053385416666667, + "tokens_rate.above_band": 0.9903225806451613, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00967741935483871 + }, + { + "epoch": 0.35790370685982104, + "grad_norm": 78.76786976636863, + "learning_rate": 3.9901419517848974e-07, + "loss": 0.2368, + "step": 1680, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.9025341130604289, + "success_rate.epoch.env.math": 0.9574920297555791, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.7635392829900839, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614035917464868, + "success_rate.epoch.global": 0.8679189028026237, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993265086206896, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.3589688964635705, + "grad_norm": 275.52401010424194, + "learning_rate": 3.990066465383055e-07, + "loss": 0.4104, + "step": 1685, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9007782101167315, + "success_rate.epoch.env.math": 0.9556025369978859, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.763899466869764, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611402772404188, + "success_rate.epoch.global": 0.8674197384066588, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.6, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9966457960644007, + "tokens_p.mean_in_band": 0.6673519736842105, + "tokens_rate.above_band": 0.967128027681661, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0328719723183391 + }, + { + "epoch": 0.36003408606731996, + "grad_norm": 162.86737483365874, + "learning_rate": 3.989990694703576e-07, + "loss": 0.2919, + "step": 1690, + "success_rate.epoch.env.abd": 0.9867549668874173, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9007782101167315, + "success_rate.epoch.env.math": 0.9557428872497366, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7646165527714502, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604733829563572, + "success_rate.epoch.global": 0.8672199170124482, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995895127118644, + "tokens_p.mean_in_band": 0.5921223958333334, + "tokens_rate.above_band": 0.8676470588235294, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1323529411764706 + }, + { + "epoch": 0.3610992756710695, + "grad_norm": 108.43387266027811, + "learning_rate": 3.989914639801681e-07, + "loss": 0.2812, + "step": 1695, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9007782101167315, + "success_rate.epoch.env.math": 0.9559748427672956, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.765329295987888, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605671863299696, + "success_rate.epoch.global": 0.8676122931442081, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961309523809524, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9905660377358491, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009433962264150943 + }, + { + "epoch": 0.36216446527481894, + "grad_norm": 416.0216363403163, + "learning_rate": 3.989838300732799e-07, + "loss": 0.5509, + "step": 1700, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8994197292069632, + "success_rate.epoch.env.math": 0.9561128526645768, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7658610271903323, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605360409411528, + "success_rate.epoch.global": 0.8677077195050088, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999312106918239, + "tokens_p.mean_in_band": 0.6130208333333333, + "tokens_rate.above_band": 0.9769585253456221, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02304147465437788 + }, + { + "epoch": 0.3632296548785684, + "grad_norm": 203.39997145765022, + "learning_rate": 3.9897616775525646e-07, + "loss": 0.4874, + "step": 1705, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8994197292069632, + "success_rate.epoch.env.math": 0.9562955254942768, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7652370203160271, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605958197642706, + "success_rate.epoch.global": 0.867508813160987, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.99619708994709, + "tokens_p.mean_in_band": 0.5598958333333334, + "tokens_rate.above_band": 0.9767441860465116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023255813953488372 + }, + { + "epoch": 0.36429484448231786, + "grad_norm": 149.26613329584015, + "learning_rate": 3.9896847703168206e-07, + "loss": 0.485, + "step": 1710, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8998073217726397, + "success_rate.epoch.env.math": 0.9564766839378238, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7655897821187078, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8607104835954087, + "success_rate.epoch.global": 0.8678968951376684, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998956013363029, + "tokens_p.mean_in_band": 0.865234375, + "tokens_rate.above_band": 0.9955654101995566, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004434589800443459 + }, + { + "epoch": 0.3653600340860673, + "grad_norm": 116.37779562520686, + "learning_rate": 3.9896075790816163e-07, + "loss": 0.348, + "step": 1715, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9001919385796545, + "success_rate.epoch.env.math": 0.9567010309278351, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.765015015015015, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860721410504985, + "success_rate.epoch.global": 0.8679906542056075, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997093023255814, + "tokens_p.mean_in_band": 0.31640625, + "tokens_rate.above_band": 0.9817351598173516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0182648401826484 + }, + { + "epoch": 0.3664252236898168, + "grad_norm": 53.77509510047899, + "learning_rate": 3.9895301039032085e-07, + "loss": 0.2927, + "step": 1720, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9003831417624522, + "success_rate.epoch.env.math": 0.9570112589559877, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7653673163418291, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8607990225538728, + "success_rate.epoch.global": 0.8683750728013978, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9902146464646465, + "tokens_p.mean_in_band": 0.7958984375, + "tokens_rate.above_band": 0.9611650485436893, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038834951456310676 + }, + { + "epoch": 0.36749041329356624, + "grad_norm": 82.51702436627924, + "learning_rate": 3.98945234483806e-07, + "loss": 0.2838, + "step": 1725, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.900952380952381, + "success_rate.epoch.env.math": 0.9570990806945863, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7653213751868461, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86095230725552, + "success_rate.epoch.global": 0.8684668989547039, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980902777777778, + "tokens_p.mean_in_band": 0.6938100961538461, + "tokens_rate.above_band": 0.9651474530831099, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03485254691689008 + }, + { + "epoch": 0.3685556028973157, + "grad_norm": 51.601020862501755, + "learning_rate": 3.989374301942841e-07, + "loss": 0.3697, + "step": 1730, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9011406844106464, + "success_rate.epoch.env.math": 0.9573604060913705, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7651006711409396, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609731186018057, + "success_rate.epoch.global": 0.8685581933989577, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964080459770115, + "tokens_p.mean_in_band": 0.443359375, + "tokens_rate.above_band": 0.9560439560439561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04395604395604396 + }, + { + "epoch": 0.36962079250106517, + "grad_norm": 172.2472924451879, + "learning_rate": 3.989295975274429e-07, + "loss": 0.4709, + "step": 1735, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9015151515151515, + "success_rate.epoch.env.math": 0.9564777327935222, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7650557620817844, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609531637009229, + "success_rate.epoch.global": 0.8683602771362586, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974563953488372, + "tokens_p.mean_in_band": 0.546875, + "tokens_rate.above_band": 0.9756888168557536, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024311183144246355 + }, + { + "epoch": 0.3706859821048147, + "grad_norm": 134.34887301630633, + "learning_rate": 3.9892173648899077e-07, + "loss": 0.298, + "step": 1740, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515151515151515, + "success_rate.epoch.env.logic": 0.9017013232514177, + "success_rate.epoch.env.math": 0.9555106167846309, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7660991857883049, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610336836174249, + "success_rate.epoch.global": 0.8684513529073115, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978693181818182, + "tokens_p.mean_in_band": 0.4921875, + "tokens_rate.above_band": 0.99, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01 + }, + { + "epoch": 0.37175117170856414, + "grad_norm": 63.58531877240587, + "learning_rate": 3.989138470846568e-07, + "loss": 0.2935, + "step": 1745, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9018867924528302, + "success_rate.epoch.env.math": 0.9546827794561934, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.766789667896679, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610646101066852, + "success_rate.epoch.global": 0.8685419058553386, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996234939759037, + "tokens_p.mean_in_band": 0.6732584635416666, + "tokens_rate.above_band": 0.991044776119403, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008955223880597015 + }, + { + "epoch": 0.3728163613123136, + "grad_norm": 76.26688553352443, + "learning_rate": 3.989059293201907e-07, + "loss": 0.4543, + "step": 1750, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9020715630885122, + "success_rate.epoch.env.math": 0.954954954954955, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.7665684830633285, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861086042952239, + "success_rate.epoch.global": 0.8686319404693761, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9914772727272727, + "tokens_p.mean_in_band": 0.6436631944444444, + "tokens_rate.above_band": 0.9243697478991597, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07563025210084033 + }, + { + "epoch": 0.37388155091606307, + "grad_norm": 106.52773155292499, + "learning_rate": 3.9889798320136297e-07, + "loss": 0.261, + "step": 1755, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9022556390977443, + "success_rate.epoch.env.math": 0.9550898203592815, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7660044150110376, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602199733065579, + "success_rate.epoch.global": 0.8681506849315068, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9985119047619048, + "tokens_p.mean_in_band": 0.5394736842105263, + "tokens_rate.above_band": 0.9784090909090909, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02159090909090909 + }, + { + "epoch": 0.3749467405198125, + "grad_norm": 141.56562190147378, + "learning_rate": 3.988900087339645e-07, + "loss": 0.1901, + "step": 1760, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8936170212765957, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9022556390977443, + "success_rate.epoch.env.math": 0.9552683896620279, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7661290322580645, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859374009795172, + "success_rate.epoch.global": 0.8679567444507683, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972014925373134, + "tokens_p.mean_in_band": 0.52734375, + "tokens_rate.above_band": 0.9710144927536232, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028985507246376812 + }, + { + "epoch": 0.376011930123562, + "grad_norm": 115.16287068471944, + "learning_rate": 3.988820059238072e-07, + "loss": 0.3068, + "step": 1765, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9009345794392524, + "success_rate.epoch.env.math": 0.9554013875123885, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7666422823701536, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594144651708651, + "success_rate.epoch.global": 0.8680476730987514, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956706549118388, + "tokens_p.mean_in_band": 0.6513671875, + "tokens_rate.above_band": 0.9900249376558603, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00997506234413965 + }, + { + "epoch": 0.37707711972731145, + "grad_norm": 207.01314214792083, + "learning_rate": 3.988739747767234e-07, + "loss": 0.3236, + "step": 1770, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8958333333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9011194029850746, + "success_rate.epoch.env.math": 0.9554896142433235, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7645772594752187, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859387155001968, + "success_rate.epoch.global": 0.8672891907187323, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.2, + "success_rate.window.env_macro_mean": 0.8400000000000001, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.99906191369606, + "tokens_p.mean_in_band": 0.5969669117647058, + "tokens_rate.above_band": 0.9690909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03090909090909091 + }, + { + "epoch": 0.3781423093310609, + "grad_norm": 79.1865554169076, + "learning_rate": 3.988659152985661e-07, + "loss": 0.3867, + "step": 1775, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.8958333333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9014869888475836, + "success_rate.epoch.env.math": 0.9555774925962488, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7645348837209303, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594323255148505, + "success_rate.epoch.global": 0.8673814898419865, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972587719298246, + "tokens_p.mean_in_band": 0.5085227272727273, + "tokens_rate.above_band": 0.9810671256454389, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0189328743545611 + }, + { + "epoch": 0.37920749893481037, + "grad_norm": 56.81154084391707, + "learning_rate": 3.9885782749520904e-07, + "loss": 0.4482, + "step": 1780, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.8958333333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9520958083832335, + "success_rate.epoch.env.logic": 0.9018518518518519, + "success_rate.epoch.env.math": 0.9556650246305419, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7650471356055112, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8589356074894532, + "success_rate.epoch.global": 0.8674732695554305, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999683887283237, + "tokens_p.mean_in_band": 0.2431640625, + "tokens_rate.above_band": 0.9985569985569985, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001443001443001443 + }, + { + "epoch": 0.3802726885385599, + "grad_norm": 94.01123671164818, + "learning_rate": 3.988497113725466e-07, + "loss": 0.4395, + "step": 1785, + "success_rate.epoch.env.abd": 0.9871794871794872, + "success_rate.epoch.env.agentgym:alfworld": 0.8969072164948454, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9018518518518519, + "success_rate.epoch.env.math": 0.9557086614173228, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7651734104046243, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859123727274449, + "success_rate.epoch.global": 0.867564534231201, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9666666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987687155240347, + "tokens_p.mean_in_band": 0.7047293526785714, + "tokens_rate.above_band": 0.9890880748246298, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010911925175370226 + }, + { + "epoch": 0.38133787814230935, + "grad_norm": 261.2873970539276, + "learning_rate": 3.9884156693649366e-07, + "loss": 0.2541, + "step": 1790, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8969072164948454, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9025735294117647, + "success_rate.epoch.env.math": 0.9557522123893806, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7644092219020173, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8591312453968757, + "success_rate.epoch.global": 0.8673754896474538, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9905973451327433, + "tokens_p.mean_in_band": 0.5203993055555556, + "tokens_rate.above_band": 0.9262295081967213, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07377049180327869 + }, + { + "epoch": 0.3824030677460588, + "grad_norm": 128.50981424547092, + "learning_rate": 3.988333941929858e-07, + "loss": 0.2765, + "step": 1795, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9012797074954296, + "success_rate.epoch.env.math": 0.9558390578999019, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7634795111430626, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8591263358814469, + "success_rate.epoch.global": 0.8669084821428571, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0002307219031994, + "tokens_p.mean_in_band": 0.5099734042553191, + "tokens_rate.above_band": 0.9628751974723538, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03712480252764613 + }, + { + "epoch": 0.3834682573498083, + "grad_norm": 101.51869721822652, + "learning_rate": 3.9882519314797937e-07, + "loss": 0.7223, + "step": 1800, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9012797074954296, + "success_rate.epoch.env.math": 0.9549902152641878, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7644953471725126, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8592813772389193, + "success_rate.epoch.global": 0.8670005564830273, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0005695625759417, + "tokens_p.mean_in_band": 0.72265625, + "tokens_rate.above_band": 0.9927623642943305, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007237635705669481 + }, + { + "epoch": 0.38453344695355773, + "grad_norm": 238.39867050449627, + "learning_rate": 3.9881696380745117e-07, + "loss": 0.4797, + "step": 1805, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8910891089108911, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9014598540145985, + "success_rate.epoch.env.math": 0.9551656920077972, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.765, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8586413305126701, + "success_rate.epoch.global": 0.8670921198668147, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977609034267912, + "tokens_p.mean_in_band": 0.556640625, + "tokens_rate.above_band": 0.9907407407407407, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009259259259259259 + }, + { + "epoch": 0.3855986365573072, + "grad_norm": 165.70785372117808, + "learning_rate": 3.988087061773987e-07, + "loss": 0.5067, + "step": 1810, + "success_rate.epoch.env.abd": 0.9810126582278481, + "success_rate.epoch.env.agentgym:alfworld": 0.8921568627450981, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9552529182879378, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7649572649572649, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8580416853115835, + "success_rate.epoch.global": 0.8666297731045932, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.65, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9940807799442897, + "tokens_p.mean_below_band": 4.805624485015869e-07, + "tokens_p.mean_in_band": 0.14323846726190476, + "tokens_rate.above_band": 0.45966709346991036, + "tokens_rate.below_band": 0.002560819462227913, + "tokens_rate.in_band": 0.5377720870678617 + }, + { + "epoch": 0.38666382616105666, + "grad_norm": 19.52626271575069, + "learning_rate": 3.9880042026384e-07, + "loss": 0.1823, + "step": 1815, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8932038834951457, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9553831231813773, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7647476901208244, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8583108984559328, + "success_rate.epoch.global": 0.8667218543046358, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987074209245742, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9879807692307693, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01201923076923077 + }, + { + "epoch": 0.3877290157648061, + "grad_norm": 111.96252552555268, + "learning_rate": 3.987921060728139e-07, + "loss": 0.313, + "step": 1820, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8942307692307693, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9001814882032668, + "success_rate.epoch.env.math": 0.9555984555984556, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7652482269503547, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8584858296546144, + "success_rate.epoch.global": 0.8670886075949367, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956837016574586, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9836956521739131, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016304347826086956 + }, + { + "epoch": 0.3887942053685556, + "grad_norm": 32.72777812688531, + "learning_rate": 3.987837636103797e-07, + "loss": 0.5374, + "step": 1825, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9007220216606499, + "success_rate.epoch.env.math": 0.9556412729026037, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7644978783592645, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8585878385355513, + "success_rate.epoch.global": 0.8669045005488474, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980614143920595, + "tokens_p.mean_in_band": 0.50390625, + "tokens_rate.above_band": 0.9664268585131894, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03357314148681055 + }, + { + "epoch": 0.3898593949723051, + "grad_norm": 226.50015530548728, + "learning_rate": 3.987753928826172e-07, + "loss": 0.3713, + "step": 1830, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9010791366906474, + "success_rate.epoch.env.math": 0.9557266602502407, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7654929577464789, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8587185277869014, + "success_rate.epoch.global": 0.8672687465790914, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9909336419753086, + "tokens_p.mean_in_band": 0.7723214285714286, + "tokens_rate.above_band": 0.9585798816568047, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04142011834319527 + }, + { + "epoch": 0.39092458457605456, + "grad_norm": 155.28369210290896, + "learning_rate": 3.987669938956271e-07, + "loss": 0.4369, + "step": 1835, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, + "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9012567324955116, + "success_rate.epoch.env.math": 0.9558541266794626, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7661516853932584, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8588716487966237, + "success_rate.epoch.global": 0.8676310043668122, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961231203007519, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9925373134328358, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007462686567164179 + }, + { + "epoch": 0.391989774179804, + "grad_norm": 115.30390205213794, + "learning_rate": 3.9875856665553033e-07, + "loss": 0.2586, + "step": 1840, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, + "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9017857142857143, + "success_rate.epoch.env.math": 0.9558964525407478, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7659425367904695, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8589798192539283, + "success_rate.epoch.global": 0.8677191072400653, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996875, + "tokens_p.mean_in_band": 0.72109375, + "tokens_rate.above_band": 0.9811320754716981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018867924528301886 + }, + { + "epoch": 0.3930549637835535, + "grad_norm": 206.94307274182046, + "learning_rate": 3.987501111684688e-07, + "loss": 0.4904, + "step": 1845, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, + "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9019607843137255, + "success_rate.epoch.env.math": 0.9559808612440192, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7656903765690377, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8589804845730057, + "success_rate.epoch.global": 0.8675352877307275, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9047619047619048, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9902083333333334, + "tokens_p.mean_in_band": 0.623046875, + "tokens_rate.above_band": 0.9375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0625 + }, + { + "epoch": 0.39412015338730294, + "grad_norm": 76.81925583778981, + "learning_rate": 3.987416274406047e-07, + "loss": 0.3629, + "step": 1850, + "success_rate.epoch.env.abd": 0.9813664596273292, + "success_rate.epoch.env.agentgym:alfworld": 0.8962264150943396, + "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9024822695035462, + "success_rate.epoch.env.math": 0.9561068702290076, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7658536585365854, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8591793530684554, + "success_rate.epoch.global": 0.8678938819707634, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993181818181818, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.3951853429910524, + "grad_norm": 116.64443385467045, + "learning_rate": 3.987331154781209e-07, + "loss": 0.3234, + "step": 1855, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8962264150943396, + "success_rate.epoch.env.agentgym:sciworld": 0.9495798319327731, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9028268551236749, + "success_rate.epoch.env.math": 0.956232159847764, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7663421418636995, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8593157776120957, + "success_rate.epoch.global": 0.8682505399568035, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944444444444445, + "tokens_p.mean_in_band": 0.7317708333333334, + "tokens_rate.above_band": 0.9836065573770492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01639344262295082 + }, + { + "epoch": 0.39625053259480186, + "grad_norm": 75.61262444922384, + "learning_rate": 3.987245752872209e-07, + "loss": 0.2077, + "step": 1860, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9028268551236749, + "success_rate.epoch.env.math": 0.9563567362428842, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7661346287300486, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594966098300592, + "success_rate.epoch.global": 0.8683360258481422, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993592271293376, + "tokens_p.mean_in_band": 0.611083984375, + "tokens_rate.above_band": 0.9875389408099688, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012461059190031152 + }, + { + "epoch": 0.3973157221985513, + "grad_norm": 100.09982518001627, + "learning_rate": 3.987160068741287e-07, + "loss": 0.2537, + "step": 1865, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9035087719298246, + "success_rate.epoch.env.math": 0.9563981042654028, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7669432918395575, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859635877824439, + "success_rate.epoch.global": 0.8686895810955961, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967261904761905, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9929078014184397, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0070921985815602835 + }, + { + "epoch": 0.3983809118023008, + "grad_norm": 78.34741176743526, + "learning_rate": 3.987074102450889e-07, + "loss": 0.2116, + "step": 1870, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9040139616055847, + "success_rate.epoch.env.math": 0.956480605487228, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.7672651933701657, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859763258832812, + "success_rate.epoch.global": 0.8690412426352437, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9945195895522388, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.3994461014060503, + "grad_norm": 101.8501578816676, + "learning_rate": 3.986987854063667e-07, + "loss": 0.4776, + "step": 1875, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9043478260869565, + "success_rate.epoch.env.math": 0.9565627950897073, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7673778389538886, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8595082921359247, + "success_rate.epoch.global": 0.8688568376068376, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9922326589595376, + "tokens_p.mean_in_band": 0.5869565217391305, + "tokens_rate.above_band": 0.8826530612244898, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11734693877551021 + }, + { + "epoch": 0.40051129100979976, + "grad_norm": 92.16111321999547, + "learning_rate": 3.9869013236424776e-07, + "loss": 0.3388, + "step": 1880, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9045138888888888, + "success_rate.epoch.env.math": 0.9566037735849057, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7673301304049417, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8596435728488809, + "success_rate.epoch.global": 0.8689397975492807, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979385018495684, + "tokens_p.mean_in_band": 0.6731770833333334, + "tokens_rate.above_band": 0.9963144963144963, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0036855036855036856 + }, + { + "epoch": 0.4015764806135492, + "grad_norm": 75.80936004452577, + "learning_rate": 3.986814511250384e-07, + "loss": 0.3434, + "step": 1885, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9045138888888888, + "success_rate.epoch.env.math": 0.9566446748350612, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.76775956284153, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8597728655900307, + "success_rate.epoch.global": 0.8690223166843783, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965909090909091, + "tokens_p.mean_in_band": 0.53466796875, + "tokens_rate.above_band": 0.9649122807017544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03508771929824561 + }, + { + "epoch": 0.4026416702172987, + "grad_norm": 207.99906922213967, + "learning_rate": 3.986727416950655e-07, + "loss": 0.3187, + "step": 1890, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9048442906574394, + "success_rate.epoch.env.math": 0.9559099437148217, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7680763983628922, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8597882575455966, + "success_rate.epoch.global": 0.8691043985161632, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973730297723292, + "tokens_p.mean_in_band": 0.6372767857142857, + "tokens_rate.above_band": 0.9878892733564014, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012110726643598616 + }, + { + "epoch": 0.40370685982104815, + "grad_norm": 133.45399617049515, + "learning_rate": 3.9866400408067625e-07, + "loss": 0.2572, + "step": 1895, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9051724137931034, + "success_rate.epoch.env.math": 0.9560747663551402, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7680272108843538, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8598285992090006, + "success_rate.epoch.global": 0.8691860465116279, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9930555555555556, + "tokens_p.mean_below_band": 1.3597309589385986e-07, + "tokens_p.mean_in_band": 0.7118055555555556, + "tokens_rate.above_band": 0.9212598425196851, + "tokens_rate.below_band": 0.007874015748031496, + "tokens_rate.in_band": 0.07086614173228346 + }, + { + "epoch": 0.4047720494247976, + "grad_norm": 37.14186426230828, + "learning_rate": 3.9865523828823873e-07, + "loss": 0.403, + "step": 1900, + "success_rate.epoch.env.abd": 0.9819277108433735, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.952, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9051724137931034, + "success_rate.epoch.env.math": 0.9562383612662942, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7676630434782609, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599013545799225, + "success_rate.epoch.global": 0.8692672641012125, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966877880184332, + "tokens_p.mean_in_band": 0.51171875, + "tokens_rate.above_band": 0.9908675799086758, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0091324200913242 + }, + { + "epoch": 0.40583723902854707, + "grad_norm": 286.06375095639777, + "learning_rate": 3.9864644432414135e-07, + "loss": 0.3238, + "step": 1905, + "success_rate.epoch.env.abd": 0.9820359281437125, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.952, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9051724137931034, + "success_rate.epoch.env.math": 0.9564007421150278, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7664184157075152, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8598128064324978, + "success_rate.epoch.global": 0.8688222923238696, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.7999999999999999, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9956808943089431, + "tokens_p.mean_below_band": 1.7415732145309448e-07, + "tokens_p.mean_in_band": 0.5652901785714286, + "tokens_rate.above_band": 0.8913043478260869, + "tokens_rate.below_band": 0.007246376811594203, + "tokens_rate.in_band": 0.10144927536231885 + }, + { + "epoch": 0.40690242863229653, + "grad_norm": 96.81548796544416, + "learning_rate": 3.9863762219479304e-07, + "loss": 0.1935, + "step": 1910, + "success_rate.epoch.env.abd": 0.9820359281437125, + "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, + "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9051724137931034, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7672064777327935, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8600150275307551, + "success_rate.epoch.global": 0.8691662296801258, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991776315789473, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9973753280839895, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026246719160104987 + }, + { + "epoch": 0.407967618236046, + "grad_norm": 346.6933265257248, + "learning_rate": 3.9862877190662336e-07, + "loss": 0.4523, + "step": 1915, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, + "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9053356282271945, + "success_rate.epoch.env.math": 0.9566820276497696, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7671601615074024, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8600499471423166, + "success_rate.epoch.global": 0.8692468619246861, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956521739130435, + "tokens_p.mean_in_band": 0.5880681818181818, + "tokens_rate.above_band": 0.9126984126984127, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0873015873015873 + }, + { + "epoch": 0.4090328078397955, + "grad_norm": 295.83045843893126, + "learning_rate": 3.9861989346608225e-07, + "loss": 0.4063, + "step": 1920, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9053356282271945, + "success_rate.epoch.env.math": 0.9568411386593205, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7674731182795699, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860203142693316, + "success_rate.epoch.global": 0.8695878977569118, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0004023913543343, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.9993106617647058, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0006893382352941177 + }, + { + "epoch": 0.41009799744354497, + "grad_norm": 50.90770892944702, + "learning_rate": 3.9861098687964035e-07, + "loss": 0.2016, + "step": 1925, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9056603773584906, + "success_rate.epoch.env.math": 0.9569202566452796, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.7680965147453083, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604030167536731, + "success_rate.epoch.global": 0.8699271592091571, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994131455399061, + "tokens_p.mean_in_band": 0.8333333333333334, + "tokens_rate.above_band": 0.993006993006993, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006993006993006993 + }, + { + "epoch": 0.41116318704729443, + "grad_norm": 42.168066538962435, + "learning_rate": 3.986020521537887e-07, + "loss": 0.2325, + "step": 1930, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.905982905982906, + "success_rate.epoch.env.math": 0.956959706959707, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.768251841929002, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8606600925458516, + "success_rate.epoch.global": 0.8702646600934094, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995320720356131, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.4122283766510439, + "grad_norm": 145.53337290937844, + "learning_rate": 3.985930892950388e-07, + "loss": 0.2996, + "step": 1935, + "success_rate.epoch.env.abd": 0.9823529411764705, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9063032367972743, + "success_rate.epoch.env.math": 0.9570383912248629, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.768561872909699, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604868493824734, + "success_rate.epoch.global": 0.8703416149068323, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954268292682927, + "tokens_p.mean_in_band": 0.5817307692307693, + "tokens_rate.above_band": 0.9403669724770642, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05963302752293578 + }, + { + "epoch": 0.41329356625479335, + "grad_norm": 21.038500967777555, + "learning_rate": 3.985840983099228e-07, + "loss": 0.2493, + "step": 1940, + "success_rate.epoch.env.abd": 0.9824561403508771, + "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9049235993208828, + "success_rate.epoch.env.math": 0.9571948998178507, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7688710754843019, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604950469976104, + "success_rate.epoch.global": 0.8704181724315952, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975743447580645, + "tokens_p.mean_in_band": 0.5204190340909091, + "tokens_rate.above_band": 0.9783037475345168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021696252465483234 + }, + { + "epoch": 0.4143587558585428, + "grad_norm": 178.0250680459173, + "learning_rate": 3.9857507920499315e-07, + "loss": 0.2773, + "step": 1945, + "success_rate.epoch.env.abd": 0.9826589595375722, + "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9054054054054054, + "success_rate.epoch.env.math": 0.9572727272727273, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.768, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604851722014097, + "success_rate.epoch.global": 0.870236869207003, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9960526315789474, + "tokens_p.mean_in_band": 0.44091796875, + "tokens_rate.above_band": 0.9223300970873787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07766990291262135 + }, + { + "epoch": 0.4154239454622923, + "grad_norm": 137.5432836863038, + "learning_rate": 3.9856603198682303e-07, + "loss": 0.3469, + "step": 1950, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9562841530054644, + "success_rate.epoch.env.logic": 0.9054054054054054, + "success_rate.epoch.env.math": 0.957427536231884, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7672872340425532, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604653450300734, + "success_rate.epoch.global": 0.8700564971751412, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9958465189873418, + "tokens_p.mean_in_band": 0.4979440789473684, + "tokens_rate.above_band": 0.9432835820895522, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.056716417910447764 + }, + { + "epoch": 0.41648913506604174, + "grad_norm": 28.793385428920907, + "learning_rate": 3.9855695666200597e-07, + "loss": 0.2335, + "step": 1955, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9054054054054054, + "success_rate.epoch.env.math": 0.9575045207956601, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7670869276708693, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605236640038331, + "success_rate.epoch.global": 0.8701331967213115, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975110619469026, + "tokens_p.mean_in_band": 0.64375, + "tokens_rate.above_band": 0.9826086956521739, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017391304347826087 + }, + { + "epoch": 0.4175543246697912, + "grad_norm": 88.79062517186233, + "learning_rate": 3.9854785323715596e-07, + "loss": 0.3732, + "step": 1960, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9040404040404041, + "success_rate.epoch.env.math": 0.957542908762421, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7671957671957672, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605254239081749, + "success_rate.epoch.global": 0.8699540112416965, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.86, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9991261185682326, + "tokens_p.mean_in_band": 0.5973307291666666, + "tokens_rate.above_band": 0.9612903225806452, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03870967741935484 + }, + { + "epoch": 0.4186195142735407, + "grad_norm": 52.39104396987938, + "learning_rate": 3.985387217189075e-07, + "loss": 0.2466, + "step": 1965, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9046822742474916, + "success_rate.epoch.env.math": 0.9566787003610109, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7678100263852242, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605610530895508, + "success_rate.epoch.global": 0.8700305810397554, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977973568281938, + "tokens_p.mean_in_band": 0.4231770833333333, + "tokens_rate.above_band": 0.9934354485776805, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006564551422319475 + }, + { + "epoch": 0.4196847038772902, + "grad_norm": 65.68878592882285, + "learning_rate": 3.985295621139156e-07, + "loss": 0.2321, + "step": 1970, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.9048414023372288, + "success_rate.epoch.env.math": 0.9567567567567568, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7685733070348455, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602079045561287, + "success_rate.epoch.global": 0.8701067615658363, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995149534629072, + "tokens_p.mean_below_band": 1.525040715932846e-08, + "tokens_p.mean_in_band": 0.5329491725768322, + "tokens_rate.above_band": 0.8960019622271278, + "tokens_rate.below_band": 0.0002452783909737552, + "tokens_rate.in_band": 0.10375275938189846 + }, + { + "epoch": 0.42074989348103964, + "grad_norm": 102.3532420465388, + "learning_rate": 3.985203744288558e-07, + "loss": 0.2261, + "step": 1975, + "success_rate.epoch.env.abd": 0.9831460674157303, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.9051580698835274, + "success_rate.epoch.env.math": 0.95695067264574, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7688772160210111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602906056815427, + "success_rate.epoch.global": 0.8704361054766734, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9893973214285714, + "tokens_p.mean_in_band": 0.751953125, + "tokens_rate.above_band": 0.9824561403508771, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017543859649122806 + }, + { + "epoch": 0.4218150830847891, + "grad_norm": 433.8337714904573, + "learning_rate": 3.985111586704238e-07, + "loss": 0.3813, + "step": 1980, + "success_rate.epoch.env.abd": 0.9831460674157303, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.9051580698835274, + "success_rate.epoch.env.math": 0.9571810883140054, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7680209698558322, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602569848927737, + "success_rate.epoch.global": 0.870257966616085, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976154618473896, + "tokens_p.mean_in_band": 0.4341517857142857, + "tokens_rate.above_band": 0.97265625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02734375 + }, + { + "epoch": 0.42288027268853856, + "grad_norm": 75.35711064317096, + "learning_rate": 3.9850191484533616e-07, + "loss": 0.296, + "step": 1985, + "success_rate.epoch.env.abd": 0.9832402234636871, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9051580698835274, + "success_rate.epoch.env.math": 0.9572953736654805, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.768125408229915, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603084551273191, + "success_rate.epoch.global": 0.8703329969727548, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938350340136054, + "tokens_p.mean_in_band": 0.65078125, + "tokens_rate.above_band": 0.9671052631578947, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03289473684210526 + }, + { + "epoch": 0.423945462292288, + "grad_norm": 105.79826561046573, + "learning_rate": 3.9849264296032966e-07, + "loss": 0.3038, + "step": 1990, + "success_rate.epoch.env.abd": 0.9832402234636871, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9039735099337748, + "success_rate.epoch.env.math": 0.9573712255772646, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7679269882659713, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860243953293457, + "success_rate.epoch.global": 0.8701560140915954, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973378112712975, + "tokens_p.mean_in_band": 0.55921875, + "tokens_rate.above_band": 0.9838813668600903, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016118633139909737 + }, + { + "epoch": 0.4250106518960375, + "grad_norm": 583.0877240908128, + "learning_rate": 3.984833430221616e-07, + "loss": 0.4396, + "step": 1995, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9041322314049587, + "success_rate.epoch.env.math": 0.9574090505767524, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7683322517845554, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603071278260844, + "success_rate.epoch.global": 0.8702309236947792, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949596774193549, + "tokens_p.mean_in_band": 0.6495361328125, + "tokens_rate.above_band": 0.9323308270676691, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06766917293233082 + }, + { + "epoch": 0.42607584149978694, + "grad_norm": 226.80312272086525, + "learning_rate": 3.984740150376097e-07, + "loss": 0.4215, + "step": 2000, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9044481054365733, + "success_rate.epoch.env.math": 0.9575596816976127, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7689320388349514, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604040634808908, + "success_rate.epoch.global": 0.870555833750626, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9940308988764045, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9888888888888889, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011111111111111112 + }, + { + "epoch": 0.4271410311035364, + "grad_norm": 85.21569872481132, + "learning_rate": 3.984646590134721e-07, + "loss": 0.3414, + "step": 2005, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9577092511013215, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7680878552971576, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603919896077737, + "success_rate.epoch.global": 0.8703796203796204, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957356076759062, + "tokens_p.mean_in_band": 0.5249467329545454, + "tokens_rate.above_band": 0.9770833333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022916666666666665 + }, + { + "epoch": 0.4282062207072859, + "grad_norm": 39.98575523225476, + "learning_rate": 3.9845527495656743e-07, + "loss": 0.2913, + "step": 2010, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9052287581699346, + "success_rate.epoch.env.math": 0.9577464788732394, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7680412371134021, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605678449657698, + "success_rate.epoch.global": 0.8704534130543099, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972998271889401, + "tokens_p.mean_in_band": 0.6315104166666666, + "tokens_rate.above_band": 0.9863636363636363, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013636363636363636 + }, + { + "epoch": 0.4292714103110354, + "grad_norm": 136.67788041878998, + "learning_rate": 3.9844586287373476e-07, + "loss": 0.2097, + "step": 2015, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9052287581699346, + "success_rate.epoch.env.math": 0.957931638913234, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7686375321336761, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8607179002413325, + "success_rate.epoch.global": 0.8707753479125249, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965861344537815, + "tokens_p.mean_in_band": 0.7521701388888888, + "tokens_rate.above_band": 0.9814432989690721, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018556701030927835 + }, + { + "epoch": 0.43033659991478485, + "grad_norm": 45.39873717022019, + "learning_rate": 3.984364227718334e-07, + "loss": 0.3669, + "step": 2020, + "success_rate.epoch.env.abd": 0.9834254143646409, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9533678756476683, + "success_rate.epoch.env.logic": 0.9055374592833876, + "success_rate.epoch.env.math": 0.958041958041958, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7689345314505777, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860813443535701, + "success_rate.epoch.global": 0.8710956866633615, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992419137466307, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.9973118279569892, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002688172043010753 + }, + { + "epoch": 0.4314017895185343, + "grad_norm": 76.85691562348057, + "learning_rate": 3.984269546577434e-07, + "loss": 0.5235, + "step": 2025, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9533678756476683, + "success_rate.epoch.env.logic": 0.9056910569105691, + "success_rate.epoch.env.math": 0.958151700087184, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7676056338028169, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8607330421754333, + "success_rate.epoch.global": 0.8706726013847675, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9949664429530202, + "tokens_p.mean_in_band": 0.6236298301003196, + "tokens_rate.above_band": 0.93125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06875 + }, + { + "epoch": 0.43246697912228377, + "grad_norm": 69.19712439485015, + "learning_rate": 3.98417458538365e-07, + "loss": 0.3394, + "step": 2030, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9059967585089141, + "success_rate.epoch.env.math": 0.9582608695652174, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7679028132992327, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8608626610461776, + "success_rate.epoch.global": 0.8709916132215096, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979368932038835, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9995147986414362, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00048520135856380397 + }, + { + "epoch": 0.43353216872603323, + "grad_norm": 229.1415625937778, + "learning_rate": 3.984079344206189e-07, + "loss": 0.337, + "step": 2035, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9063004846526656, + "success_rate.epoch.env.math": 0.9582971329278888, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7683004455760662, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609297175719282, + "success_rate.epoch.global": 0.8710629921259843, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982, + "tokens_p.mean_in_band": 0.7639508928571429, + "tokens_rate.above_band": 0.9889240506329114, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011075949367088608 + }, + { + "epoch": 0.4345973583297827, + "grad_norm": 47.878039828477796, + "learning_rate": 3.983983823114462e-07, + "loss": 0.1873, + "step": 2040, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9066022544283414, + "success_rate.epoch.env.math": 0.9584775086505191, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7679593134138589, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860973606375179, + "success_rate.epoch.global": 0.8711340206185567, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962993421052632, + "tokens_p.mean_in_band": 0.453125, + "tokens_rate.above_band": 0.9743589743589743, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02564102564102564 + }, + { + "epoch": 0.43566254793353215, + "grad_norm": 734.933175739297, + "learning_rate": 3.983888022178084e-07, + "loss": 0.4017, + "step": 2045, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9066022544283414, + "success_rate.epoch.env.math": 0.9585849870578085, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7683544303797468, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610192968636496, + "success_rate.epoch.global": 0.8712047012732616, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9895833333333334, + "tokens_p.mean_in_band": 0.587890625, + "tokens_rate.above_band": 0.9310344827586207, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06896551724137931 + }, + { + "epoch": 0.4367277375372816, + "grad_norm": 62.88577151831177, + "learning_rate": 3.983791941466874e-07, + "loss": 0.495, + "step": 2050, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9067524115755627, + "success_rate.epoch.env.math": 0.9577950043066322, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7692307692307693, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610619879005758, + "success_rate.epoch.global": 0.8712750366389839, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964622641509434, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9754601226993865, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024539877300613498 + }, + { + "epoch": 0.43779292714103113, + "grad_norm": 26.450002652779286, + "learning_rate": 3.983695581050855e-07, + "loss": 0.2639, + "step": 2055, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9072, + "success_rate.epoch.env.math": 0.9570077386070507, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.769811320754717, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611144912460944, + "success_rate.epoch.global": 0.8713450292397661, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964953271028038, + "tokens_p.mean_in_band": 0.6625, + "tokens_rate.above_band": 0.9553571428571429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044642857142857144 + }, + { + "epoch": 0.4388581167447806, + "grad_norm": 91.69596708735087, + "learning_rate": 3.983598941000254e-07, + "loss": 0.6407, + "step": 2060, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9059011164274322, + "success_rate.epoch.env.math": 0.9571550985432733, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7697616060225847, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610052877580511, + "success_rate.epoch.global": 0.8711716091395236, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9921875, + "tokens_p.mean_in_band": 0.6252604166666667, + "tokens_rate.above_band": 0.9142857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08571428571428572 + }, + { + "epoch": 0.43992330634853005, + "grad_norm": 91.43104965971058, + "learning_rate": 3.983502021385502e-07, + "loss": 0.2632, + "step": 2065, + "success_rate.epoch.env.abd": 0.9836956521739131, + "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9060509554140127, + "success_rate.epoch.env.math": 0.9563356164383562, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7698561601000625, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610865754650177, + "success_rate.epoch.global": 0.8709990300678953, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7999999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983701814058957, + "tokens_p.mean_in_band": 0.4559659090909091, + "tokens_rate.above_band": 0.9756637168141593, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024336283185840708 + }, + { + "epoch": 0.4409884959522795, + "grad_norm": 265.90081511867646, + "learning_rate": 3.983404822277232e-07, + "loss": 0.3087, + "step": 2070, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9049128367670365, + "success_rate.epoch.env.math": 0.9563729683490163, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7704304429195259, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603269047869854, + "success_rate.epoch.global": 0.8708272859216255, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992339121552605, + "tokens_p.mean_in_band": 0.5140086206896551, + "tokens_rate.above_band": 0.9712301587301587, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028769841269841268 + }, + { + "epoch": 0.442053685556029, + "grad_norm": 272.6197297841009, + "learning_rate": 3.983307343746283e-07, + "loss": 0.3549, + "step": 2075, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9034810126582279, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7703795892968264, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602264055007297, + "success_rate.epoch.global": 0.8706563706563707, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9996594551282051, + "tokens_p.mean_in_band": 0.48061342592592593, + "tokens_rate.above_band": 0.9829867674858223, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017013232514177693 + }, + { + "epoch": 0.44311887515977844, + "grad_norm": 136.09696670608895, + "learning_rate": 3.983209585863696e-07, + "loss": 0.2975, + "step": 2080, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9036334913112164, + "success_rate.epoch.env.math": 0.9565587734241908, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7713754646840149, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603420938909193, + "success_rate.epoch.global": 0.8709677419354839, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9921085858585859, + "tokens_p.mean_in_band": 0.677734375, + "tokens_rate.above_band": 0.9611650485436893, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038834951456310676 + }, + { + "epoch": 0.4441840647635279, + "grad_norm": 307.72171262899406, + "learning_rate": 3.983111548700717e-07, + "loss": 0.3543, + "step": 2085, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8879310344827587, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9037854889589906, + "success_rate.epoch.env.math": 0.9566694987255735, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7716584158415841, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599008948236437, + "success_rate.epoch.global": 0.8710374639769453, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987739755884917, + "tokens_p.mean_in_band": 0.7448381696428571, + "tokens_rate.above_band": 0.9879414298018949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012058570198105082 + }, + { + "epoch": 0.44524925436727736, + "grad_norm": 253.67080269735254, + "learning_rate": 3.983013232328794e-07, + "loss": 0.4017, + "step": 2090, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9039370078740158, + "success_rate.epoch.env.math": 0.9567796610169491, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7723627390499691, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8600757910800905, + "success_rate.epoch.global": 0.8713464302827024, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9923349056603774, + "tokens_p.mean_in_band": 0.7408854166666666, + "tokens_rate.above_band": 0.9814814814814815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018518518518518517 + }, + { + "epoch": 0.4463144439710268, + "grad_norm": 27.330875885209775, + "learning_rate": 3.98291463681958e-07, + "loss": 0.3031, + "step": 2095, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9558823529411765, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.955, + "success_rate.epoch.env.logic": 0.9039370078740158, + "success_rate.epoch.env.math": 0.956081081081081, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7716923076923077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860001601693305, + "success_rate.epoch.global": 0.8709369024856597, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9973404255319149, + "tokens_p.mean_in_band": 0.5490056818181818, + "tokens_rate.above_band": 0.914396887159533, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08560311284046693 + }, + { + "epoch": 0.44737963357477634, + "grad_norm": 45.25101740398943, + "learning_rate": 3.9828157622449305e-07, + "loss": 0.255, + "step": 2100, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9040880503144654, + "success_rate.epoch.env.math": 0.9561551433389545, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7721130221130221, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8601387913210604, + "success_rate.epoch.global": 0.871244635193133, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9950153374233128, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9939024390243902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006097560975609756 + }, + { + "epoch": 0.4484448231785258, + "grad_norm": 151.39764527816436, + "learning_rate": 3.9827166086769046e-07, + "loss": 0.4648, + "step": 2105, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9045383411580594, + "success_rate.epoch.env.math": 0.9562289562289562, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7723926380368098, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602761630646246, + "success_rate.epoch.global": 0.8715509039010466, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988046448087432, + "tokens_p.mean_in_band": 0.8802083333333334, + "tokens_rate.above_band": 0.991869918699187, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008130081300813009 + }, + { + "epoch": 0.44951001278227526, + "grad_norm": 43.74072739264825, + "learning_rate": 3.9826171761877647e-07, + "loss": 0.391, + "step": 2110, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9046875, + "success_rate.epoch.env.math": 0.9563758389261745, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7711138310893513, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602784629835182, + "success_rate.epoch.global": 0.8711438063597532, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9947429906542056, + "tokens_p.mean_in_band": 0.32502297794117646, + "tokens_rate.above_band": 0.9264069264069265, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0735930735930736 + }, + { + "epoch": 0.4505752023860247, + "grad_norm": 38.731010504410676, + "learning_rate": 3.9825174648499756e-07, + "loss": 0.3777, + "step": 2115, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8898305084745762, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9048361934477379, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7709224190592547, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604010768540065, + "success_rate.epoch.global": 0.8712121212121212, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986066878980892, + "tokens_p.mean_in_band": 0.3430989583333333, + "tokens_rate.above_band": 0.9751552795031055, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024844720496894408 + }, + { + "epoch": 0.4516403919897742, + "grad_norm": 145.91802969323246, + "learning_rate": 3.9824174747362073e-07, + "loss": 0.2371, + "step": 2120, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8898305084745762, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9051321928460342, + "success_rate.epoch.env.math": 0.9565580618212197, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7714808043875686, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605176346949593, + "success_rate.epoch.global": 0.8715162966461975, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997362012987013, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9871794871794872, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01282051282051282 + }, + { + "epoch": 0.45270558159352364, + "grad_norm": 114.67775463415884, + "learning_rate": 3.982317205919332e-07, + "loss": 0.3487, + "step": 2125, + "success_rate.epoch.env.abd": 0.9842105263157894, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9051321928460342, + "success_rate.epoch.env.math": 0.9567027477102414, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7720364741641338, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599091192909853, + "success_rate.epoch.global": 0.8715834118755891, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979423868312757, + "tokens_p.mean_in_band": 0.62890625, + "tokens_rate.above_band": 0.9604743083003953, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039525691699604744 + }, + { + "epoch": 0.4537707711972731, + "grad_norm": 82.41558198000548, + "learning_rate": 3.982216658472424e-07, + "loss": 0.4364, + "step": 2130, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9055727554179567, + "success_rate.epoch.env.math": 0.9567027477102414, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7721212121212121, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859971826037641, + "success_rate.epoch.global": 0.8716502115655853, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984375, + "tokens_p.mean_in_band": 0.4053819444444444, + "tokens_rate.above_band": 0.9302325581395349, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06976744186046512 + }, + { + "epoch": 0.45483596080102257, + "grad_norm": 60.606817030987585, + "learning_rate": 3.982115832468762e-07, + "loss": 0.3282, + "step": 2135, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9057187017001546, + "success_rate.epoch.env.math": 0.9568464730290457, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7726723095525998, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8601373861457549, + "success_rate.epoch.global": 0.8719512195121951, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985250737463127, + "tokens_p.mean_in_band": 0.6146763392857143, + "tokens_rate.above_band": 0.9797687861271677, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02023121387283237 + }, + { + "epoch": 0.455901150404772, + "grad_norm": 184.22197601349504, + "learning_rate": 3.982014727981827e-07, + "loss": 0.3728, + "step": 2140, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9556650246305419, + "success_rate.epoch.env.logic": 0.906441717791411, + "success_rate.epoch.env.math": 0.956089478044739, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7728096676737161, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602528251458569, + "success_rate.epoch.global": 0.8720168460458587, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994424168694241, + "tokens_p.mean_in_band": 0.49951171875, + "tokens_rate.above_band": 0.9983805668016195, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016194331983805667 + }, + { + "epoch": 0.45696634000852154, + "grad_norm": 58.82473441628496, + "learning_rate": 3.9819133450853043e-07, + "loss": 0.377, + "step": 2145, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9065849923430321, + "success_rate.epoch.env.math": 0.9561258278145696, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7734939759036145, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599487541095314, + "success_rate.epoch.global": 0.8720821661998133, + "success_rate.window.env.ded": 0.6666666666666666, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957092377587077, + "tokens_p.mean_in_band": 0.5876865671641791, + "tokens_rate.above_band": 0.9078826764436297, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0921173235563703 + }, + { + "epoch": 0.458031529612271, + "grad_norm": 96.23351301220453, + "learning_rate": 3.981811683853079e-07, + "loss": 0.4288, + "step": 2150, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9067278287461774, + "success_rate.epoch.env.math": 0.9561621174524401, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7732453509298141, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599696787849692, + "success_rate.epoch.global": 0.871914299021891, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954879679144385, + "tokens_p.mean_in_band": 0.70265625, + "tokens_rate.above_band": 0.8820754716981132, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1179245283018868 + }, + { + "epoch": 0.45909671921602047, + "grad_norm": 31.926622437433984, + "learning_rate": 3.9817097443592435e-07, + "loss": 0.3853, + "step": 2155, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9068702290076336, + "success_rate.epoch.env.math": 0.9562706270627063, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7722653915122535, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8599034015535295, + "success_rate.epoch.global": 0.8715148698884758, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9879557291666666, + "tokens_p.mean_in_band": 0.5868389423076923, + "tokens_rate.above_band": 0.7868852459016393, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.21311475409836064 + }, + { + "epoch": 0.4601619088197699, + "grad_norm": 44.95998243278196, + "learning_rate": 3.9816075266780886e-07, + "loss": 0.3233, + "step": 2160, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9057750759878419, + "success_rate.epoch.env.math": 0.9563786008230453, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7728085867620751, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8598630393708357, + "success_rate.epoch.global": 0.8715808993973111, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992741935483871, + "tokens_p.mean_in_band": 0.69384765625, + "tokens_rate.above_band": 0.950920245398773, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.049079754601226995 + }, + { + "epoch": 0.4612270984235194, + "grad_norm": 96.85026338184036, + "learning_rate": 3.981505030884111e-07, + "loss": 0.1844, + "step": 2165, + "success_rate.epoch.env.abd": 0.9845360824742269, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9059180576631259, + "success_rate.epoch.env.math": 0.9564860426929392, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7729439809296782, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8600866274156558, + "success_rate.epoch.global": 0.8718778908418131, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984357997823722, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.46229228802726885, + "grad_norm": 63.37427154117726, + "learning_rate": 3.981402257052008e-07, + "loss": 0.4072, + "step": 2170, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9059180576631259, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7738872403560831, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602041518321543, + "success_rate.epoch.global": 0.8721735117674204, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977034120734908, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9973821989528796, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002617801047120419 + }, + { + "epoch": 0.4633574776310183, + "grad_norm": 101.70573887188247, + "learning_rate": 3.981299205256681e-07, + "loss": 0.3496, + "step": 2175, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.958041958041958, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9059180576631259, + "success_rate.epoch.env.math": 0.9566993464052288, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7738306690349319, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602420168432782, + "success_rate.epoch.global": 0.8722375690607734, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968065693430657, + "tokens_p.mean_in_band": 0.6098090277777778, + "tokens_rate.above_band": 0.9383561643835616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06164383561643835 + }, + { + "epoch": 0.46442266723476777, + "grad_norm": 149.27580544374308, + "learning_rate": 3.981195875573234e-07, + "loss": 0.2298, + "step": 2180, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9063444108761329, + "success_rate.epoch.env.math": 0.9567699836867863, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7736406619385343, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603225363897672, + "success_rate.epoch.global": 0.8723013321084061, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979166666666667, + "tokens_p.mean_in_band": 0.7076822916666666, + "tokens_rate.above_band": 0.9770114942528736, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022988505747126436 + }, + { + "epoch": 0.46548785683851723, + "grad_norm": 58.87499101498911, + "learning_rate": 3.981092268076971e-07, + "loss": 0.2147, + "step": 2185, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8861788617886179, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9066265060240963, + "success_rate.epoch.env.math": 0.9568755085435313, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7739079102715466, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604997856039944, + "success_rate.epoch.global": 0.8725939505041247, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997032122905028, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9962894248608535, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0037105751391465678 + }, + { + "epoch": 0.46655304644226675, + "grad_norm": 190.27091460647455, + "learning_rate": 3.9809883828434024e-07, + "loss": 0.3868, + "step": 2190, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.9052631578947369, + "success_rate.epoch.env.math": 0.9569805194805194, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7744405182567726, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605383709553419, + "success_rate.epoch.global": 0.8726566072245084, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993342210386151, + "tokens_p.mean_in_band": 0.5615234375, + "tokens_rate.above_band": 0.9740596627756161, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02594033722438392 + }, + { + "epoch": 0.4676182360460162, + "grad_norm": 31.74037483645768, + "learning_rate": 3.9808842199482387e-07, + "loss": 0.2583, + "step": 2195, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9521531100478469, + "success_rate.epoch.env.logic": 0.9054054054054054, + "success_rate.epoch.env.math": 0.9570502431118314, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7746478873239436, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605974048947893, + "success_rate.epoch.global": 0.8727189781021898, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967873831775701, + "tokens_p.mean_in_band": 0.6293402777777778, + "tokens_rate.above_band": 0.9727272727272728, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02727272727272727 + }, + { + "epoch": 0.4686834256497657, + "grad_norm": 73.87236250917555, + "learning_rate": 3.980779779467392e-07, + "loss": 0.2601, + "step": 2200, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95260663507109, + "success_rate.epoch.env.logic": 0.9055472263868066, + "success_rate.epoch.env.math": 0.9563106796116505, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.775175644028103, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8606393353731224, + "success_rate.epoch.global": 0.8727810650887574, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992465932914046, + "tokens_p.mean_in_band": 0.6692708333333334, + "tokens_rate.above_band": 0.99375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00625 + }, + { + "epoch": 0.46974861525351513, + "grad_norm": 200.78911066176883, + "learning_rate": 3.980675061476978e-07, + "loss": 0.6, + "step": 2205, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95260663507109, + "success_rate.epoch.env.logic": 0.9059701492537313, + "success_rate.epoch.env.math": 0.9564164648910412, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7751168224299065, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8606820523320422, + "success_rate.epoch.global": 0.8728428701180745, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9946646341463414, + "tokens_p.mean_in_band": 0.5703125, + "tokens_rate.above_band": 0.9318181818181818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06818181818181818 + }, + { + "epoch": 0.4708138048572646, + "grad_norm": 38.099097895050534, + "learning_rate": 3.980570066053315e-07, + "loss": 0.3271, + "step": 2210, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9063893016344725, + "success_rate.epoch.env.math": 0.9564867042707494, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.7744755244755245, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8606885657333353, + "success_rate.epoch.global": 0.8726778432260988, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992142166344294, + "tokens_p.mean_in_band": 0.4803602430555556, + "tokens_rate.above_band": 0.9828897338403042, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017110266159695818 + }, + { + "epoch": 0.47187899446101406, + "grad_norm": 466.8689306661385, + "learning_rate": 3.980464793272923e-07, + "loss": 0.3609, + "step": 2215, + "success_rate.epoch.env.abd": 0.9849246231155779, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9065281899109793, + "success_rate.epoch.env.math": 0.9565916398713826, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.7737056428155905, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603956556314917, + "success_rate.epoch.global": 0.8722875226039783, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.6666666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.5126008064516129, + "tokens_rate.above_band": 0.8864468864468864, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11355311355311355 + }, + { + "epoch": 0.4729441840647635, + "grad_norm": 76.79584246966465, + "learning_rate": 3.980359243212524e-07, + "loss": 0.3829, + "step": 2220, + "success_rate.epoch.env.abd": 0.985, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9066666666666666, + "success_rate.epoch.env.math": 0.9565916398713826, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.774174869716271, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604577538624727, + "success_rate.epoch.global": 0.8723500225529995, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9902836134453782, + "tokens_p.mean_in_band": 0.745703125, + "tokens_rate.above_band": 0.9224806201550387, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07751937984496124 + }, + { + "epoch": 0.474009373668513, + "grad_norm": 128.3435347931603, + "learning_rate": 3.980253415949041e-07, + "loss": 0.3236, + "step": 2225, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9066666666666666, + "success_rate.epoch.env.math": 0.9551641313050441, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.7738577212261423, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8596809038741906, + "success_rate.epoch.global": 0.8717371737173717, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.62, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9984697164948454, + "tokens_p.mean_in_band": 0.6244419642857143, + "tokens_rate.above_band": 0.9651741293532339, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03482587064676617 + }, + { + "epoch": 0.47507456327226244, + "grad_norm": 77.02831348025278, + "learning_rate": 3.9801473115596027e-07, + "loss": 0.3993, + "step": 2230, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9057437407952872, + "success_rate.epoch.env.math": 0.9552, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.7743796884016156, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8597342939606843, + "success_rate.epoch.global": 0.8718006286484059, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997120786516854, + "tokens_p.mean_in_band": 0.5649038461538461, + "tokens_rate.above_band": 0.9716157205240175, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028384279475982533 + }, + { + "epoch": 0.47613975287601196, + "grad_norm": 8.984483501970702, + "learning_rate": 3.980040930121534e-07, + "loss": 0.401, + "step": 2235, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9044117647058824, + "success_rate.epoch.env.math": 0.9553072625698324, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7733026467203682, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8592845432473823, + "success_rate.epoch.global": 0.8709677419354839, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.35, + "success_rate.window.global": 0.5, + "tokens_p.mean_above_band": 0.9969035823170732, + "tokens_p.mean_in_band": 0.4986979166666667, + "tokens_rate.above_band": 0.9162011173184358, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08379888268156424 + }, + { + "epoch": 0.4772049424797614, + "grad_norm": 178.98429439045935, + "learning_rate": 3.979934271712367e-07, + "loss": 0.1988, + "step": 2240, + "success_rate.epoch.env.abd": 0.9851485148514851, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9046920821114369, + "success_rate.epoch.env.math": 0.9553784860557769, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7735632183908046, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594520676309997, + "success_rate.epoch.global": 0.8712561466249441, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0015008576329332, + "tokens_p.mean_in_band": 0.8763020833333334, + "tokens_rate.above_band": 0.9948805460750854, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005119453924914676 + }, + { + "epoch": 0.4782701320835109, + "grad_norm": 383.0964586358051, + "learning_rate": 3.9798273364098327e-07, + "loss": 0.4197, + "step": 2245, + "success_rate.epoch.env.abd": 0.9851485148514851, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9048316251830161, + "success_rate.epoch.env.math": 0.9555202541699762, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7736389684813754, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594845277470313, + "success_rate.epoch.global": 0.8713202497769849, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.45535714285714285, + "tokens_rate.above_band": 0.851063829787234, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14893617021276595 + }, + { + "epoch": 0.47933532168726034, + "grad_norm": 104.10808818476188, + "learning_rate": 3.9797201242918657e-07, + "loss": 0.4295, + "step": 2250, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9049707602339181, + "success_rate.epoch.env.math": 0.9556611243072051, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.773013150371641, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594597411201004, + "success_rate.epoch.global": 0.8711615487316422, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9924479166666667, + "tokens_p.mean_in_band": 0.5245028409090909, + "tokens_rate.above_band": 0.916030534351145, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08396946564885496 + }, + { + "epoch": 0.4804005112910098, + "grad_norm": 149.18604883650156, + "learning_rate": 3.9796126354366e-07, + "loss": 0.2466, + "step": 2255, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9053857350800583, + "success_rate.epoch.env.math": 0.9556962025316456, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7732724157624215, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8592479031569021, + "success_rate.epoch.global": 0.8712255772646537, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 0.6666666666666666, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9926783376963351, + "tokens_p.mean_in_band": 0.5896739130434783, + "tokens_rate.above_band": 0.8925233644859814, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10747663551401869 + }, + { + "epoch": 0.48146570089475926, + "grad_norm": 69.58324827557273, + "learning_rate": 3.979504869922374e-07, + "loss": 0.3747, + "step": 2260, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9042089985486212, + "success_rate.epoch.env.math": 0.9558011049723757, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7736602052451539, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8592111321816632, + "success_rate.epoch.global": 0.8712893221089942, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978448275862069, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9592875318066157, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04071246819338423 + }, + { + "epoch": 0.4825308904985087, + "grad_norm": 441.5440760869613, + "learning_rate": 3.9793968278277267e-07, + "loss": 0.463, + "step": 2265, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9046242774566474, + "success_rate.epoch.env.math": 0.9558707643814027, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7733485193621867, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8593329780017819, + "success_rate.epoch.global": 0.8713243422507185, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9984722222222222, + "tokens_p.mean_in_band": 0.66796875, + "tokens_rate.above_band": 0.974025974025974, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025974025974025976 + }, + { + "epoch": 0.4835960801022582, + "grad_norm": 132.56110870414003, + "learning_rate": 3.9792885092313973e-07, + "loss": 0.2842, + "step": 2270, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9495412844036697, + "success_rate.epoch.env.logic": 0.9048991354466859, + "success_rate.epoch.env.math": 0.9551886792452831, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7737350767481523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859376972824317, + "success_rate.epoch.global": 0.871387602029561, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9946933962264151, + "tokens_p.mean_in_band": 0.5825892857142857, + "tokens_rate.above_band": 0.9784615384615385, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021538461538461538 + }, + { + "epoch": 0.48466126970600765, + "grad_norm": 90.24017032065915, + "learning_rate": 3.979179914212328e-07, + "loss": 0.4124, + "step": 2275, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9497716894977168, + "success_rate.epoch.env.logic": 0.9050359712230216, + "success_rate.epoch.env.math": 0.9553641346906813, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.7734241908006815, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594046322005464, + "success_rate.epoch.global": 0.8714505833149901, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967984409799554, + "tokens_p.mean_in_band": 0.486328125, + "tokens_rate.above_band": 0.9868131868131869, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013186813186813187 + }, + { + "epoch": 0.48572645930975716, + "grad_norm": 55.49851071523598, + "learning_rate": 3.9790710428496615e-07, + "loss": 0.3556, + "step": 2280, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9497716894977168, + "success_rate.epoch.env.logic": 0.9053084648493543, + "success_rate.epoch.env.math": 0.9554339327599687, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7728045325779037, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622968207385299, + "success_rate.epoch.global": 0.8712936525367889, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942642405063291, + "tokens_p.mean_in_band": 0.4583333333333333, + "tokens_rate.above_band": 0.9132947976878613, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08670520231213873 + }, + { + "epoch": 0.4867916489135066, + "grad_norm": 99.65009153691177, + "learning_rate": 3.9789618952227435e-07, + "loss": 0.4012, + "step": 2285, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9045584045584045, + "success_rate.epoch.env.math": 0.9555382215288611, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7723669309173273, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622190877675888, + "success_rate.epoch.global": 0.871137409598948, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.8, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9996411795407099, + "tokens_p.mean_in_band": 0.4058159722222222, + "tokens_rate.above_band": 0.9637826961770624, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03621730382293763 + }, + { + "epoch": 0.4878568385172561, + "grad_norm": 136.6075873900123, + "learning_rate": 3.9788524714111197e-07, + "loss": 0.3298, + "step": 2290, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9504504504504504, + "success_rate.epoch.env.logic": 0.9048295454545454, + "success_rate.epoch.env.math": 0.955607476635514, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.772316384180791, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862286387741835, + "success_rate.epoch.global": 0.8712005248195933, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979262483574245, + "tokens_p.mean_in_band": 0.6290564903846154, + "tokens_rate.above_band": 0.9915309446254071, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008469055374592834 + }, + { + "epoch": 0.48892202812100555, + "grad_norm": 255.3266808259307, + "learning_rate": 3.978742771494537e-07, + "loss": 0.2824, + "step": 2295, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508928571428571, + "success_rate.epoch.env.logic": 0.9050991501416431, + "success_rate.epoch.env.math": 0.9557453416149069, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7725733634311512, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623870109790402, + "success_rate.epoch.global": 0.8714815622954397, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995192307692308, + "tokens_p.mean_in_band": 0.72265625, + "tokens_rate.above_band": 0.994263862332696, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0057361376673040155 + }, + { + "epoch": 0.489987217724755, + "grad_norm": 181.1578075830466, + "learning_rate": 3.9786327955529445e-07, + "loss": 0.3752, + "step": 2300, + "success_rate.epoch.env.abd": 0.9854368932038835, + "success_rate.epoch.env.agentgym:alfworld": 0.8828125, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9052333804809052, + "success_rate.epoch.env.math": 0.9558139534883721, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7725225225225225, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624515338148165, + "success_rate.epoch.global": 0.8715436533855868, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0006224370240187, + "tokens_p.mean_in_band": 0.6650390625, + "tokens_rate.above_band": 0.9976621858562245, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0023378141437755697 + }, + { + "epoch": 0.49105240732850447, + "grad_norm": 149.1546908914634, + "learning_rate": 3.978522543666491e-07, + "loss": 0.4842, + "step": 2305, + "success_rate.epoch.env.abd": 0.9854368932038835, + "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9055007052186178, + "success_rate.epoch.env.math": 0.9558481797056545, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7721661054994389, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625291305570411, + "success_rate.epoch.global": 0.8713882250706061, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983462591240876, + "tokens_p.mean_in_band": 0.4453125, + "tokens_rate.above_band": 0.9647887323943662, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035211267605633804 + }, + { + "epoch": 0.49211759693225393, + "grad_norm": 141.89747014964618, + "learning_rate": 3.978412015915528e-07, + "loss": 0.2546, + "step": 2310, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9056338028169014, + "success_rate.epoch.env.math": 0.955984555984556, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7721164613661814, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625555108222097, + "success_rate.epoch.global": 0.8714502492954693, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9920138888888889, + "tokens_p.mean_in_band": 0.620703125, + "tokens_rate.above_band": 0.9, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1 + }, + { + "epoch": 0.4931827865360034, + "grad_norm": 91.78071221949968, + "learning_rate": 3.978301212380607e-07, + "loss": 0.2814, + "step": 2315, + "success_rate.epoch.env.abd": 0.9855769230769231, + "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.906030855539972, + "success_rate.epoch.env.math": 0.9560185185185185, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7715083798882681, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626221423325448, + "success_rate.epoch.global": 0.8712956954358642, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973776223776224, + "tokens_p.mean_in_band": 0.4418402777777778, + "tokens_rate.above_band": 0.9407894736842105, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05921052631578947 + }, + { + "epoch": 0.49424797613975285, + "grad_norm": 83.92647100878033, + "learning_rate": 3.9781901331424813e-07, + "loss": 0.3356, + "step": 2320, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9061624649859944, + "success_rate.epoch.env.math": 0.9560862865947611, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7720178372352285, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627231512569846, + "success_rate.epoch.global": 0.871573494496007, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987547438330171, + "tokens_p.mean_in_band": 0.853515625, + "tokens_rate.above_band": 0.996219281663516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003780718336483932 + }, + { + "epoch": 0.49531316574350237, + "grad_norm": 144.07250038750462, + "learning_rate": 3.9780787782821046e-07, + "loss": 0.3308, + "step": 2325, + "success_rate.epoch.env.abd": 0.9858490566037735, + "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9062937062937063, + "success_rate.epoch.env.math": 0.9561538461538461, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7725250278086763, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628233492076169, + "success_rate.epoch.global": 0.8718500969200947, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9963362068965518, + "tokens_p.mean_in_band": 0.62109375, + "tokens_rate.above_band": 0.9731543624161074, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026845637583892617 + }, + { + "epoch": 0.49637835534725183, + "grad_norm": 86.81295869589223, + "learning_rate": 3.9779671478806306e-07, + "loss": 0.3185, + "step": 2330, + "success_rate.epoch.env.abd": 0.9858490566037735, + "success_rate.epoch.env.agentgym:alfworld": 0.8769230769230769, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9066852367688022, + "success_rate.epoch.env.math": 0.9561875480399693, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7726514730405781, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623668232245664, + "success_rate.epoch.global": 0.8719105953148506, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985281447443998, + "tokens_p.mean_in_band": 0.68212890625, + "tokens_rate.above_band": 0.9977077363896848, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002292263610315186 + }, + { + "epoch": 0.4974435449510013, + "grad_norm": 60.215911289432135, + "learning_rate": 3.977855242019416e-07, + "loss": 0.3292, + "step": 2335, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8769230769230769, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9068150208623088, + "success_rate.epoch.env.math": 0.955487336914812, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7734072022160665, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623897085425493, + "success_rate.epoch.global": 0.8719708342268926, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9941007653061225, + "tokens_p.mean_in_band": 0.662890625, + "tokens_rate.above_band": 0.9074074074074074, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09259259259259259 + }, + { + "epoch": 0.49850873455475075, + "grad_norm": 151.45533500262073, + "learning_rate": 3.9777430607800157e-07, + "loss": 0.2186, + "step": 2340, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.907202216066482, + "success_rate.epoch.env.math": 0.9555555555555556, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7737831858407079, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862556684126167, + "success_rate.epoch.global": 0.8722448106141665, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982394366197183, + "tokens_p.mean_in_band": 0.8138020833333334, + "tokens_rate.above_band": 0.9916201117318436, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008379888268156424 + }, + { + "epoch": 0.4995739241585002, + "grad_norm": 182.9995792946016, + "learning_rate": 3.9776306042441874e-07, + "loss": 0.3115, + "step": 2345, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9075862068965517, + "success_rate.epoch.env.math": 0.9556235654169855, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7744070601213459, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862654490941816, + "success_rate.epoch.global": 0.8725176169122357, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9958584337349398, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9764705882352941, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023529411764705882 + }, + { + "epoch": 0.5006391137622497, + "grad_norm": 54.47830334472629, + "learning_rate": 3.977517872493889e-07, + "loss": 0.3368, + "step": 2350, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.90646492434663, + "success_rate.epoch.env.math": 0.9557589626239512, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7749036873968079, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626100129356801, + "success_rate.epoch.global": 0.8725761772853186, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957264957264957, + "tokens_p.mean_in_band": 0.6669921875, + "tokens_rate.above_band": 0.936, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.064 + }, + { + "epoch": 0.5017043033659991, + "grad_norm": 32.118313219529966, + "learning_rate": 3.9774048656112775e-07, + "loss": 0.2346, + "step": 2355, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, + "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9069767441860465, + "success_rate.epoch.env.math": 0.9557926829268293, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7748489840746843, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626780888226062, + "success_rate.epoch.global": 0.8726344886242824, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942196531791907, + "tokens_p.mean_in_band": 0.5569196428571429, + "tokens_rate.above_band": 0.9251336898395722, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0748663101604278 + }, + { + "epoch": 0.5027694929697486, + "grad_norm": 458.57098464456413, + "learning_rate": 3.977291583678712e-07, + "loss": 0.333, + "step": 2360, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, + "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9072305593451568, + "success_rate.epoch.env.math": 0.9558599695585996, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.7750410509031199, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627247405152715, + "success_rate.epoch.global": 0.8726925525143221, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9903273809523809, + "tokens_p.mean_in_band": 0.5559895833333334, + "tokens_rate.above_band": 0.9333333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06666666666666667 + }, + { + "epoch": 0.5038346825734981, + "grad_norm": 56.32929499081233, + "learning_rate": 3.977178026778752e-07, + "loss": 0.3516, + "step": 2365, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9061224489795918, + "success_rate.epoch.env.math": 0.9559270516717325, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7752870420995079, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654994053005809, + "success_rate.epoch.global": 0.8727503705272073, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977886405959032, + "tokens_p.mean_in_band": 0.716015625, + "tokens_rate.above_band": 0.9817184643510055, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018281535648994516 + }, + { + "epoch": 0.5048998721772475, + "grad_norm": 93.0944463656511, + "learning_rate": 3.977064194994156e-07, + "loss": 0.2611, + "step": 2370, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9061224489795918, + "success_rate.epoch.env.math": 0.9561270801815431, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7755324959038776, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8656453135373464, + "success_rate.epoch.global": 0.8730192267061061, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981566011235955, + "tokens_p.mean_in_band": 0.869140625, + "tokens_rate.above_band": 0.994413407821229, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00558659217877095 + }, + { + "epoch": 0.505965061780997, + "grad_norm": 0.0, + "learning_rate": 3.976950088407885e-07, + "loss": 0.4418, + "step": 2375, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9517543859649122, + "success_rate.epoch.env.logic": 0.9061224489795918, + "success_rate.epoch.env.math": 0.9563581640331076, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7757774140752864, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8657079078798572, + "success_rate.epoch.global": 0.8732869491882774, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.000475888324873, + "tokens_p.mean_in_band": 0.796875, + "tokens_rate.above_band": 0.9987325728770595, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012674271229404308 + }, + { + "epoch": 0.5070302513847464, + "grad_norm": 512.0783073177407, + "learning_rate": 3.9768357071030974e-07, + "loss": 0.2407, + "step": 2380, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9521739130434783, + "success_rate.epoch.env.logic": 0.9063772048846676, + "success_rate.epoch.env.math": 0.9564237415477085, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7762656505171476, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8658195530562303, + "success_rate.epoch.global": 0.873553545129392, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999405705229794, + "tokens_p.mean_in_band": 0.76171875, + "tokens_rate.above_band": 0.9968404423380727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00315955766192733 + }, + { + "epoch": 0.5080954409884959, + "grad_norm": 76.92824746945375, + "learning_rate": 3.976721051163155e-07, + "loss": 0.2361, + "step": 2385, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, + "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9065040650406504, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7767517653449212, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8659252794067704, + "success_rate.epoch.global": 0.8738190216250262, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984697164948454, + "tokens_p.mean_in_band": 0.865234375, + "tokens_rate.above_band": 0.9948717948717949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005128205128205128 + }, + { + "epoch": 0.5091606305922454, + "grad_norm": 121.21388088142787, + "learning_rate": 3.976606120671618e-07, + "loss": 0.3901, + "step": 2390, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9065040650406504, + "success_rate.epoch.env.math": 0.9565868263473054, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7758527341635084, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8658773826992614, + "success_rate.epoch.global": 0.8734548501990362, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9967873831775701, + "tokens_p.mean_in_band": 0.34765625, + "tokens_rate.above_band": 0.8699186991869918, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13008130081300814 + }, + { + "epoch": 0.5102258201959949, + "grad_norm": 67.57421037977869, + "learning_rate": 3.976490915712247e-07, + "loss": 0.2638, + "step": 2395, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9065040650406504, + "success_rate.epoch.env.math": 0.9567486950037286, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.775796866558617, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8659056786599097, + "success_rate.epoch.global": 0.8735103491532511, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000289054470709, + "tokens_p.mean_in_band": 0.4290364583333333, + "tokens_rate.above_band": 0.9878172588832488, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012182741116751269 + }, + { + "epoch": 0.5112910097997444, + "grad_norm": 56.527684669061045, + "learning_rate": 3.976375436369001e-07, + "loss": 0.2849, + "step": 2400, + "success_rate.epoch.env.abd": 0.9861751152073732, + "success_rate.epoch.env.agentgym:alfworld": 0.8814814814814815, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9065040650406504, + "success_rate.epoch.env.math": 0.9569093610698366, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.775377969762419, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8660559155881422, + "success_rate.epoch.global": 0.8735656165240976, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992559523809523, + "tokens_p.mean_in_band": 0.6734375, + "tokens_rate.above_band": 0.9820143884892086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017985611510791366 + }, + { + "epoch": 0.5123561994034939, + "grad_norm": 66.35024469312371, + "learning_rate": 3.9762596827260425e-07, + "loss": 0.3837, + "step": 2405, + "success_rate.epoch.env.abd": 0.9861751152073732, + "success_rate.epoch.env.agentgym:alfworld": 0.8814814814814815, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9066305818673883, + "success_rate.epoch.env.math": 0.9570051890289103, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7759827679052235, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866152822625989, + "success_rate.epoch.global": 0.8738288569643973, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979416167664671, + "tokens_p.mean_in_band": 0.8177083333333334, + "tokens_rate.above_band": 0.9653179190751445, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03468208092485549 + }, + { + "epoch": 0.5134213890072433, + "grad_norm": 47.59831070079333, + "learning_rate": 3.976143654867731e-07, + "loss": 0.3966, + "step": 2410, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8814814814814815, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9527896995708155, + "success_rate.epoch.env.logic": 0.9067567567567567, + "success_rate.epoch.env.math": 0.9571005917159763, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7759269210102095, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661921535585712, + "success_rate.epoch.global": 0.8738832329108664, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959439528023599, + "tokens_p.mean_in_band": 0.596875, + "tokens_rate.above_band": 0.9854651162790697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014534883720930232 + }, + { + "epoch": 0.5144865786109928, + "grad_norm": 130.50296199173724, + "learning_rate": 3.976027352878627e-07, + "loss": 0.3629, + "step": 2415, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9529914529914529, + "success_rate.epoch.env.logic": 0.9068825910931174, + "success_rate.epoch.env.math": 0.9571639586410635, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7765273311897106, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8663615012455327, + "success_rate.epoch.global": 0.874144723201327, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980936004784688, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9964243146603099, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003575685339690107 + }, + { + "epoch": 0.5155517682147422, + "grad_norm": 96.31074249888358, + "learning_rate": 3.97591077684349e-07, + "loss": 0.5346, + "step": 2420, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9068825910931174, + "success_rate.epoch.env.math": 0.9571955719557196, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7771245323356494, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8660669256765762, + "success_rate.epoch.global": 0.8741982205669356, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956168831168831, + "tokens_p.mean_in_band": 0.6621621621621622, + "tokens_rate.above_band": 0.9397884458909682, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.060211554109031735 + }, + { + "epoch": 0.5166169578184917, + "grad_norm": 72.3921305602384, + "learning_rate": 3.9757939268472805e-07, + "loss": 0.4327, + "step": 2425, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9070080862533693, + "success_rate.epoch.env.math": 0.9572901325478645, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7767714437932871, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8660548317865794, + "success_rate.epoch.global": 0.874045013421433, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9925, + "tokens_p.mean_in_band": 0.5870535714285714, + "tokens_rate.above_band": 0.9146341463414634, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08536585365853659 + }, + { + "epoch": 0.5176821474222412, + "grad_norm": 72.95999639463913, + "learning_rate": 3.9756768029751575e-07, + "loss": 0.3234, + "step": 2430, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9071332436069987, + "success_rate.epoch.env.math": 0.9573842762674504, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7768331562167906, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661015529457955, + "success_rate.epoch.global": 0.8740984957758088, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936079545454546, + "tokens_p.mean_in_band": 0.6223958333333334, + "tokens_rate.above_band": 0.9513513513513514, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04864864864864865 + }, + { + "epoch": 0.5187473370259906, + "grad_norm": 98.629512441129, + "learning_rate": 3.97555940531248e-07, + "loss": 0.5392, + "step": 2435, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.907258064516129, + "success_rate.epoch.env.math": 0.9567765567765568, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.776657824933687, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661273283813963, + "success_rate.epoch.global": 0.873946123791898, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984567901234568, + "tokens_p.mean_in_band": 0.3914930555555556, + "tokens_rate.above_band": 0.9574468085106383, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0425531914893617 + }, + { + "epoch": 0.5198125266297401, + "grad_norm": 52.772482048741715, + "learning_rate": 3.975441733944807e-07, + "loss": 0.2619, + "step": 2440, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8840579710144928, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9075067024128687, + "success_rate.epoch.env.math": 0.9567765567765568, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7772486772486773, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866305939331072, + "success_rate.epoch.global": 0.874204801970039, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996675531914894, + "tokens_p.mean_in_band": 0.869140625, + "tokens_rate.above_band": 0.9983660130718954, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016339869281045752 + }, + { + "epoch": 0.5208777162334896, + "grad_norm": 241.05003876711066, + "learning_rate": 3.9753237889578963e-07, + "loss": 0.2334, + "step": 2445, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8848920863309353, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9075067024128687, + "success_rate.epoch.env.math": 0.9569657184536834, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7776016904384575, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8664310566204674, + "success_rate.epoch.global": 0.8744624206430474, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997890625, + "tokens_p.mean_in_band": 0.68359375, + "tokens_rate.above_band": 0.9950248756218906, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004975124378109453 + }, + { + "epoch": 0.521942905837239, + "grad_norm": 97.15092585275974, + "learning_rate": 3.9752055704377057e-07, + "loss": 0.3471, + "step": 2450, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9078771695594126, + "success_rate.epoch.env.math": 0.9570284049526584, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7775434897206115, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665398886487423, + "success_rate.epoch.global": 0.8745146127120376, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988957597173145, + "tokens_p.mean_in_band": 0.5485026041666666, + "tokens_rate.above_band": 0.9792387543252595, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020761245674740483 + }, + { + "epoch": 0.5230080954409885, + "grad_norm": 53.052851937983725, + "learning_rate": 3.9750870784703913e-07, + "loss": 0.4486, + "step": 2455, + "success_rate.epoch.env.abd": 0.9819819819819819, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9537815126050421, + "success_rate.epoch.env.logic": 0.9081225033288948, + "success_rate.epoch.env.math": 0.9571220930232558, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7778947368421053, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666277807550707, + "success_rate.epoch.global": 0.8747705486436875, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998481308411215, + "tokens_p.mean_in_band": 0.62890625, + "tokens_rate.above_band": 0.9962756052141527, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0037243947858473 + }, + { + "epoch": 0.5240732850447379, + "grad_norm": 87.1427053362088, + "learning_rate": 3.9749683131423096e-07, + "loss": 0.2645, + "step": 2460, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9537815126050421, + "success_rate.epoch.env.logic": 0.9081225033288948, + "success_rate.epoch.env.math": 0.9572463768115942, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7783613445378151, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667625286416047, + "success_rate.epoch.global": 0.8750254427030327, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960462287104623, + "tokens_p.mean_in_band": 0.7534722222222222, + "tokens_rate.above_band": 0.9785714285714285, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02142857142857143 + }, + { + "epoch": 0.5251384746484874, + "grad_norm": 192.9471286753687, + "learning_rate": 3.974849274540016e-07, + "loss": 0.3167, + "step": 2465, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9082446808510638, + "success_rate.epoch.env.math": 0.9566787003610109, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7785939139559287, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667816647316458, + "success_rate.epoch.global": 0.8750761730652041, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977370689655173, + "tokens_p.mean_in_band": 0.5121527777777778, + "tokens_rate.above_band": 0.9698996655518395, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030100334448160536 + }, + { + "epoch": 0.5262036642522369, + "grad_norm": 137.28817544581148, + "learning_rate": 3.974729962750264e-07, + "loss": 0.3777, + "step": 2470, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9082446808510638, + "success_rate.epoch.env.math": 0.9567723342939481, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7782426778242678, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667789028123181, + "success_rate.epoch.global": 0.8749239813500912, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939144736842105, + "tokens_p.mean_in_band": 0.6219308035714286, + "tokens_rate.above_band": 0.9313725490196079, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06862745098039216 + }, + { + "epoch": 0.5272688538559863, + "grad_norm": 72.98504092505685, + "learning_rate": 3.974610377860009e-07, + "loss": 0.3346, + "step": 2475, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9084880636604774, + "success_rate.epoch.env.math": 0.9568965517241379, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7785900783289817, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866851182595783, + "success_rate.epoch.global": 0.8751770180052599, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980053191489362, + "tokens_p.mean_in_band": 0.857421875, + "tokens_rate.above_band": 0.9791666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020833333333333332 + }, + { + "epoch": 0.5283340434597358, + "grad_norm": 87.94830620386523, + "learning_rate": 3.9744905199564027e-07, + "loss": 0.501, + "step": 2480, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9088507265521797, + "success_rate.epoch.env.math": 0.956989247311828, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7790515893694633, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669345343703171, + "success_rate.epoch.global": 0.8754290329093478, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9932553956834532, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9586206896551724, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041379310344827586 + }, + { + "epoch": 0.5293992330634854, + "grad_norm": 26.892422576680357, + "learning_rate": 3.974370389126796e-07, + "loss": 0.2039, + "step": 2485, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9570815450643777, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7796257796257796, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867016958965553, + "success_rate.epoch.global": 0.8756800322385654, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965277777777778, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.984375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015625 + }, + { + "epoch": 0.5304644226672348, + "grad_norm": 226.98644075811197, + "learning_rate": 3.974249985458741e-07, + "loss": 0.3152, + "step": 2490, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9092105263157895, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7793354101765316, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867043596502452, + "success_rate.epoch.global": 0.8755278503921174, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.86, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992236024844721, + "tokens_p.mean_in_band": 0.5262784090909091, + "tokens_rate.above_band": 0.9777327935222672, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022267206477732792 + }, + { + "epoch": 0.5315296122709843, + "grad_norm": 99.26347115567036, + "learning_rate": 3.974129309039985e-07, + "loss": 0.3905, + "step": 2495, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9541666666666667, + "success_rate.epoch.env.logic": 0.9093298291721419, + "success_rate.epoch.env.math": 0.9566453447050463, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7795643153526971, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671755537139266, + "success_rate.epoch.global": 0.8757776439895645, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997981266149871, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9987096774193548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012903225806451613 + }, + { + "epoch": 0.5325948018747337, + "grad_norm": 146.99056460450822, + "learning_rate": 3.974008359958477e-07, + "loss": 0.4452, + "step": 2500, + "success_rate.epoch.env.abd": 0.9823008849557522, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9081364829396326, + "success_rate.epoch.env.math": 0.9566761363636364, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7793889176592439, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667147273114804, + "success_rate.epoch.global": 0.8754255958341678, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9984142945544554, + "tokens_p.mean_in_band": 0.5317042151162791, + "tokens_rate.above_band": 0.9494712103407755, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05052878965922444 + }, + { + "epoch": 0.5336599914784832, + "grad_norm": 324.8467036600824, + "learning_rate": 3.973887138302364e-07, + "loss": 0.446, + "step": 2505, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9081364829396326, + "success_rate.epoch.env.math": 0.9567375886524823, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7801857585139319, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667998421126925, + "success_rate.epoch.global": 0.8756745952428543, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.994400289017341, + "tokens_p.mean_in_band": 0.68310546875, + "tokens_rate.above_band": 0.9774011299435028, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022598870056497175 + }, + { + "epoch": 0.5347251810822327, + "grad_norm": 140.48786848758652, + "learning_rate": 3.9737656441599927e-07, + "loss": 0.3785, + "step": 2510, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.908256880733945, + "success_rate.epoch.env.math": 0.9568904593639576, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7805255023183926, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8668754848727499, + "success_rate.epoch.global": 0.8759226012367843, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970034246575342, + "tokens_p.mean_in_band": 0.802734375, + "tokens_rate.above_band": 0.9864864864864865, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013513513513513514 + }, + { + "epoch": 0.5357903706859821, + "grad_norm": 288.53355924210047, + "learning_rate": 3.9736438776199045e-07, + "loss": 0.3281, + "step": 2515, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9070680628272252, + "success_rate.epoch.env.math": 0.9569209039548022, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7810894141829393, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669117552296908, + "success_rate.epoch.global": 0.8759705355365319, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972546095444685, + "tokens_p.mean_in_band": 0.5251116071428571, + "tokens_rate.above_band": 0.9634273772204807, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03657262277951933 + }, + { + "epoch": 0.5368555602897316, + "grad_norm": 37.51261923523871, + "learning_rate": 3.9735218387708443e-07, + "loss": 0.1555, + "step": 2520, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9071895424836601, + "success_rate.epoch.env.math": 0.9569513055751588, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7817622950819673, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669867336093108, + "success_rate.epoch.global": 0.8762169680111266, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973695286195287, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9983193277310924, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016806722689075631 + }, + { + "epoch": 0.537920749893481, + "grad_norm": 94.49331500117329, + "learning_rate": 3.9733995277017516e-07, + "loss": 0.2636, + "step": 2525, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9075520833333334, + "success_rate.epoch.env.math": 0.9570119802677942, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7816973415132924, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670364490680434, + "success_rate.epoch.global": 0.8762641284949435, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0005857544517338, + "tokens_p.mean_in_band": 0.71630859375, + "tokens_rate.above_band": 0.9925581395348837, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0074418604651162795 + }, + { + "epoch": 0.5389859394972305, + "grad_norm": 731.3612810956826, + "learning_rate": 3.9732769445017665e-07, + "loss": 0.4249, + "step": 2530, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9076723016905072, + "success_rate.epoch.env.math": 0.9571026722925458, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7817440081591025, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670598651614739, + "success_rate.epoch.global": 0.8763111023154562, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957264957264957, + "tokens_p.mean_in_band": 0.40234375, + "tokens_rate.above_band": 0.9915254237288136, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00847457627118644 + }, + { + "epoch": 0.54005112910098, + "grad_norm": 64.85605221932524, + "learning_rate": 3.973154089260227e-07, + "loss": 0.3177, + "step": 2535, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8896551724137931, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9547325102880658, + "success_rate.epoch.env.logic": 0.9064935064935065, + "success_rate.epoch.env.math": 0.9571629213483146, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7817904374364191, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670490671981649, + "success_rate.epoch.global": 0.8761603792218052, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978742732558139, + "tokens_p.mean_in_band": 0.4457236842105263, + "tokens_rate.above_band": 0.9783845278725825, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02161547212741752 + }, + { + "epoch": 0.5411163187047294, + "grad_norm": 337.10997695762836, + "learning_rate": 3.973030962066668e-07, + "loss": 0.4547, + "step": 2540, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9549180327868853, + "success_rate.epoch.env.logic": 0.9066147859922179, + "success_rate.epoch.env.math": 0.9572230014025246, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7818366311516997, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671553274033773, + "success_rate.epoch.global": 0.876207372363493, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983595800524935, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9870466321243523, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012953367875647668 + }, + { + "epoch": 0.5421815083084789, + "grad_norm": 126.4327754675605, + "learning_rate": 3.972907563010826e-07, + "loss": 0.3846, + "step": 2545, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9549180327868853, + "success_rate.epoch.env.logic": 0.906856403622251, + "success_rate.epoch.env.math": 0.9573128061581525, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7817721518987342, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671990365971033, + "success_rate.epoch.global": 0.8762541806020067, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959084440227703, + "tokens_p.mean_in_band": 0.7135416666666666, + "tokens_rate.above_band": 0.9777365491651205, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022263450834879406 + }, + { + "epoch": 0.5432466979122283, + "grad_norm": 135.44532687206348, + "learning_rate": 3.972783892182631e-07, + "loss": 0.2862, + "step": 2550, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9070967741935484, + "success_rate.epoch.env.math": 0.9574022346368715, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7822132390096008, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86728584515986, + "success_rate.epoch.global": 0.8764971529550363, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984657622739018, + "tokens_p.mean_in_band": 0.6146918402777778, + "tokens_rate.above_band": 0.9772727272727273, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022727272727272728 + }, + { + "epoch": 0.5443118875159778, + "grad_norm": 159.68119557055104, + "learning_rate": 3.972659949672214e-07, + "loss": 0.4017, + "step": 2555, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9073359073359073, + "success_rate.epoch.env.math": 0.9575208913649025, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7821482602118003, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673124643482772, + "success_rate.epoch.global": 0.8765432098765432, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945652173913043, + "tokens_p.mean_in_band": 0.50703125, + "tokens_rate.above_band": 0.9019607843137255, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09803921568627451 + }, + { + "epoch": 0.5453770771197273, + "grad_norm": 92.1967519557992, + "learning_rate": 3.972535735569904e-07, + "loss": 0.3303, + "step": 2560, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.891156462585034, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9073359073359073, + "success_rate.epoch.env.math": 0.9576682859125607, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.782083543029693, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673877535343205, + "success_rate.epoch.global": 0.8765890866418932, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979281767955801, + "tokens_p.mean_in_band": 0.7309027777777778, + "tokens_rate.above_band": 0.9757412398921833, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02425876010781671 + }, + { + "epoch": 0.5464422667234767, + "grad_norm": 222.11892995636822, + "learning_rate": 3.972411249966227e-07, + "loss": 0.4238, + "step": 2565, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.891156462585034, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.951417004048583, + "success_rate.epoch.env.logic": 0.9062901155327343, + "success_rate.epoch.env.math": 0.9570637119113573, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7821931589537223, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669266726086896, + "success_rate.epoch.global": 0.8762443880538747, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9857220767888307, + "tokens_p.mean_in_band": 0.4992922957371226, + "tokens_rate.above_band": 0.7178651966925582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.28213480330744173 + }, + { + "epoch": 0.5475074563272262, + "grad_norm": 91.53012504637596, + "learning_rate": 3.9722864929519076e-07, + "loss": 0.4132, + "step": 2570, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.891156462585034, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9062901155327343, + "success_rate.epoch.env.math": 0.9571230982019364, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.782237832413447, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669908191315795, + "success_rate.epoch.global": 0.8762906682252094, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.990175585284281, + "tokens_p.mean_in_band": 0.8039434523809523, + "tokens_rate.above_band": 0.934375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.065625 + }, + { + "epoch": 0.5485726459309758, + "grad_norm": 201.6586376209722, + "learning_rate": 3.972161464617867e-07, + "loss": 0.2345, + "step": 2575, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.8940397350993378, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9064102564102564, + "success_rate.epoch.env.math": 0.9571823204419889, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.782565130260521, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8672989949023934, + "success_rate.epoch.global": 0.8765312074664593, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976480836236934, + "tokens_p.mean_in_band": 0.76171875, + "tokens_rate.above_band": 0.997913769123783, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002086230876216968 + }, + { + "epoch": 0.5496378355347252, + "grad_norm": 71.10610993549085, + "learning_rate": 3.972036165055225e-07, + "loss": 0.5212, + "step": 2580, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.8940397350993378, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9053708439897699, + "success_rate.epoch.env.math": 0.9572118702553485, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7827172827172827, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867227925303312, + "success_rate.epoch.global": 0.8763826896953231, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978725416036308, + "tokens_p.mean_in_band": 0.44344429347826086, + "tokens_rate.above_band": 0.966374269005848, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033625730994152045 + }, + { + "epoch": 0.5507030251384747, + "grad_norm": 152.31945652356728, + "learning_rate": 3.9719105943553e-07, + "loss": 0.3216, + "step": 2585, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9054916985951469, + "success_rate.epoch.env.math": 0.9572708476912474, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7831505483549352, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674285653136503, + "success_rate.epoch.global": 0.8766221189231067, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991203703703704, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.995575221238938, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004424778761061947 + }, + { + "epoch": 0.5517682147422242, + "grad_norm": 51.022859740643355, + "learning_rate": 3.971784752609607e-07, + "loss": 0.2812, + "step": 2590, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9057324840764331, + "success_rate.epoch.env.math": 0.9573883161512028, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7835820895522388, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675003648716999, + "success_rate.epoch.global": 0.8768606224627875, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9913366336633663, + "tokens_p.mean_in_band": 0.826171875, + "tokens_rate.above_band": 0.9805825242718447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019417475728155338 + }, + { + "epoch": 0.5528334043459736, + "grad_norm": 293.4973952286877, + "learning_rate": 3.971658639909857e-07, + "loss": 0.481, + "step": 2595, + "success_rate.epoch.env.abd": 0.9826839826839827, + "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.905852417302799, + "success_rate.epoch.env.math": 0.9574759945130316, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7835153922542205, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675387830235906, + "success_rate.epoch.global": 0.8769052672197569, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978849407783418, + "tokens_p.mean_in_band": 0.5120738636363636, + "tokens_rate.above_band": 0.9817275747508306, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018272425249169437 + }, + { + "epoch": 0.5538985939497231, + "grad_norm": 71.21974049576679, + "learning_rate": 3.9715322563479617e-07, + "loss": 0.383, + "step": 2600, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9059720457433291, + "success_rate.epoch.env.math": 0.9576213260423787, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7833415964303421, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675538559464697, + "success_rate.epoch.global": 0.8769497400346621, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9927884615384616, + "tokens_p.mean_in_band": 0.5384114583333334, + "tokens_rate.above_band": 0.9381443298969072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.061855670103092786 + }, + { + "epoch": 0.5549637835534725, + "grad_norm": 81.04741703187635, + "learning_rate": 3.971405602016028e-07, + "loss": 0.3355, + "step": 2605, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9064475347661188, + "success_rate.epoch.env.math": 0.9577368779822768, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7834489593657086, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676790798919828, + "success_rate.epoch.global": 0.8771862387084375, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992038216560509, + "tokens_p.mean_in_band": 0.755859375, + "tokens_rate.above_band": 0.9936708860759493, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006329113924050633 + }, + { + "epoch": 0.556028973157222, + "grad_norm": 100.81102008230842, + "learning_rate": 3.97127867700636e-07, + "loss": 0.3533, + "step": 2610, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.906801007556675, + "success_rate.epoch.env.math": 0.9577656675749319, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7821146245059288, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675925278486584, + "success_rate.epoch.global": 0.8766545175522732, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9960730088495575, + "tokens_p.mean_below_band": 1.6079866327345371e-09, + "tokens_p.mean_in_band": 0.4947350543478261, + "tokens_rate.above_band": 0.9592529711375212, + "tokens_rate.below_band": 0.001697792869269949, + "tokens_rate.in_band": 0.03904923599320883 + }, + { + "epoch": 0.5570941627609715, + "grad_norm": 191.96566746958672, + "learning_rate": 3.97115148141146e-07, + "loss": 0.3816, + "step": 2615, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.952191235059761, + "success_rate.epoch.env.logic": 0.9069182389937107, + "success_rate.epoch.env.math": 0.9571719918422842, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.782329713721619, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676786550442059, + "success_rate.epoch.global": 0.8766992150105304, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988127074880118, + "tokens_p.mean_in_band": 0.7046875, + "tokens_rate.above_band": 0.998159057437408, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001840942562592047 + }, + { + "epoch": 0.5581593523647209, + "grad_norm": 803.1966455888191, + "learning_rate": 3.9710240153240283e-07, + "loss": 0.2919, + "step": 2620, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9642857142857143, + "success_rate.epoch.env.ded": 0.952191235059761, + "success_rate.epoch.env.logic": 0.9071518193224593, + "success_rate.epoch.env.math": 0.9572591587516961, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7827586206896552, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.864500052179834, + "success_rate.epoch.global": 0.8767437416395949, + "success_rate.window.env.babyai": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995702005730659, + "tokens_p.mean_in_band": 0.5260416666666666, + "tokens_rate.above_band": 0.9914772727272727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008522727272727272 + }, + { + "epoch": 0.5592245419684704, + "grad_norm": 277.50867450949494, + "learning_rate": 3.97089627883696e-07, + "loss": 0.6429, + "step": 2625, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9525691699604744, + "success_rate.epoch.env.logic": 0.9072681704260651, + "success_rate.epoch.env.math": 0.9573170731707317, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7831858407079646, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8647010474103123, + "success_rate.epoch.global": 0.8769788289147434, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998143115942029, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.999275887038378, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.000724112961622013 + }, + { + "epoch": 0.5602897315722198, + "grad_norm": 126.7794952636177, + "learning_rate": 3.9707682720433493e-07, + "loss": 0.2049, + "step": 2630, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.896774193548387, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9525691699604744, + "success_rate.epoch.env.logic": 0.9075, + "success_rate.epoch.env.math": 0.9574324324324325, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.783014236622484, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8647779457916438, + "success_rate.epoch.global": 0.8770226537216829, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976293103448276, + "tokens_p.mean_in_band": 0.7550223214285714, + "tokens_rate.above_band": 0.9764309764309764, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02356902356902357 + }, + { + "epoch": 0.5613549211759693, + "grad_norm": 103.30703815180942, + "learning_rate": 3.9706399950364863e-07, + "loss": 0.2585, + "step": 2635, + "success_rate.epoch.env.abd": 0.9829787234042553, + "success_rate.epoch.env.agentgym:alfworld": 0.896774193548387, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.9075, + "success_rate.epoch.env.math": 0.9575471698113207, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7833333333333333, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8648476435206963, + "success_rate.epoch.global": 0.8772563176895307, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997299789251844, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9989473684210526, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0010526315789473684 + }, + { + "epoch": 0.5624201107797188, + "grad_norm": 154.9652718105165, + "learning_rate": 3.9705114479098583e-07, + "loss": 0.2586, + "step": 2640, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.896774193548387, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9076154806491885, + "success_rate.epoch.env.math": 0.9576043068640646, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7833740831295843, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8648904401125447, + "success_rate.epoch.global": 0.8772994500284468, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975765306122449, + "tokens_p.mean_in_band": 0.435546875, + "tokens_rate.above_band": 0.9919028340080972, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008097165991902834 + }, + { + "epoch": 0.5634853003834682, + "grad_norm": 126.45246194760615, + "learning_rate": 3.9703826307571496e-07, + "loss": 0.4028, + "step": 2645, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9077306733167082, + "success_rate.epoch.env.math": 0.9576612903225806, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7834146341463415, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649866450725189, + "success_rate.epoch.global": 0.8773424190800682, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999349925705795, + "tokens_p.mean_in_band": 0.466796875, + "tokens_rate.above_band": 0.9911634756995582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008836524300441826 + }, + { + "epoch": 0.5645504899872177, + "grad_norm": 95.04606237284413, + "learning_rate": 3.9702535436722413e-07, + "loss": 0.3611, + "step": 2650, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.953307392996109, + "success_rate.epoch.env.logic": 0.9069478908188585, + "success_rate.epoch.env.math": 0.9577464788732394, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7831384015594542, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649146965690672, + "success_rate.epoch.global": 0.8771962969960325, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976554470709147, + "tokens_p.mean_in_band": 0.6715198863636364, + "tokens_rate.above_band": 0.9778894472361809, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022110552763819097 + }, + { + "epoch": 0.5656156795909671, + "grad_norm": 201.03551712049062, + "learning_rate": 3.970124186749211e-07, + "loss": 0.4737, + "step": 2655, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9070631970260223, + "success_rate.epoch.env.math": 0.9578595317725752, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7824817518248175, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649107590087755, + "success_rate.epoch.global": 0.8770507260041486, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666668, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981218030690537, + "tokens_p.mean_in_band": 0.4289772727272727, + "tokens_rate.above_band": 0.9861286254728878, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013871374527112233 + }, + { + "epoch": 0.5666808691947166, + "grad_norm": 159.40912316367812, + "learning_rate": 3.969994560082333e-07, + "loss": 0.3871, + "step": 2660, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9071782178217822, + "success_rate.epoch.env.math": 0.9579719813208806, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7825242718446602, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649353035873125, + "success_rate.epoch.global": 0.8770939205721815, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956140350877193, + "tokens_p.mean_in_band": 0.599609375, + "tokens_rate.above_band": 0.9661016949152542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03389830508474576 + }, + { + "epoch": 0.5677460587984662, + "grad_norm": 173.34160478951966, + "learning_rate": 3.969864663766079e-07, + "loss": 0.3907, + "step": 2665, + "success_rate.epoch.env.abd": 0.9831932773109243, + "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9074074074074074, + "success_rate.epoch.env.math": 0.958, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7829457364341085, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650693378824137, + "success_rate.epoch.global": 0.8773248168326132, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99658203125, + "tokens_p.mean_in_band": 0.64306640625, + "tokens_rate.above_band": 0.975609756097561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024390243902439025 + }, + { + "epoch": 0.5688112484022156, + "grad_norm": 67.47328691524926, + "learning_rate": 3.969734497895116e-07, + "loss": 0.4688, + "step": 2670, + "success_rate.epoch.env.abd": 0.9831932773109243, + "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9075215782983971, + "success_rate.epoch.env.math": 0.9574468085106383, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7833655705996132, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.865083919258965, + "success_rate.epoch.global": 0.8773673354584661, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986795774647887, + "tokens_p.mean_in_band": 0.6822916666666666, + "tokens_rate.above_band": 0.9895470383275261, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010452961672473868 + }, + { + "epoch": 0.5698764380059651, + "grad_norm": 129.90907328395582, + "learning_rate": 3.969604062564308e-07, + "loss": 0.4853, + "step": 2675, + "success_rate.epoch.env.abd": 0.9832635983263598, + "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9078624078624079, + "success_rate.epoch.env.math": 0.9574750830564784, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.783405692233478, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651275144188879, + "success_rate.epoch.global": 0.8774096949279431, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968619246861925, + "tokens_p.mean_in_band": 0.445703125, + "tokens_rate.above_band": 0.9598393574297188, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.040160642570281124 + }, + { + "epoch": 0.5709416276097146, + "grad_norm": 78.91704076627231, + "learning_rate": 3.9694733578687146e-07, + "loss": 0.2861, + "step": 2680, + "success_rate.epoch.env.abd": 0.9832635983263598, + "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9079754601226994, + "success_rate.epoch.env.math": 0.9575596816976127, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7834456207892204, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651674434609375, + "success_rate.epoch.global": 0.8774518961330096, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979383680555556, + "tokens_p.mean_in_band": 0.6484375, + "tokens_rate.above_band": 0.9829351535836177, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017064846416382253 + }, + { + "epoch": 0.572006817213464, + "grad_norm": 146.30382255916345, + "learning_rate": 3.9693423839035933e-07, + "loss": 0.2785, + "step": 2685, + "success_rate.epoch.env.abd": 0.983402489626556, + "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9080882352941176, + "success_rate.epoch.env.math": 0.9576158940397351, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.7833813640730067, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652482277644094, + "success_rate.epoch.global": 0.8774939399589782, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962298927613941, + "tokens_p.mean_in_band": 0.6106770833333334, + "tokens_rate.above_band": 0.9920212765957447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007978723404255319 + }, + { + "epoch": 0.5730720068172135, + "grad_norm": 110.9350808935789, + "learning_rate": 3.969211140764397e-07, + "loss": 0.293, + "step": 2690, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9083129584352079, + "success_rate.epoch.env.math": 0.9576158940397351, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7835249042145593, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649213723072946, + "success_rate.epoch.global": 0.8773497115205658, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9945833333333334, + "tokens_p.mean_in_band": 0.651110197368421, + "tokens_rate.above_band": 0.8875739644970414, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11242603550295859 + }, + { + "epoch": 0.574137196420963, + "grad_norm": 144.55120302286807, + "learning_rate": 3.969079628546774e-07, + "loss": 0.1735, + "step": 2695, + "success_rate.epoch.env.abd": 0.9835390946502057, + "success_rate.epoch.env.agentgym:alfworld": 0.89937106918239, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9083129584352079, + "success_rate.epoch.env.math": 0.9576719576719577, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7840420449116101, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650556849319014, + "success_rate.epoch.global": 0.8775775589819803, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985219594594594, + "tokens_p.mean_in_band": 0.806640625, + "tokens_rate.above_band": 0.9736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02631578947368421 + }, + { + "epoch": 0.5752023860247124, + "grad_norm": 96.04012700928776, + "learning_rate": 3.96894784734657e-07, + "loss": 0.291, + "step": 2700, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.89937106918239, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.9083129584352079, + "success_rate.epoch.env.math": 0.9577557755775578, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7840800762631077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650890951284599, + "success_rate.epoch.global": 0.8776191359169293, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997751798561151, + "tokens_p.mean_in_band": 0.4580078125, + "tokens_rate.above_band": 0.9788732394366197, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02112676056338028 + }, + { + "epoch": 0.5762675756284619, + "grad_norm": 87.59971385038605, + "learning_rate": 3.9688157972598273e-07, + "loss": 0.3949, + "step": 2705, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.89375, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9073170731707317, + "success_rate.epoch.env.math": 0.957811470006592, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7842857142857143, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8645395035544319, + "success_rate.epoch.global": 0.8774754765870812, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0004807692307693, + "tokens_p.mean_in_band": 0.515625, + "tokens_rate.above_band": 0.9903769045709703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009623095429029671 + }, + { + "epoch": 0.5773327652322113, + "grad_norm": 72.34888465643535, + "learning_rate": 3.9686834783827814e-07, + "loss": 0.3128, + "step": 2710, + "success_rate.epoch.env.abd": 0.9838056680161943, + "success_rate.epoch.env.agentgym:alfworld": 0.89375, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9074299634591961, + "success_rate.epoch.env.math": 0.957922419460881, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7842205323193916, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.864559911590273, + "success_rate.epoch.global": 0.8775170884906706, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9907407407407407, + "tokens_p.mean_in_band": 0.4375, + "tokens_rate.above_band": 0.9, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1 + }, + { + "epoch": 0.5783979548359608, + "grad_norm": 270.9499998128655, + "learning_rate": 3.9685508908118657e-07, + "loss": 0.359, + "step": 2715, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9075425790754258, + "success_rate.epoch.env.math": 0.9579500657030223, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7843601895734598, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8646512894744892, + "success_rate.epoch.global": 0.8775585469297437, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981094306049823, + "tokens_p.mean_in_band": 0.6025390625, + "tokens_rate.above_band": 0.9859649122807017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014035087719298246 + }, + { + "epoch": 0.5794631444397103, + "grad_norm": 27.085048256464663, + "learning_rate": 3.9684180346437086e-07, + "loss": 0.1702, + "step": 2720, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9076549210206561, + "success_rate.epoch.env.math": 0.9580877537655533, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7846663511594889, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8648063455545937, + "success_rate.epoch.global": 0.8777839131235045, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_p.mean_in_band": 0.72578125, + "tokens_rate.above_band": 0.9760765550239234, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023923444976076555 + }, + { + "epoch": 0.5805283340434597, + "grad_norm": 94.42672213569979, + "learning_rate": 3.9682849099751366e-07, + "loss": 0.3339, + "step": 2725, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9659090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9543726235741445, + "success_rate.epoch.env.logic": 0.9078787878787878, + "success_rate.epoch.env.math": 0.9581425768476128, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7848699763593381, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649176000762498, + "success_rate.epoch.global": 0.8780084512217528, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998881153654898, + "tokens_p.mean_in_band": 0.6455078125, + "tokens_rate.above_band": 0.9990064580228515, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0009935419771485345 + }, + { + "epoch": 0.5815935236472092, + "grad_norm": 101.82936492693237, + "learning_rate": 3.968151516903168e-07, + "loss": 0.5311, + "step": 2730, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9543726235741445, + "success_rate.epoch.env.logic": 0.9078787878787878, + "success_rate.epoch.env.math": 0.95822454308094, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.785007072135785, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649550242595169, + "success_rate.epoch.global": 0.8780487804878049, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994926948051948, + "tokens_p.mean_in_band": 0.38046875, + "tokens_rate.above_band": 0.9390243902439024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06097560975609756 + }, + { + "epoch": 0.5826587132509586, + "grad_norm": 196.2003889816082, + "learning_rate": 3.96801785552502e-07, + "loss": 0.2636, + "step": 2735, + "success_rate.epoch.env.abd": 0.9839357429718876, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9078787878787878, + "success_rate.epoch.env.math": 0.9582517938682322, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7845719661335842, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650215177555082, + "success_rate.epoch.global": 0.8779059125022881, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.996353598691385, + "tokens_p.mean_in_band": 0.4625651041666667, + "tokens_rate.above_band": 0.9870828848223897, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012917115177610334 + }, + { + "epoch": 0.5837239028547081, + "grad_norm": 136.88939730849296, + "learning_rate": 3.9678839259381026e-07, + "loss": 0.2627, + "step": 2740, + "success_rate.epoch.env.abd": 0.9839357429718876, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9081015719467956, + "success_rate.epoch.env.math": 0.9583061889250815, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7847091932457786, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650591910497856, + "success_rate.epoch.global": 0.8779462817467568, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959558823529412, + "tokens_p.mean_in_band": 0.6921875, + "tokens_rate.above_band": 0.8947368421052632, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10526315789473684 + }, + { + "epoch": 0.5847890924584576, + "grad_norm": 165.80382489585057, + "learning_rate": 3.9677497282400245e-07, + "loss": 0.268, + "step": 2745, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9083232810615199, + "success_rate.epoch.env.math": 0.9583875162548765, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7845433255269321, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650930958080042, + "success_rate.epoch.global": 0.8779865037388291, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996546961325967, + "tokens_p.mean_in_band": 0.5651041666666666, + "tokens_rate.above_band": 0.9679144385026738, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03208556149732621 + }, + { + "epoch": 0.585854282062207, + "grad_norm": 141.3101173271051, + "learning_rate": 3.967615262528587e-07, + "loss": 0.4855, + "step": 2750, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.908433734939759, + "success_rate.epoch.env.math": 0.958414554905783, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7842129845866418, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651096124829188, + "success_rate.epoch.global": 0.8778445294010558, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9997391001855288, + "tokens_p.mean_in_band": 0.3701171875, + "tokens_rate.above_band": 0.9853747714808044, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014625228519195612 + }, + { + "epoch": 0.5869194716659566, + "grad_norm": 141.42994281570122, + "learning_rate": 3.967480528901788e-07, + "loss": 0.4361, + "step": 2755, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9073405535499398, + "success_rate.epoch.env.math": 0.9585223590408296, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7841491841491841, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.865014232692716, + "success_rate.epoch.global": 0.8777030710521534, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979378172588832, + "tokens_p.mean_in_band": 0.5640243902439024, + "tokens_rate.above_band": 0.9505428226779252, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04945717732207479 + }, + { + "epoch": 0.587984661269706, + "grad_norm": 48.812305725481735, + "learning_rate": 3.9673455274578204e-07, + "loss": 0.3194, + "step": 2760, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9074519230769231, + "success_rate.epoch.env.math": 0.9586028460543338, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7846511627906977, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650940506240447, + "success_rate.epoch.global": 0.8779249047705423, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969135802469136, + "tokens_p.mean_in_band": 0.611328125, + "tokens_rate.above_band": 0.9759036144578314, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024096385542168676 + }, + { + "epoch": 0.5890498508734555, + "grad_norm": 62.55218594436803, + "learning_rate": 3.967210258295072e-07, + "loss": 0.2571, + "step": 2765, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.907673860911271, + "success_rate.epoch.env.math": 0.9587362991618311, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7849512308406874, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651536378051208, + "success_rate.epoch.global": 0.8781459351801557, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956095041322314, + "tokens_p.mean_in_band": 0.81201171875, + "tokens_rate.above_band": 0.937984496124031, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06201550387596899 + }, + { + "epoch": 0.590115040477205, + "grad_norm": 212.9899388467896, + "learning_rate": 3.967074721512126e-07, + "loss": 0.3378, + "step": 2770, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9078947368421053, + "success_rate.epoch.env.math": 0.9588424437299036, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7852504638218923, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652260460226798, + "success_rate.epoch.global": 0.8783661666365443, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9945977393617021, + "tokens_p.mean_in_band": 0.755859375, + "tokens_rate.above_band": 0.9947089947089947, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005291005291005291 + }, + { + "epoch": 0.5911802300809544, + "grad_norm": 145.69168191162092, + "learning_rate": 3.966938917207761e-07, + "loss": 0.2639, + "step": 2775, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9080047789725209, + "success_rate.epoch.env.math": 0.9589216944801027, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7849213691026827, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652133367646256, + "success_rate.epoch.global": 0.8782247880209273, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9922680412371134, + "tokens_p.mean_in_band": 0.34521484375, + "tokens_rate.above_band": 0.9603960396039604, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039603960396039604 + }, + { + "epoch": 0.5922454196847039, + "grad_norm": 52.29601460066344, + "learning_rate": 3.96680284548095e-07, + "loss": 0.2678, + "step": 2780, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9081145584725537, + "success_rate.epoch.env.math": 0.9590268886043534, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7851201478743068, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653325573306598, + "success_rate.epoch.global": 0.8784440842787682, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972165991902834, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9959677419354839, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004032258064516129 + }, + { + "epoch": 0.5933106092884534, + "grad_norm": 121.97529309328436, + "learning_rate": 3.966666506430861e-07, + "loss": 0.3134, + "step": 2785, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9082240762812872, + "success_rate.epoch.env.math": 0.9591576260370134, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7849561605906784, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654372424920208, + "success_rate.epoch.global": 0.8784828330037749, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982443820224719, + "tokens_p.mean_in_band": 0.58544921875, + "tokens_rate.above_band": 0.978021978021978, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02197802197802198 + }, + { + "epoch": 0.5943757988922028, + "grad_norm": 30.028166788365052, + "learning_rate": 3.9665299001568577e-07, + "loss": 0.4069, + "step": 2790, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9085510688836105, + "success_rate.epoch.env.math": 0.9591576260370134, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7855499309710078, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8655373252832739, + "success_rate.epoch.global": 0.8787008792391889, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976136363636363, + "tokens_p.mean_in_band": 0.7903645833333334, + "tokens_rate.above_band": 0.9892086330935251, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01079136690647482 + }, + { + "epoch": 0.5954409884959523, + "grad_norm": 126.42427937631184, + "learning_rate": 3.9663930267584965e-07, + "loss": 0.2259, + "step": 2795, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9085510688836105, + "success_rate.epoch.env.math": 0.9592356687898089, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7845659163987139, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654922724297704, + "success_rate.epoch.global": 0.8783807988536629, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9949433656957929, + "tokens_p.mean_in_band": 0.5829503676470589, + "tokens_rate.above_band": 0.9478527607361963, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05214723926380368 + }, + { + "epoch": 0.5965061780997017, + "grad_norm": 111.955382617761, + "learning_rate": 3.96625588633553e-07, + "loss": 0.4845, + "step": 2800, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9074733096085409, + "success_rate.epoch.env.math": 0.9587301587301588, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7845025217790005, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653425757066399, + "success_rate.epoch.global": 0.8780618630430895, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.5166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9966722129783694, + "tokens_p.mean_in_band": 0.490625, + "tokens_rate.above_band": 0.9161585365853658, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08384146341463415 + }, + { + "epoch": 0.5975713677034512, + "grad_norm": 28.612260925514917, + "learning_rate": 3.9661184789879066e-07, + "loss": 0.2549, + "step": 2805, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9075829383886256, + "success_rate.epoch.env.math": 0.958808618504436, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7849954254345837, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654658262559678, + "success_rate.epoch.global": 0.878279493128681, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975961538461539, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9970501474926253, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0029498525073746312 + }, + { + "epoch": 0.5986365573072007, + "grad_norm": 417.3717178730771, + "learning_rate": 3.965980804815766e-07, + "loss": 0.4059, + "step": 2810, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9078014184397163, + "success_rate.epoch.env.math": 0.958886780518659, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7853881278538812, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8655445178654543, + "success_rate.epoch.global": 0.878496347764119, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947519083969466, + "tokens_p.mean_in_band": 0.783203125, + "tokens_rate.above_band": 0.9424460431654677, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05755395683453238 + }, + { + "epoch": 0.5997017469109501, + "grad_norm": 120.7657315705206, + "learning_rate": 3.9658428639194454e-07, + "loss": 0.2757, + "step": 2815, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9079102715466352, + "success_rate.epoch.env.math": 0.9589646464646465, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7854214123006834, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8655702217466253, + "success_rate.epoch.global": 0.8785345900764716, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99425, + "tokens_p.mean_in_band": 0.71875, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 0.6007669365146996, + "grad_norm": 68.14852446729229, + "learning_rate": 3.9657046563994737e-07, + "loss": 0.4115, + "step": 2820, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9516728624535316, + "success_rate.epoch.env.logic": 0.9080188679245284, + "success_rate.epoch.env.math": 0.9590680100755667, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7851615839781521, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652741474067014, + "success_rate.epoch.global": 0.8783951713119119, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9922632629777525, + "tokens_p.mean_below_band": 3.0174851417541504e-07, + "tokens_p.mean_in_band": 0.4756988463488844, + "tokens_rate.above_band": 0.7798042704626335, + "tokens_rate.below_band": 0.0008896797153024911, + "tokens_rate.in_band": 0.21930604982206406 + }, + { + "epoch": 0.601832126118449, + "grad_norm": 77.47932183983724, + "learning_rate": 3.9655661823565776e-07, + "loss": 0.3536, + "step": 2825, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9516728624535316, + "success_rate.epoch.env.logic": 0.9084507042253521, + "success_rate.epoch.env.math": 0.9591451917033312, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7853569804456572, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653381850790735, + "success_rate.epoch.global": 0.8786106680843523, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988425925925926, + "tokens_p.mean_in_band": 0.7981770833333334, + "tokens_rate.above_band": 0.989010989010989, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01098901098901099 + }, + { + "epoch": 0.6028973157221985, + "grad_norm": 78.7612652563382, + "learning_rate": 3.965427441891674e-07, + "loss": 0.4226, + "step": 2830, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9086651053864169, + "success_rate.epoch.env.math": 0.9591964846202135, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7853901996370236, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653816307761311, + "success_rate.epoch.global": 0.878648505218468, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995083579154376, + "tokens_p.mean_in_band": 0.56005859375, + "tokens_rate.above_band": 0.9921951219512195, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007804878048780488 + }, + { + "epoch": 0.603962505325948, + "grad_norm": 83.16517769869331, + "learning_rate": 3.965288435105877e-07, + "loss": 0.3928, + "step": 2835, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9087719298245615, + "success_rate.epoch.env.math": 0.9592476489028213, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7854232684472612, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650546470255048, + "success_rate.epoch.global": 0.8785096238742716, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9951421113689095, + "tokens_p.mean_in_band": 0.666610054347826, + "tokens_rate.above_band": 0.9493392070484582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05066079295154185 + }, + { + "epoch": 0.6050276949296974, + "grad_norm": 32.080396169566875, + "learning_rate": 3.9651491621004933e-07, + "loss": 0.2439, + "step": 2840, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9089848308051341, + "success_rate.epoch.env.math": 0.9593241551939925, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7859078590785907, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651250104712389, + "success_rate.epoch.global": 0.8787237793054821, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975393700787402, + "tokens_p.mean_in_band": 0.757421875, + "tokens_rate.above_band": 0.927007299270073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.072992700729927 + }, + { + "epoch": 0.606092884533447, + "grad_norm": 124.827136915955, + "learning_rate": 3.9650096229770247e-07, + "loss": 0.3579, + "step": 2845, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9079254079254079, + "success_rate.epoch.env.math": 0.959349593495935, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7865826204412427, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651506288735927, + "success_rate.epoch.global": 0.8787612176667253, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963881909547738, + "tokens_p.mean_in_band": 0.67626953125, + "tokens_rate.above_band": 0.961352657004831, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03864734299516908 + }, + { + "epoch": 0.6071580741371965, + "grad_norm": 63.92735622924593, + "learning_rate": 3.964869817837166e-07, + "loss": 0.4637, + "step": 2850, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9060324825986079, + "success_rate.epoch.env.math": 0.9594003747657714, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7858749437696806, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649401677348707, + "success_rate.epoch.global": 0.8782715615668365, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9987438474159147, + "tokens_p.mean_in_band": 0.4264481707317073, + "tokens_rate.above_band": 0.9674603174603175, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03253968253968254 + }, + { + "epoch": 0.6082232637409459, + "grad_norm": 80.6302485353995, + "learning_rate": 3.964729746782805e-07, + "loss": 0.4292, + "step": 2855, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9520295202952029, + "success_rate.epoch.env.logic": 0.9060324825986079, + "success_rate.epoch.env.math": 0.9595519601742377, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7855215827338129, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649934085451899, + "success_rate.epoch.global": 0.8782883198877587, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9993196661828737, + "tokens_p.mean_in_band": 0.5947265625, + "tokens_rate.above_band": 0.9942279942279942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005772005772005772 + }, + { + "epoch": 0.6092884533446954, + "grad_norm": 105.17318627883428, + "learning_rate": 3.964589409916027e-07, + "loss": 0.2579, + "step": 2860, + "success_rate.epoch.env.abd": 0.984313725490196, + "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9520295202952029, + "success_rate.epoch.env.logic": 0.90625, + "success_rate.epoch.env.math": 0.9596523898199876, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7853614728334082, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650133716481896, + "success_rate.epoch.global": 0.8783263305322129, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9920280612244898, + "tokens_p.mean_in_band": 0.7021484375, + "tokens_rate.above_band": 0.9245283018867925, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07547169811320754 + }, + { + "epoch": 0.6103536429484449, + "grad_norm": 54.64373763484356, + "learning_rate": 3.9644488073391063e-07, + "loss": 0.3027, + "step": 2865, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8957055214723927, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9522058823529411, + "success_rate.epoch.env.logic": 0.9063583815028902, + "success_rate.epoch.env.math": 0.9597024178549287, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7857463021066786, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651428869623689, + "success_rate.epoch.global": 0.8785389723872772, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990821678321679, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9986033519553073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013966480446927375 + }, + { + "epoch": 0.6114188325521943, + "grad_norm": 77.30734784913683, + "learning_rate": 3.9643079391545137e-07, + "loss": 0.429, + "step": 2870, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8957055214723927, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9065743944636678, + "success_rate.epoch.env.math": 0.9598021026592455, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7860340196956133, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652136583607363, + "success_rate.epoch.global": 0.8787508722958828, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996875, + "tokens_p.mean_in_band": 0.880859375, + "tokens_rate.above_band": 0.9848484848484849, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015151515151515152 + }, + { + "epoch": 0.6124840221559438, + "grad_norm": 54.297149657348896, + "learning_rate": 3.964166805464914e-07, + "loss": 0.2485, + "step": 2875, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9066820276497696, + "success_rate.epoch.env.math": 0.9599012954966071, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7855227882037533, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652437980401767, + "success_rate.epoch.global": 0.8786137234413096, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995933734939759, + "tokens_p.mean_in_band": 0.5027901785714286, + "tokens_rate.above_band": 0.9673659673659674, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03263403263403263 + }, + { + "epoch": 0.6135492117596932, + "grad_norm": 66.39901014287504, + "learning_rate": 3.9640254063731625e-07, + "loss": 0.3462, + "step": 2880, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.906789413118527, + "success_rate.epoch.env.math": 0.96, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7859054415700267, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653501128975711, + "success_rate.epoch.global": 0.8788247566063978, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971177184466019, + "tokens_p.mean_in_band": 0.763671875, + "tokens_rate.above_band": 0.9363636363636364, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06363636363636363 + }, + { + "epoch": 0.6146144013634427, + "grad_norm": 206.63351639348156, + "learning_rate": 3.963883741982311e-07, + "loss": 0.2259, + "step": 2885, + "success_rate.epoch.env.abd": 0.9844357976653697, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.906789413118527, + "success_rate.epoch.env.math": 0.960098219766728, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7858414959928762, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653742710553213, + "success_rate.epoch.global": 0.8788615064213815, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984797297297298, + "tokens_p.mean_in_band": 0.3671875, + "tokens_rate.above_band": 0.9946236559139785, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005376344086021506 + }, + { + "epoch": 0.6156795909671922, + "grad_norm": 85.23586880115933, + "learning_rate": 3.963741812395603e-07, + "loss": 0.4086, + "step": 2890, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9070034443168772, + "success_rate.epoch.env.math": 0.9601959583588487, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7861271676300579, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654340681274387, + "success_rate.epoch.global": 0.8790713790713791, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9888613861386139, + "tokens_p.mean_in_band": 0.8489583333333334, + "tokens_rate.above_band": 0.9439252336448598, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.056074766355140186 + }, + { + "epoch": 0.6167447805709416, + "grad_norm": 64.94182937808755, + "learning_rate": 3.9635996177164765e-07, + "loss": 0.2705, + "step": 2895, + "success_rate.epoch.env.abd": 0.9845559845559846, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9059633027522935, + "success_rate.epoch.env.math": 0.9596823457544288, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7860630270750111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652924286547955, + "success_rate.epoch.global": 0.8787616741611899, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9966971544715447, + "tokens_p.mean_in_band": 0.46015625, + "tokens_rate.above_band": 0.924812030075188, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07518796992481203 + }, + { + "epoch": 0.6178099701746911, + "grad_norm": 66.8263466885697, + "learning_rate": 3.9634571580485615e-07, + "loss": 0.3927, + "step": 2900, + "success_rate.epoch.env.abd": 0.9845559845559846, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9061784897025171, + "success_rate.epoch.env.math": 0.959731543624161, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.786631252766711, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653681205194917, + "success_rate.epoch.global": 0.8789709944751382, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9916424418604651, + "tokens_p.mean_in_band": 0.776611328125, + "tokens_rate.above_band": 0.9148936170212766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0851063829787234 + }, + { + "epoch": 0.6188751597784405, + "grad_norm": 0.0, + "learning_rate": 3.9633144334956816e-07, + "loss": 0.2377, + "step": 2905, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.906392694063927, + "success_rate.epoch.env.math": 0.9597560975609756, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.786660777385159, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654137091668251, + "success_rate.epoch.global": 0.8790072388831437, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956042199488491, + "tokens_p.mean_in_band": 0.5948350694444444, + "tokens_rate.above_band": 0.9559902200488998, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044009779951100246 + }, + { + "epoch": 0.61994034938219, + "grad_norm": 87.09099619791554, + "learning_rate": 3.963171444161853e-07, + "loss": 0.2315, + "step": 2910, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.906712172923777, + "success_rate.epoch.env.math": 0.9591961023142509, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7865961199294532, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8654013171694078, + "success_rate.epoch.global": 0.8788713007570543, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9947115384615385, + "tokens_p.mean_in_band": 0.6510416666666666, + "tokens_rate.above_band": 0.9154929577464789, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08450704225352113 + }, + { + "epoch": 0.6210055389859395, + "grad_norm": 76.58118430055453, + "learning_rate": 3.963028190151286e-07, + "loss": 0.4663, + "step": 2915, + "success_rate.epoch.env.abd": 0.9847908745247148, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.906712172923777, + "success_rate.epoch.env.math": 0.9592705167173252, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7869718309859155, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86545819129385, + "success_rate.epoch.global": 0.8790793541738234, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957627118644068, + "tokens_p.mean_in_band": 0.7613636363636364, + "tokens_rate.above_band": 0.9147286821705426, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08527131782945736 + }, + { + "epoch": 0.6220707285896889, + "grad_norm": 126.88630377336375, + "learning_rate": 3.9628846715683827e-07, + "loss": 0.2585, + "step": 2920, + "success_rate.epoch.env.abd": 0.9847908745247148, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9528985507246377, + "success_rate.epoch.env.logic": 0.9069239500567536, + "success_rate.epoch.env.math": 0.9593199757134183, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7872527472527473, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8655539224731776, + "success_rate.epoch.global": 0.879286694101509, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986881054897739, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9989247311827957, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001075268817204301 + }, + { + "epoch": 0.6231359181934384, + "grad_norm": 43.94399546987997, + "learning_rate": 3.9627408885177384e-07, + "loss": 0.2353, + "step": 2925, + "success_rate.epoch.env.abd": 0.9849056603773585, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9528985507246377, + "success_rate.epoch.env.logic": 0.9070294784580499, + "success_rate.epoch.env.math": 0.9593939393939394, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7871873628784555, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8655747309786477, + "success_rate.epoch.global": 0.8793221499486478, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945913461538461, + "tokens_p.mean_in_band": 0.5712890625, + "tokens_rate.above_band": 0.8666666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13333333333333333 + }, + { + "epoch": 0.6242011077971878, + "grad_norm": 147.44890806931758, + "learning_rate": 3.962596841104142e-07, + "loss": 0.3198, + "step": 2930, + "success_rate.epoch.env.abd": 0.9849056603773585, + "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9071347678369196, + "success_rate.epoch.env.math": 0.9595166163141994, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7870289219982471, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652683181103562, + "success_rate.epoch.global": 0.8791866028708134, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9966773216689099, + "tokens_p.mean_in_band": 0.6368680334394905, + "tokens_rate.above_band": 0.9341994970662196, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06580050293378038 + }, + { + "epoch": 0.6252662974009374, + "grad_norm": 130.25080165861453, + "learning_rate": 3.9624525294325727e-07, + "loss": 0.2884, + "step": 2935, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.891566265060241, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9071347678369196, + "success_rate.epoch.env.math": 0.9595654797827399, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7871222076215506, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.864872543159927, + "success_rate.epoch.global": 0.8792221084953941, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978813559322034, + "tokens_p.mean_in_band": 0.65087890625, + "tokens_rate.above_band": 0.9925233644859813, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007476635514018692 + }, + { + "epoch": 0.6263314870046869, + "grad_norm": 65.69813651884323, + "learning_rate": 3.962307953608205e-07, + "loss": 0.2149, + "step": 2940, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9071347678369196, + "success_rate.epoch.env.math": 0.9596871239470517, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7874945343244425, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649764772207669, + "success_rate.epoch.global": 0.8794277929155313, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969951923076923, + "tokens_p.mean_in_band": 0.74560546875, + "tokens_rate.above_band": 0.9873417721518988, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012658227848101266 + }, + { + "epoch": 0.6273966766084363, + "grad_norm": 77.82909573964751, + "learning_rate": 3.962163113736404e-07, + "loss": 0.2132, + "step": 2945, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9072398190045249, + "success_rate.epoch.env.math": 0.9597113650030066, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7871783689489752, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649910468267976, + "success_rate.epoch.global": 0.8792927575654539, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973190348525469, + "tokens_p.mean_in_band": 0.5862379807692307, + "tokens_rate.above_band": 0.966321243523316, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03367875647668394 + }, + { + "epoch": 0.6284618662121858, + "grad_norm": 124.50361686150308, + "learning_rate": 3.9620180099227287e-07, + "loss": 0.3018, + "step": 2950, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9072398190045249, + "success_rate.epoch.env.math": 0.959832134292566, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7875489769264258, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8650505913570208, + "success_rate.epoch.global": 0.8794976238968092, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9924903100775194, + "tokens_p.mean_in_band": 0.7513020833333334, + "tokens_rate.above_band": 0.9555555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044444444444444446 + }, + { + "epoch": 0.6295270558159353, + "grad_norm": 101.09664789858942, + "learning_rate": 3.961872642272929e-07, + "loss": 0.4169, + "step": 2955, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9064261555806088, + "success_rate.epoch.env.math": 0.959832134292566, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7881024750325663, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651185821932349, + "success_rate.epoch.global": 0.8795323619112165, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9940232240437158, + "tokens_p.mean_in_band": 0.6376378676470589, + "tokens_rate.above_band": 0.915, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.085 + }, + { + "epoch": 0.6305922454196847, + "grad_norm": 50.78340309572184, + "learning_rate": 3.9617270108929483e-07, + "loss": 0.3334, + "step": 2960, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9064261555806088, + "success_rate.epoch.env.math": 0.959904248952723, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7882200086617583, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.865152232106874, + "success_rate.epoch.global": 0.8795669824086604, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945652173913043, + "tokens_p.mean_in_band": 0.6083333333333333, + "tokens_rate.above_band": 0.9608355091383812, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0391644908616188 + }, + { + "epoch": 0.6316574350234342, + "grad_norm": 89.86553292487679, + "learning_rate": 3.9615811158889214e-07, + "loss": 0.4859, + "step": 2965, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9064261555806088, + "success_rate.epoch.env.math": 0.9599521817095039, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7879965457685665, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.865150994557968, + "success_rate.epoch.global": 0.8794326241134752, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9047619047619048, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9934505988023952, + "tokens_p.mean_in_band": 0.4755108173076923, + "tokens_rate.above_band": 0.9277777777777778, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07222222222222222 + }, + { + "epoch": 0.6327226246271836, + "grad_norm": 61.02038454128402, + "learning_rate": 3.961434957367175e-07, + "loss": 0.3879, + "step": 2970, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9065315315315315, + "success_rate.epoch.env.math": 0.9600238663484487, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.7884532529082292, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8652135795527743, + "success_rate.epoch.global": 0.8796358732299393, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956995412844036, + "tokens_p.mean_in_band": 0.6962890625, + "tokens_rate.above_band": 0.9646017699115044, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035398230088495575 + }, + { + "epoch": 0.6337878142309331, + "grad_norm": 24.652628933243395, + "learning_rate": 3.96128853543423e-07, + "loss": 0.251, + "step": 2975, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9057239057239057, + "success_rate.epoch.env.math": 0.9601190476190476, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7882049074472665, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8648021384267214, + "success_rate.epoch.global": 0.8793335577246718, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.5416666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9986877916018663, + "tokens_p.mean_in_band": 0.5513888888888889, + "tokens_rate.above_band": 0.934593023255814, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06540697674418605 + }, + { + "epoch": 0.6348530038346826, + "grad_norm": 92.98172277335975, + "learning_rate": 3.9611418501967965e-07, + "loss": 0.2789, + "step": 2980, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9057239057239057, + "success_rate.epoch.env.math": 0.960166468489893, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.7887505367110348, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8646088002077821, + "success_rate.epoch.global": 0.8793682795698925, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958428899082569, + "tokens_p.mean_in_band": 0.6463815789473685, + "tokens_rate.above_band": 0.919831223628692, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08016877637130802 + }, + { + "epoch": 0.635918193438432, + "grad_norm": 211.65083339587707, + "learning_rate": 3.9609949017617773e-07, + "loss": 0.4691, + "step": 2985, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.905829596412556, + "success_rate.epoch.env.math": 0.9595959595959596, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.788865096359743, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8646915498118826, + "success_rate.epoch.global": 0.8792351559879236, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9962060702875399, + "tokens_p.mean_in_band": 0.4326171875, + "tokens_rate.above_band": 0.9827315541601256, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01726844583987441 + }, + { + "epoch": 0.6369833830421815, + "grad_norm": 315.10729174699105, + "learning_rate": 3.9608476902362684e-07, + "loss": 0.4177, + "step": 2990, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9060402684563759, + "success_rate.epoch.env.math": 0.9596678529062871, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.7887986318939718, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8647274872989654, + "success_rate.epoch.global": 0.87926992632284, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974922839506173, + "tokens_p.mean_in_band": 0.5529513888888888, + "tokens_rate.above_band": 0.9818181818181818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01818181818181818 + }, + { + "epoch": 0.638048572645931, + "grad_norm": 62.47016863990783, + "learning_rate": 3.960700215727556e-07, + "loss": 0.4276, + "step": 2995, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.950530035335689, + "success_rate.epoch.env.logic": 0.9051339285714286, + "success_rate.epoch.env.math": 0.9596917605216361, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.7886421861656704, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.864767317029398, + "success_rate.epoch.global": 0.8791374122367102, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973591549295775, + "tokens_p.mean_in_band": 0.5703125, + "tokens_rate.above_band": 0.9848084544253632, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015191545574636724 + }, + { + "epoch": 0.6391137622496804, + "grad_norm": 70.06722331362305, + "learning_rate": 3.9605524783431176e-07, + "loss": 0.22, + "step": 3000, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9507042253521126, + "success_rate.epoch.env.logic": 0.9053452115812918, + "success_rate.epoch.env.math": 0.9597394908229722, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7889125799573561, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671242230993002, + "success_rate.epoch.global": 0.8793391188251002, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982576438848921, + "tokens_p.mean_in_band": 0.8229166666666666, + "tokens_rate.above_band": 0.9946332737030411, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005366726296958855 + }, + { + "epoch": 0.6401789518534299, + "grad_norm": 55.41039715773697, + "learning_rate": 3.960404478190625e-07, + "loss": 0.3152, + "step": 3005, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.9055555555555556, + "success_rate.epoch.env.math": 0.9597871082199881, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7891822827938672, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867248486025728, + "success_rate.epoch.global": 0.8795401532822392, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996873262923847, + "tokens_p.mean_in_band": 0.7981770833333334, + "tokens_rate.above_band": 0.9983351831298557, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001664816870144284 + }, + { + "epoch": 0.6412441414571793, + "grad_norm": 50.43401833071595, + "learning_rate": 3.960256215377938e-07, + "loss": 0.3123, + "step": 3010, + "success_rate.epoch.env.abd": 0.9853479853479854, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.9058693244739756, + "success_rate.epoch.env.math": 0.9598346131128175, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7895408163265306, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673188201207135, + "success_rate.epoch.global": 0.8797405189620758, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965679190751445, + "tokens_p.mean_in_band": 0.76171875, + "tokens_rate.above_band": 0.9885714285714285, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011428571428571429 + }, + { + "epoch": 0.6423093310609288, + "grad_norm": 69.76955269122864, + "learning_rate": 3.9601076900131104e-07, + "loss": 0.2167, + "step": 3015, + "success_rate.epoch.env.abd": 0.9853479853479854, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.906284454244763, + "success_rate.epoch.env.math": 0.9599056603773585, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7898089171974523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673873908394634, + "success_rate.epoch.global": 0.8799402191962803, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959077380952381, + "tokens_p.mean_in_band": 0.72314453125, + "tokens_rate.above_band": 0.9545454545454546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045454545454545456 + }, + { + "epoch": 0.6433745206646783, + "grad_norm": 173.826529122372, + "learning_rate": 3.959958902204386e-07, + "loss": 0.4126, + "step": 3020, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.951048951048951, + "success_rate.epoch.env.logic": 0.9063876651982379, + "success_rate.epoch.env.math": 0.9599528857479388, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7901653242899533, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674685111884716, + "success_rate.epoch.global": 0.8801392572944297, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9943076208178439, + "tokens_p.mean_in_band": 0.8450520833333334, + "tokens_rate.above_band": 0.9889705882352942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011029411764705883 + }, + { + "epoch": 0.6444397102684278, + "grad_norm": 115.61607271610774, + "learning_rate": 3.9598098520602007e-07, + "loss": 0.3508, + "step": 3025, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.951048951048951, + "success_rate.epoch.env.logic": 0.9065934065934066, + "success_rate.epoch.env.math": 0.9600235155790712, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7900973338975879, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675018734717671, + "success_rate.epoch.global": 0.8801721284342933, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9953703703703703, + "tokens_p.mean_in_band": 0.52734375, + "tokens_rate.above_band": 0.9854014598540146, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014598540145985401 + }, + { + "epoch": 0.6455048998721773, + "grad_norm": 67.72661895035077, + "learning_rate": 3.9596605396891807e-07, + "loss": 0.3197, + "step": 3030, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9066959385290889, + "success_rate.epoch.env.math": 0.9601173020527859, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7897631133671743, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675287308735146, + "success_rate.epoch.global": 0.8802048909451421, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992937853107344, + "tokens_p.mean_in_band": 0.43526785714285715, + "tokens_rate.above_band": 0.9902097902097902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009790209790209791 + }, + { + "epoch": 0.6465700894759268, + "grad_norm": 175.8165967116826, + "learning_rate": 3.9595109652001433e-07, + "loss": 0.4834, + "step": 3035, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9067982456140351, + "success_rate.epoch.env.math": 0.9601873536299765, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.789873417721519, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675544275113765, + "success_rate.epoch.global": 0.8802375453645661, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9901315789473685, + "tokens_p.mean_in_band": 0.41573660714285715, + "tokens_rate.above_band": 0.9313725490196079, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06862745098039216 + }, + { + "epoch": 0.6476352790796762, + "grad_norm": 93.66628429899336, + "learning_rate": 3.959361128702099e-07, + "loss": 0.3501, + "step": 3040, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8901734104046243, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9059080962800875, + "success_rate.epoch.env.math": 0.960233918128655, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7898947368421053, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867009182736889, + "success_rate.epoch.global": 0.8799407114624506, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.575, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9992794296116505, + "tokens_p.mean_in_band": 0.5702582465277778, + "tokens_rate.above_band": 0.958139534883721, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04186046511627907 + }, + { + "epoch": 0.6487004686834257, + "grad_norm": 113.88749145748248, + "learning_rate": 3.9592110303042457e-07, + "loss": 0.3678, + "step": 3045, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8901734104046243, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9050218340611353, + "success_rate.epoch.env.math": 0.9603729603729604, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.790071518721077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669573247281913, + "success_rate.epoch.global": 0.8799736928641894, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0007148692810457, + "tokens_p.mean_in_band": 0.4453125, + "tokens_rate.above_band": 0.9429892141756548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05701078582434515 + }, + { + "epoch": 0.6497656582871751, + "grad_norm": 66.08946418028508, + "learning_rate": 3.959060670115976e-07, + "loss": 0.3665, + "step": 3050, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8901734104046243, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9050218340611353, + "success_rate.epoch.env.math": 0.9604421175101804, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.790180444817457, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670238513727039, + "success_rate.epoch.global": 0.8800065659881812, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962748344370861, + "tokens_p.mean_in_band": 0.5110677083333334, + "tokens_rate.above_band": 0.9263803680981595, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0736196319018405 + }, + { + "epoch": 0.6508308478909246, + "grad_norm": 161.5359544401479, + "learning_rate": 3.958910048246869e-07, + "loss": 0.3103, + "step": 3055, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8908045977011494, + "success_rate.epoch.env.agentgym:sciworld": 0.9695431472081218, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9051254089422028, + "success_rate.epoch.env.math": 0.9605110336817654, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7904442581726739, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671350226282015, + "success_rate.epoch.global": 0.8802032120616191, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987273755656109, + "tokens_p.mean_in_band": 0.7272135416666666, + "tokens_rate.above_band": 0.9910313901345291, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008968609865470852 + }, + { + "epoch": 0.6518960374946741, + "grad_norm": 78.9716837308335, + "learning_rate": 3.9587591648066984e-07, + "loss": 0.2134, + "step": 3060, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8914285714285715, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9052287581699346, + "success_rate.epoch.env.math": 0.9605797101449275, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7907949790794979, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8672532537931094, + "success_rate.epoch.global": 0.8803992146596858, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987725040916531, + "tokens_p.mean_in_band": 0.7825520833333334, + "tokens_rate.above_band": 0.995114006514658, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004885993485342019 + }, + { + "epoch": 0.6529612270984235, + "grad_norm": 113.67140442021879, + "learning_rate": 3.958608019905427e-07, + "loss": 0.4321, + "step": 3065, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8920454545454546, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9055374592833876, + "success_rate.epoch.env.math": 0.9606709080393291, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7908824759514848, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673674861141013, + "success_rate.epoch.global": 0.8805945769356419, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985023961661342, + "tokens_p.mean_in_band": 0.802734375, + "tokens_rate.above_band": 0.9750778816199377, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024922118380062305 + }, + { + "epoch": 0.654026416702173, + "grad_norm": 133.36748476007517, + "learning_rate": 3.958456613653208e-07, + "loss": 0.4466, + "step": 3070, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8920454545454546, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.905742145178765, + "success_rate.epoch.env.math": 0.9606709080393291, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.7914059240717564, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8646081689058666, + "success_rate.epoch.global": 0.8806262230919765, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954173166926678, + "tokens_p.mean_in_band": 0.6647135416666666, + "tokens_rate.above_band": 0.946824224519941, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.053175775480059084 + }, + { + "epoch": 0.6550916063059224, + "grad_norm": 62.85620699386314, + "learning_rate": 3.958304946160384e-07, + "loss": 0.336, + "step": 3075, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8932584269662921, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.9058441558441559, + "success_rate.epoch.env.math": 0.9606936416184971, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.791007493755204, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8643523082326928, + "success_rate.epoch.global": 0.8803321393682839, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9985991379310345, + "tokens_p.mean_in_band": 0.584703947368421, + "tokens_rate.above_band": 0.9682804674457429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03171953255425709 + }, + { + "epoch": 0.6561567959096719, + "grad_norm": 75.00990374215259, + "learning_rate": 3.95815301753749e-07, + "loss": 0.2635, + "step": 3080, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8932584269662921, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.906047516198704, + "success_rate.epoch.env.math": 0.9607843137254902, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.790765391014975, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8643665076995899, + "success_rate.epoch.global": 0.8803641092327699, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980085784313726, + "tokens_p.mean_in_band": 0.4800347222222222, + "tokens_rate.above_band": 0.9577464788732394, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04225352112676056 + }, + { + "epoch": 0.6572219855134214, + "grad_norm": 301.4585408016698, + "learning_rate": 3.958000827895251e-07, + "loss": 0.2665, + "step": 3085, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8932584269662921, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.90625, + "success_rate.epoch.env.math": 0.9608294930875576, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7907845579078456, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8644060562989491, + "success_rate.epoch.global": 0.8803959753326842, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959435096153846, + "tokens_p.mean_in_band": 0.7327008928571429, + "tokens_rate.above_band": 0.9674418604651163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03255813953488372 + }, + { + "epoch": 0.6582871751171708, + "grad_norm": 107.86347945791175, + "learning_rate": 3.957848377344581e-07, + "loss": 0.4075, + "step": 3090, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8938547486033519, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9063509149623251, + "success_rate.epoch.env.math": 0.9609419873635842, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7910447761194029, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645033243977643, + "success_rate.epoch.global": 0.8805897602073882, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946524064171123, + "tokens_p.mean_in_band": 0.8151041666666666, + "tokens_rate.above_band": 0.9842105263157894, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015789473684210527 + }, + { + "epoch": 0.6593523647209203, + "grad_norm": 39.961257713514556, + "learning_rate": 3.957695665996586e-07, + "loss": 0.2415, + "step": 3095, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8938547486033519, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9065520945220193, + "success_rate.epoch.env.math": 0.9609868043602984, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7910633016135705, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645410767449845, + "success_rate.epoch.global": 0.8806211582012293, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968971631205674, + "tokens_p.mean_in_band": 0.57666015625, + "tokens_rate.above_band": 0.9463087248322147, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.053691275167785234 + }, + { + "epoch": 0.6604175543246698, + "grad_norm": 181.14104179507885, + "learning_rate": 3.95754269396256e-07, + "loss": 0.3315, + "step": 3100, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8944444444444445, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9065520945220193, + "success_rate.epoch.env.math": 0.9605488850771869, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7909090909090909, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645408554589391, + "success_rate.epoch.global": 0.8804909560723514, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972014925373134, + "tokens_p.mean_in_band": 0.5950520833333334, + "tokens_rate.above_band": 0.9710144927536232, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028985507246376812 + }, + { + "epoch": 0.6614827439284192, + "grad_norm": 139.8058433972585, + "learning_rate": 3.9573894613539876e-07, + "loss": 0.4565, + "step": 3105, + "success_rate.epoch.env.abd": 0.985663082437276, + "success_rate.epoch.env.agentgym:alfworld": 0.8944444444444445, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9055793991416309, + "success_rate.epoch.env.math": 0.9606164383561644, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7908415841584159, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8644723070315984, + "success_rate.epoch.global": 0.8803611738148984, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997501369112815, + "tokens_p.mean_in_band": 0.5556640625, + "tokens_rate.above_band": 0.9661375661375662, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033862433862433865 + }, + { + "epoch": 0.6625479335321687, + "grad_norm": 112.5701285692833, + "learning_rate": 3.9572359682825435e-07, + "loss": 0.2538, + "step": 3110, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8944444444444445, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9057815845824411, + "success_rate.epoch.env.math": 0.9607061503416856, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7911001236093943, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645270015910819, + "success_rate.epoch.global": 0.8805537669027689, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9951171875, + "tokens_p.mean_in_band": 0.484375, + "tokens_rate.above_band": 0.9896907216494846, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010309278350515464 + }, + { + "epoch": 0.6636131231359182, + "grad_norm": 101.16825630521417, + "learning_rate": 3.957082214860094e-07, + "loss": 0.4569, + "step": 3115, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8950276243093923, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9060832443970117, + "success_rate.epoch.env.math": 0.9607731665719159, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7905349794238683, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645621572023748, + "success_rate.epoch.global": 0.8804243008678881, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986323851203501, + "tokens_p.mean_in_band": 0.3902994791666667, + "tokens_rate.above_band": 0.9870410367170627, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012958963282937365 + }, + { + "epoch": 0.6646783127396677, + "grad_norm": 35.52048006219275, + "learning_rate": 3.956928201198691e-07, + "loss": 0.1316, + "step": 3120, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8950276243093923, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.952054794520548, + "success_rate.epoch.env.logic": 0.9060832443970117, + "success_rate.epoch.env.math": 0.9608399545970489, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7908792111750206, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8646342040393453, + "success_rate.epoch.global": 0.8806161745827985, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989965596330275, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9984732824427481, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0015267175572519084 + }, + { + "epoch": 0.6657435023434172, + "grad_norm": 241.4843036383521, + "learning_rate": 3.9567739274105814e-07, + "loss": 0.2839, + "step": 3125, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8956043956043956, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.952054794520548, + "success_rate.epoch.env.logic": 0.9053191489361702, + "success_rate.epoch.env.math": 0.9609065155807366, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7911366434140337, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8646466284990599, + "success_rate.epoch.global": 0.880647228452419, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99581589958159, + "tokens_p.mean_in_band": 0.741875, + "tokens_rate.above_band": 0.9502982107355865, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04970178926441352 + }, + { + "epoch": 0.6668086919471666, + "grad_norm": 100.44127427879963, + "learning_rate": 3.9566193936081965e-07, + "loss": 0.3357, + "step": 3130, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8961748633879781, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9053191489361702, + "success_rate.epoch.env.math": 0.9609507640067911, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7910692339205244, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8648058522770783, + "success_rate.epoch.global": 0.880678182981446, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965356871678056, + "tokens_p.mean_below_band": 2.8405338525772095e-08, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9984836997725549, + "tokens_rate.below_band": 0.000758150113722517, + "tokens_rate.in_band": 0.000758150113722517 + }, + { + "epoch": 0.6678738815509161, + "grad_norm": 91.8348046926915, + "learning_rate": 3.9564645999041603e-07, + "loss": 0.4371, + "step": 3135, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8961748633879781, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9053191489361702, + "success_rate.epoch.env.math": 0.961038961038961, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7906786590351594, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8647917975605721, + "success_rate.epoch.global": 0.8805493452571064, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.994860197368421, + "tokens_p.mean_in_band": 0.49885110294117646, + "tokens_rate.above_band": 0.8994082840236687, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10059171597633136 + }, + { + "epoch": 0.6689390711546656, + "grad_norm": 31.807500765998263, + "learning_rate": 3.956309546411285e-07, + "loss": 0.4623, + "step": 3140, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9054197662061636, + "success_rate.epoch.env.math": 0.9606299212598425, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7907642010625255, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8648228327028491, + "success_rate.epoch.global": 0.8805803571428571, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8571428571428571, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985677083333333, + "tokens_p.mean_in_band": 0.4677734375, + "tokens_rate.above_band": 0.9836065573770492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01639344262295082 + }, + { + "epoch": 0.670004260758415, + "grad_norm": 29.05227497055417, + "learning_rate": 3.956154233242573e-07, + "loss": 0.1436, + "step": 3145, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9055201698513801, + "success_rate.epoch.env.math": 0.9607623318385651, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.7906122448979592, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8648301834355191, + "success_rate.epoch.global": 0.8806112702960841, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9925595238095238, + "tokens_p.mean_in_band": 0.3203125, + "tokens_rate.above_band": 0.9130434782608695, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08695652173913043 + }, + { + "epoch": 0.6710694503621645, + "grad_norm": 91.6137060369997, + "learning_rate": 3.955998660511216e-07, + "loss": 0.4673, + "step": 3150, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9607843137254902, + "success_rate.epoch.env.sat": 0.13157894736842105, + "success_rate.epoch.env.science": 0.7909535452322738, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8644842879278625, + "success_rate.epoch.global": 0.8804831532104259, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0004052593659942, + "tokens_p.mean_in_band": 0.5079012784090909, + "tokens_rate.above_band": 0.940379403794038, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05962059620596206 + }, + { + "epoch": 0.6721346399659139, + "grad_norm": 52.52254948832143, + "learning_rate": 3.955842828330593e-07, + "loss": 0.318, + "step": 3155, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9714285714285714, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.904862579281184, + "success_rate.epoch.env.math": 0.9608501118568232, + "success_rate.epoch.env.sat": 0.13157894736842105, + "success_rate.epoch.env.science": 0.7912087912087912, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8646284695286632, + "success_rate.epoch.global": 0.8806728022849889, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972893432465924, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.9975278121137207, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002472187886279357 + }, + { + "epoch": 0.6731998295696634, + "grad_norm": 84.86463189748122, + "learning_rate": 3.9556867368142736e-07, + "loss": 0.3367, + "step": 3160, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9714285714285714, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9050632911392406, + "success_rate.epoch.env.math": 0.9609156895589056, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7910569105691057, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8643367484412887, + "success_rate.epoch.global": 0.8805449936628644, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9932885906040269, + "tokens_p.mean_in_band": 0.65375, + "tokens_rate.above_band": 0.8563218390804598, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14367816091954022 + }, + { + "epoch": 0.6742650191734129, + "grad_norm": 107.55001791589437, + "learning_rate": 3.955530386076017e-07, + "loss": 0.2526, + "step": 3165, + "success_rate.epoch.env.abd": 0.9858657243816255, + "success_rate.epoch.env.agentgym:alfworld": 0.8972972972972973, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9714285714285714, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9052631578947369, + "success_rate.epoch.env.math": 0.9609375, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7909902597402597, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8644193121063275, + "success_rate.epoch.global": 0.8805757671622905, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975678066037735, + "tokens_p.mean_in_band": 0.5662109375, + "tokens_rate.above_band": 0.9769585253456221, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02304147465437788 + }, + { + "epoch": 0.6753302087771623, + "grad_norm": 223.61082910181617, + "learning_rate": 3.9553737762297687e-07, + "loss": 0.6083, + "step": 3170, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8983957219251337, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9053627760252366, + "success_rate.epoch.env.math": 0.9609375, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7906034831915756, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645697380680933, + "success_rate.epoch.global": 0.8804485154769425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986087328767124, + "tokens_p.mean_in_band": 0.5404575892857143, + "tokens_rate.above_band": 0.9765886287625418, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023411371237458192 + }, + { + "epoch": 0.6763953983809118, + "grad_norm": 146.32987137747477, + "learning_rate": 3.955216907389667e-07, + "loss": 0.3268, + "step": 3175, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.898936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9046121593291404, + "success_rate.epoch.env.math": 0.9610027855153204, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.790453074433657, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645428933725422, + "success_rate.epoch.global": 0.880321665089877, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9963114754098361, + "tokens_p.mean_in_band": 0.6072916666666667, + "tokens_rate.above_band": 0.953125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046875 + }, + { + "epoch": 0.6774605879846612, + "grad_norm": 52.62957023119499, + "learning_rate": 3.955059779670036e-07, + "loss": 0.2357, + "step": 3180, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.898936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9707317073170731, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9047120418848168, + "success_rate.epoch.env.math": 0.9610678531701891, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7903030303030303, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8645662452960979, + "success_rate.epoch.global": 0.8803526448362721, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995019920318725, + "tokens_p.mean_in_band": 0.68212890625, + "tokens_rate.above_band": 0.9920948616600791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007905138339920948 + }, + { + "epoch": 0.6785257775884107, + "grad_norm": 84.92924496189973, + "learning_rate": 3.954902393185389e-07, + "loss": 0.4566, + "step": 3185, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9047120418848168, + "success_rate.epoch.env.math": 0.9611111111111111, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.790641387656313, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8647105656753827, + "success_rate.epoch.global": 0.8805219305140701, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987383540372671, + "tokens_p.mean_in_band": 0.5364583333333334, + "tokens_rate.above_band": 0.9953632148377125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00463678516228748 + }, + { + "epoch": 0.6795909671921602, + "grad_norm": 148.25663049601903, + "learning_rate": 3.9547447480504283e-07, + "loss": 0.3963, + "step": 3190, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9527027027027027, + "success_rate.epoch.env.logic": 0.9047120418848168, + "success_rate.epoch.env.math": 0.9611111111111111, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7908286403861625, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8648410437293848, + "success_rate.epoch.global": 0.8805525035316277, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980039491758241, + "tokens_p.mean_in_band": 0.7965745192307693, + "tokens_rate.above_band": 0.9911504424778761, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008849557522123894 + }, + { + "epoch": 0.6806561567959096, + "grad_norm": 29.239085193637013, + "learning_rate": 3.9545868443800446e-07, + "loss": 0.2416, + "step": 3195, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.900523560209424, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9527027027027027, + "success_rate.epoch.env.logic": 0.9049111807732497, + "success_rate.epoch.env.math": 0.9611327040533038, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.790529695024077, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8648815297910178, + "success_rate.epoch.global": 0.8804262654756307, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975665983606558, + "tokens_p.mean_in_band": 0.5225694444444444, + "tokens_rate.above_band": 0.9644268774703557, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03557312252964427 + }, + { + "epoch": 0.6817213463996591, + "grad_norm": 295.43416358103303, + "learning_rate": 3.9544286822893164e-07, + "loss": 0.3241, + "step": 3200, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.900523560209424, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9528619528619529, + "success_rate.epoch.env.logic": 0.9051094890510949, + "success_rate.epoch.env.math": 0.9611542730299667, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7909491389667601, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8649541271870579, + "success_rate.epoch.global": 0.8805946791862285, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999334094368341, + "tokens_p.mean_in_band": 0.753125, + "tokens_rate.above_band": 0.9924471299093656, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0075528700906344415 + }, + { + "epoch": 0.6827865360034087, + "grad_norm": 85.90754617058761, + "learning_rate": 3.9542702618935114e-07, + "loss": 0.3404, + "step": 3205, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.9010416666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9530201342281879, + "success_rate.epoch.env.logic": 0.9051094890510949, + "success_rate.epoch.env.math": 0.9612403100775194, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7900839664134346, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8649447773977584, + "success_rate.epoch.global": 0.8803125, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9992541766109785, + "tokens_p.mean_in_band": 0.6747532894736842, + "tokens_rate.above_band": 0.9778296382730455, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022170361726954493 + }, + { + "epoch": 0.6838517256071581, + "grad_norm": 47.95526373441156, + "learning_rate": 3.954111583308086e-07, + "loss": 0.3169, + "step": 3210, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.9015544041450777, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9043659043659044, + "success_rate.epoch.env.math": 0.9612831858407079, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7902516979624451, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8649616515030641, + "success_rate.epoch.global": 0.8803432137285492, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982945810914681, + "tokens_p.mean_in_band": 0.5130208333333334, + "tokens_rate.above_band": 0.9886018237082067, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011398176291793313 + }, + { + "epoch": 0.6849169152109076, + "grad_norm": 866.405241175585, + "learning_rate": 3.953952646648683e-07, + "loss": 0.4581, + "step": 3215, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.9015544041450777, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9043659043659044, + "success_rate.epoch.env.math": 0.9613473219215903, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7899561578318055, + "success_rate.epoch.env.webshop": 0.9705882352941176, + "success_rate.epoch.env_macro_mean": 0.8650216389164633, + "success_rate.epoch.global": 0.8802180685358255, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976027397260274, + "tokens_p.mean_in_band": 0.4996995192307692, + "tokens_rate.above_band": 0.9656084656084656, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03439153439153439 + }, + { + "epoch": 0.685982104814657, + "grad_norm": 114.41905403715027, + "learning_rate": 3.9537934520311346e-07, + "loss": 0.3582, + "step": 3220, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9711538461538461, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9046632124352332, + "success_rate.epoch.env.math": 0.9613899613899614, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7898089171974523, + "success_rate.epoch.env.webshop": 0.9705882352941176, + "success_rate.epoch.env_macro_mean": 0.8650979581736085, + "success_rate.epoch.global": 0.880248833592535, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969798657718121, + "tokens_p.mean_in_band": 0.5189732142857143, + "tokens_rate.above_band": 0.9906914893617021, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009308510638297872 + }, + { + "epoch": 0.6870472944184065, + "grad_norm": 535.4498004762445, + "learning_rate": 3.953633999571461e-07, + "loss": 0.3363, + "step": 3225, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9712918660287081, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9048603929679421, + "success_rate.epoch.env.math": 0.9614325068870524, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7900596421471173, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8652358854763228, + "success_rate.epoch.global": 0.8804347826086957, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996789383561644, + "tokens_p.mean_in_band": 0.857421875, + "tokens_rate.above_band": 0.9965870307167235, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0034129692832764505 + }, + { + "epoch": 0.688112484022156, + "grad_norm": 38.29977573786904, + "learning_rate": 3.95347428938587e-07, + "loss": 0.3186, + "step": 3230, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9712918660287081, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9048603929679421, + "success_rate.epoch.env.math": 0.9614961496149615, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.790079365079365, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8652620218315472, + "success_rate.epoch.global": 0.8804651162790698, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985981308411215, + "tokens_p.mean_in_band": 0.7447916666666666, + "tokens_rate.above_band": 0.9834558823529411, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016544117647058824 + }, + { + "epoch": 0.6891776736259054, + "grad_norm": 171.37582553234643, + "learning_rate": 3.953314321590757e-07, + "loss": 0.3422, + "step": 3235, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9049586776859504, + "success_rate.epoch.env.math": 0.9615595826468973, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7904950495049505, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8653269407020374, + "success_rate.epoch.global": 0.8806501547987616, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9935213414634146, + "tokens_p.mean_in_band": 0.796875, + "tokens_rate.above_band": 0.9704142011834319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029585798816568046 + }, + { + "epoch": 0.6902428632296549, + "grad_norm": 514.0063047416629, + "learning_rate": 3.953154096302705e-07, + "loss": 0.2892, + "step": 3240, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9050567595459237, + "success_rate.epoch.env.math": 0.9616438356164384, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7905138339920948, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.865345224276279, + "success_rate.epoch.global": 0.8806800618238022, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9939759036144579, + "tokens_p.mean_in_band": 0.5625, + "tokens_rate.above_band": 0.8736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12631578947368421 + }, + { + "epoch": 0.6913080528334044, + "grad_norm": 67.02260207537304, + "learning_rate": 3.952993613638485e-07, + "loss": 0.1984, + "step": 3245, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9715639810426541, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9501661129568106, + "success_rate.epoch.env.logic": 0.9053497942386831, + "success_rate.epoch.env.math": 0.9616648411829135, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.790761942360837, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8651250483551053, + "success_rate.epoch.global": 0.8807098765432099, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996900826446281, + "tokens_p.mean_in_band": 0.6171875, + "tokens_rate.above_band": 0.983739837398374, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016260162601626018 + }, + { + "epoch": 0.6923732424371538, + "grad_norm": 104.4845828693136, + "learning_rate": 3.9528328737150573e-07, + "loss": 0.1751, + "step": 3250, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9503311258278145, + "success_rate.epoch.env.logic": 0.9054470709146968, + "success_rate.epoch.env.math": 0.9617277200656096, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7906151419558359, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8652217075315015, + "success_rate.epoch.global": 0.8807395993836672, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982707509881423, + "tokens_p.mean_in_band": 0.5966796875, + "tokens_rate.above_band": 0.9768339768339769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023166023166023165 + }, + { + "epoch": 0.6934384320409033, + "grad_norm": 129.07205380525727, + "learning_rate": 3.9526718766495663e-07, + "loss": 0.6632, + "step": 3255, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, + "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9504950495049505, + "success_rate.epoch.env.logic": 0.9047131147540983, + "success_rate.epoch.env.math": 0.9612445414847162, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.790862544308783, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8651484521941007, + "success_rate.epoch.global": 0.8806153846153846, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9945570570570571, + "tokens_p.mean_in_band": 0.6122159090909091, + "tokens_rate.above_band": 0.9680232558139535, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03197674418604651 + }, + { + "epoch": 0.6945036216446527, + "grad_norm": 77.98550794680186, + "learning_rate": 3.9525106225593454e-07, + "loss": 0.1984, + "step": 3260, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9025641025641026, + "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.9050051072522982, + "success_rate.epoch.env.math": 0.9612868047982552, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7909448818897638, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8653336443492431, + "success_rate.epoch.global": 0.8807987711213517, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997093023255814, + "tokens_p.mean_in_band": 0.7395833333333334, + "tokens_rate.above_band": 0.996523754345307, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0034762456546929316 + }, + { + "epoch": 0.6955688112484022, + "grad_norm": 181.03549815716272, + "learning_rate": 3.9523491115619166e-07, + "loss": 0.284, + "step": 3265, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, + "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9509803921568627, + "success_rate.epoch.env.logic": 0.9051987767584098, + "success_rate.epoch.env.math": 0.9608056614044638, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7911915060951632, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8653897345555275, + "success_rate.epoch.global": 0.8808282208588957, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333333, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979979108635098, + "tokens_p.mean_in_band": 0.7078125, + "tokens_rate.above_band": 0.9862637362637363, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013736263736263736 + }, + { + "epoch": 0.6966340008521517, + "grad_norm": 48.56063784795593, + "learning_rate": 3.9521873437749874e-07, + "loss": 0.2744, + "step": 3270, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, + "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9509803921568627, + "success_rate.epoch.env.logic": 0.9053916581892166, + "success_rate.epoch.env.math": 0.9609120521172638, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7910447761194029, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8654036020253316, + "success_rate.epoch.global": 0.8808575803981623, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936835106382979, + "tokens_p.mean_in_band": 0.638671875, + "tokens_rate.above_band": 0.8867924528301887, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11320754716981132 + }, + { + "epoch": 0.6976991904559011, + "grad_norm": 184.814521263067, + "learning_rate": 3.9520253193164525e-07, + "loss": 0.2215, + "step": 3275, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, + "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9511400651465798, + "success_rate.epoch.env.logic": 0.9046653144016227, + "success_rate.epoch.env.math": 0.9609332609875203, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7912087912087912, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8654612208914592, + "success_rate.epoch.global": 0.8808868501529052, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992125331564987, + "tokens_p.mean_in_band": 0.68359375, + "tokens_rate.above_band": 0.9856209150326798, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01437908496732026 + }, + { + "epoch": 0.6987643800596506, + "grad_norm": 215.02758437928475, + "learning_rate": 3.951863038304395e-07, + "loss": 0.4055, + "step": 3280, + "success_rate.epoch.env.abd": 0.9862542955326461, + "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, + "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9512987012987013, + "success_rate.epoch.env.logic": 0.9040404040404041, + "success_rate.epoch.env.math": 0.9604550379198267, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7908983915260887, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8653514483479526, + "success_rate.epoch.global": 0.8806106870229008, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.6833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9965625, + "tokens_p.mean_below_band": 6.007030606269836e-08, + "tokens_p.mean_in_band": 0.506103515625, + "tokens_rate.above_band": 0.9823182711198428, + "tokens_rate.below_band": 0.0019646365422396855, + "tokens_rate.in_band": 0.015717092337917484 + }, + { + "epoch": 0.6998295696634, + "grad_norm": 161.25177275927612, + "learning_rate": 3.9517005008570833e-07, + "loss": 0.3882, + "step": 3285, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8984771573604061, + "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9512987012987013, + "success_rate.epoch.env.logic": 0.9041372351160444, + "success_rate.epoch.env.math": 0.9605191995673337, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.791226008617313, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8649834135311661, + "success_rate.epoch.global": 0.880640243902439, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965572033898306, + "tokens_p.mean_in_band": 0.656640625, + "tokens_rate.above_band": 0.979253112033195, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02074688796680498 + }, + { + "epoch": 0.7008947592671495, + "grad_norm": 668.8095306826872, + "learning_rate": 3.9515377070929745e-07, + "loss": 0.3655, + "step": 3290, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, + "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9044265593561368, + "success_rate.epoch.env.math": 0.9605831533477321, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7913077525450274, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.865131951184332, + "success_rate.epoch.global": 0.8808219178082192, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997614503816794, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9974619289340102, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0025380710659898475 + }, + { + "epoch": 0.7019599488708991, + "grad_norm": 104.10640438964306, + "learning_rate": 3.951374657130711e-07, + "loss": 0.2848, + "step": 3295, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, + "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9045226130653267, + "success_rate.epoch.env.math": 0.9606681034482759, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7912431587177482, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8648635144640626, + "success_rate.epoch.global": 0.8806990881458967, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9855510752688172, + "tokens_p.mean_in_band": 0.74072265625, + "tokens_rate.above_band": 0.840867992766727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15913200723327306 + }, + { + "epoch": 0.7030251384746485, + "grad_norm": 223.18411783631583, + "learning_rate": 3.951211351089122e-07, + "loss": 0.4322, + "step": 3300, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, + "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9046184738955824, + "success_rate.epoch.env.math": 0.960752688172043, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7912602419040188, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.864881471622271, + "success_rate.epoch.global": 0.8807283763277693, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9930167597765364, + "tokens_p.mean_in_band": 0.7779947916666666, + "tokens_rate.above_band": 0.93717277486911, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06282722513089005 + }, + { + "epoch": 0.704090328078398, + "grad_norm": 82.82949553281563, + "learning_rate": 3.951047789087224e-07, + "loss": 0.2821, + "step": 3305, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, + "success_rate.epoch.env.agentgym:sciworld": 0.9720930232558139, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9047141424272819, + "success_rate.epoch.env.math": 0.9608158883521203, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7916666666666666, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8649447170684355, + "success_rate.epoch.global": 0.8809090909090909, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9942781690140845, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9726027397260274, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0273972602739726 + }, + { + "epoch": 0.7051555176821475, + "grad_norm": 179.93621902742274, + "learning_rate": 3.950883971244221e-07, + "loss": 0.2719, + "step": 3310, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9720930232558139, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9039039039039038, + "success_rate.epoch.env.math": 0.960857908847185, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7916018662519441, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8649608154836234, + "success_rate.epoch.global": 0.880786686838124, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975349872773537, + "tokens_p.mean_in_band": 0.3880208333333333, + "tokens_rate.above_band": 0.9961977186311787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0038022813688212928 + }, + { + "epoch": 0.7062207072858969, + "grad_norm": 101.51721690606249, + "learning_rate": 3.9507198976795e-07, + "loss": 0.2256, + "step": 3315, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.903, + "success_rate.epoch.env.math": 0.9603429796355841, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7920062087698875, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8649104118756781, + "success_rate.epoch.global": 0.8806646525679759, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988149578651685, + "tokens_p.mean_in_band": 0.3966796875, + "tokens_rate.above_band": 0.99302649930265, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00697350069735007 + }, + { + "epoch": 0.7072858968896464, + "grad_norm": 119.79466163351181, + "learning_rate": 3.9505555685126384e-07, + "loss": 0.2835, + "step": 3320, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9021956087824351, + "success_rate.epoch.env.math": 0.960427807486631, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7922480620155039, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8648712339559489, + "success_rate.epoch.global": 0.8806938159879336, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0006397637795275, + "tokens_p.mean_in_band": 0.5681818181818182, + "tokens_rate.above_band": 0.9665144596651446, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0334855403348554 + }, + { + "epoch": 0.7083510864933958, + "grad_norm": 93.6873784895068, + "learning_rate": 3.950390983863398e-07, + "loss": 0.4759, + "step": 3325, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9488817891373802, + "success_rate.epoch.env.logic": 0.9021956087824351, + "success_rate.epoch.env.math": 0.9599358974358975, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7923433874709976, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.864861712502923, + "success_rate.epoch.global": 0.8805722891566266, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961811531841652, + "tokens_p.mean_in_band": 0.4685202205882353, + "tokens_rate.above_band": 0.9715719063545151, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028428093645484948 + }, + { + "epoch": 0.7094162760971453, + "grad_norm": 92.5826567443287, + "learning_rate": 3.950226143851727e-07, + "loss": 0.2608, + "step": 3330, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9490445859872612, + "success_rate.epoch.env.logic": 0.9024875621890547, + "success_rate.epoch.env.math": 0.9594882729211087, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7924236567452648, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649343154637704, + "success_rate.epoch.global": 0.8806015037593985, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974727838258165, + "tokens_p.mean_in_band": 0.6740451388888888, + "tokens_rate.above_band": 0.9861963190184049, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013803680981595092 + }, + { + "epoch": 0.7104814657008948, + "grad_norm": 133.45518713465225, + "learning_rate": 3.950061048597758e-07, + "loss": 0.354, + "step": 3335, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9490445859872612, + "success_rate.epoch.env.logic": 0.9024875621890547, + "success_rate.epoch.env.math": 0.9595529536987759, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7921326648669494, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649179630870055, + "success_rate.epoch.global": 0.8804804804804804, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9948326771653543, + "tokens_p.mean_in_band": 0.707275390625, + "tokens_rate.above_band": 0.8881118881118881, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11188811188811189 + }, + { + "epoch": 0.7115466553046442, + "grad_norm": 44.22495932755032, + "learning_rate": 3.9498956982218126e-07, + "loss": 0.223, + "step": 3340, + "success_rate.epoch.env.abd": 0.9865319865319865, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9490445859872612, + "success_rate.epoch.env.logic": 0.9024875621890547, + "success_rate.epoch.env.math": 0.9595959595959596, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.792147806004619, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649357427000701, + "success_rate.epoch.global": 0.8805097451274363, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.3984375, + "tokens_rate.above_band": 0.9562841530054644, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04371584699453552 + }, + { + "epoch": 0.7126118449083937, + "grad_norm": 494.0600063833727, + "learning_rate": 3.949730092844397e-07, + "loss": 0.2885, + "step": 3345, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9492063492063492, + "success_rate.epoch.env.logic": 0.9017857142857143, + "success_rate.epoch.env.math": 0.9596388741370154, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7923076923076923, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649207194856502, + "success_rate.epoch.global": 0.8805389221556886, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993990384615384, + "tokens_p.mean_in_band": 0.4153225806451613, + "tokens_rate.above_band": 0.991555434486516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008444565513484064 + }, + { + "epoch": 0.7136770345121431, + "grad_norm": 53.50826643689471, + "learning_rate": 3.949564232586203e-07, + "loss": 0.2906, + "step": 3350, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.901980198019802, + "success_rate.epoch.env.math": 0.9597030752916225, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7925470610833654, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8650265234466197, + "success_rate.epoch.global": 0.8807174887892377, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993742638398115, + "tokens_p.mean_in_band": 0.8203125, + "tokens_rate.above_band": 0.9964788732394366, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0035211267605633804 + }, + { + "epoch": 0.7147422241158926, + "grad_norm": 99.39295881817814, + "learning_rate": 3.9493981175681083e-07, + "loss": 0.3975, + "step": 3355, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.901980198019802, + "success_rate.epoch.env.math": 0.9597883597883597, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7926408585665006, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8650428036266081, + "success_rate.epoch.global": 0.8807462686567165, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9166666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9924768518518519, + "tokens_p.mean_in_band": 0.6631944444444444, + "tokens_rate.above_band": 0.9230769230769231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07692307692307693 + }, + { + "epoch": 0.7158074137196421, + "grad_norm": 298.4261496116819, + "learning_rate": 3.9492317479111767e-07, + "loss": 0.2755, + "step": 3360, + "success_rate.epoch.env.abd": 0.9866220735785953, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9011857707509882, + "success_rate.epoch.env.math": 0.9598732840549102, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7928790199081164, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8650040355093257, + "success_rate.epoch.global": 0.8807749627421758, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0011354660347551, + "tokens_p.mean_in_band": 0.5427631578947368, + "tokens_rate.above_band": 0.9708588957055214, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029141104294478526 + }, + { + "epoch": 0.7168726033233915, + "grad_norm": 24.211421703463163, + "learning_rate": 3.9490651237366565e-07, + "loss": 0.3115, + "step": 3365, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9003944773175543, + "success_rate.epoch.env.math": 0.9599578503688093, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7931166347992352, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649654428602035, + "success_rate.epoch.global": 0.8808035714285715, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9960029069767442, + "tokens_p.mean_in_band": 0.477734375, + "tokens_rate.above_band": 0.9678456591639871, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03215434083601286 + }, + { + "epoch": 0.717937792927141, + "grad_norm": 123.13488311486617, + "learning_rate": 3.948898245165982e-07, + "loss": 0.4743, + "step": 3370, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9003944773175543, + "success_rate.epoch.env.math": 0.9595588235294118, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7928926251432938, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8645487671752087, + "success_rate.epoch.global": 0.8805349182763744, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.6111111111111112, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9989754098360656, + "tokens_p.mean_below_band": 8.149072527885437e-09, + "tokens_p.mean_in_band": 0.5138221153846154, + "tokens_rate.above_band": 0.9751332149200711, + "tokens_rate.below_band": 0.0017761989342806395, + "tokens_rate.in_band": 0.023090586145648313 + }, + { + "epoch": 0.7190029825308905, + "grad_norm": 54.784791052820545, + "learning_rate": 3.948731112320775e-07, + "loss": 0.2882, + "step": 3375, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9495268138801262, + "success_rate.epoch.env.logic": 0.9006882989183874, + "success_rate.epoch.env.math": 0.959601259181532, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7932086989698588, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8646225904799405, + "success_rate.epoch.global": 0.8807121661721068, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985593971631206, + "tokens_p.mean_in_band": 0.7109375, + "tokens_rate.above_band": 0.986013986013986, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013986013986013986 + }, + { + "epoch": 0.7200681721346399, + "grad_norm": 369.140327671539, + "learning_rate": 3.9485637253228387e-07, + "loss": 0.4746, + "step": 3380, + "success_rate.epoch.env.abd": 0.9867549668874173, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9495268138801262, + "success_rate.epoch.env.logic": 0.900883218842002, + "success_rate.epoch.env.math": 0.959643605870021, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7927619047619048, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8646115698094768, + "success_rate.epoch.global": 0.8805925925925926, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942781690140845, + "tokens_p.mean_in_band": 0.6019965277777778, + "tokens_rate.above_band": 0.9403973509933775, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.059602649006622516 + }, + { + "epoch": 0.7211333617383894, + "grad_norm": 76.21711864528496, + "learning_rate": 3.948396084294164e-07, + "loss": 0.5591, + "step": 3385, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9495268138801262, + "success_rate.epoch.env.logic": 0.9000979431929481, + "success_rate.epoch.env.math": 0.9596858638743455, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7931558935361217, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8642837838164819, + "success_rate.epoch.global": 0.8804733727810651, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9918769113149847, + "tokens_p.mean_in_band": 0.24728265942353644, + "tokens_rate.above_band": 0.1634182908545727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.8365817091454273 + }, + { + "epoch": 0.722198551342139, + "grad_norm": 132.382102030933, + "learning_rate": 3.9482281893569267e-07, + "loss": 0.2995, + "step": 3390, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9498432601880877, + "success_rate.epoch.env.logic": 0.9001956947162426, + "success_rate.epoch.env.math": 0.9597280334728033, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.793168880455408, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8643264523936636, + "success_rate.epoch.global": 0.8805022156573117, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969758064516129, + "tokens_p.mean_in_band": 0.6006944444444444, + "tokens_rate.above_band": 0.9897377423033067, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010262257696693273 + }, + { + "epoch": 0.7232637409458884, + "grad_norm": 138.13340499455256, + "learning_rate": 3.948060040633488e-07, + "loss": 0.316, + "step": 3395, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9498432601880877, + "success_rate.epoch.env.logic": 0.9002932551319648, + "success_rate.epoch.env.math": 0.9597701149425287, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7937168811506434, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8643889653555438, + "success_rate.epoch.global": 0.8806784660766962, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.991304347826087, + "tokens_p.mean_in_band": 0.7490234375, + "tokens_rate.above_band": 0.9349593495934959, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06504065040650407 + }, + { + "epoch": 0.7243289305496379, + "grad_norm": 99.86950913131045, + "learning_rate": 3.9478916382463923e-07, + "loss": 0.4035, + "step": 3400, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, + "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9498432601880877, + "success_rate.epoch.env.logic": 0.9002932551319648, + "success_rate.epoch.env.math": 0.9598540145985401, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7933509633547412, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8643747523888123, + "success_rate.epoch.global": 0.880559646539028, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9953663793103448, + "tokens_p.mean_in_band": 0.5267857142857143, + "tokens_rate.above_band": 0.9119496855345912, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0880503144654088 + }, + { + "epoch": 0.7253941201533873, + "grad_norm": 252.53708598592215, + "learning_rate": 3.947722982318371e-07, + "loss": 0.5432, + "step": 3405, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9002932551319648, + "success_rate.epoch.env.math": 0.9599167100468506, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7936627687665032, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8645157033198124, + "success_rate.epoch.global": 0.8807352941176471, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979838709677419, + "tokens_p.mean_in_band": 0.54296875, + "tokens_rate.above_band": 0.9914712153518124, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008528784648187633 + }, + { + "epoch": 0.7264593097571368, + "grad_norm": 75.69768401986141, + "learning_rate": 3.947554072972339e-07, + "loss": 0.232, + "step": 3410, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8975609756097561, + "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9002932551319648, + "success_rate.epoch.env.math": 0.96, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7940512048192772, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8646042376912912, + "success_rate.epoch.global": 0.8809104258443465, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981235565819861, + "tokens_p.mean_in_band": 0.70849609375, + "tokens_rate.above_band": 0.9908466819221968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009153318077803204 + }, + { + "epoch": 0.7275244993608863, + "grad_norm": 110.31940769978746, + "learning_rate": 3.947384910331396e-07, + "loss": 0.2308, + "step": 3415, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8975609756097561, + "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.900390625, + "success_rate.epoch.env.math": 0.9600415153087701, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7945925647765678, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8646660781580272, + "success_rate.epoch.global": 0.8810850439882698, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9904891304347826, + "tokens_p.mean_in_band": 0.8078125, + "tokens_rate.above_band": 0.965034965034965, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03496503496503497 + }, + { + "epoch": 0.7285896889646357, + "grad_norm": 111.18847149318067, + "learning_rate": 3.947215494518827e-07, + "loss": 0.3533, + "step": 3420, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, + "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9004878048780488, + "success_rate.epoch.env.math": 0.9601036269430052, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7945256842894638, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8647338464382531, + "success_rate.epoch.global": 0.881112737920937, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982538535645472, + "tokens_p.mean_in_band": 0.57421875, + "tokens_rate.above_band": 0.9904580152671756, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009541984732824428 + }, + { + "epoch": 0.7296548785683852, + "grad_norm": 51.36233429912145, + "learning_rate": 3.9470458256581007e-07, + "loss": 0.4076, + "step": 3425, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, + "success_rate.epoch.env.agentgym:sciworld": 0.9727272727272728, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9005847953216374, + "success_rate.epoch.env.math": 0.9601449275362319, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7941616766467066, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8647893058881299, + "success_rate.epoch.global": 0.8809941520467837, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.992505081300813, + "tokens_p.mean_in_band": 0.60595703125, + "tokens_rate.above_band": 0.9389312977099237, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.061068702290076333 + }, + { + "epoch": 0.7307200681721346, + "grad_norm": 145.25772227247307, + "learning_rate": 3.94687590387287e-07, + "loss": 0.3291, + "step": 3430, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, + "success_rate.epoch.env.agentgym:sciworld": 0.9727272727272728, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9006815968841285, + "success_rate.epoch.env.math": 0.9602478058853898, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7940956651718983, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8648014575642062, + "success_rate.epoch.global": 0.881021897810219, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954166666666666, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9433962264150944, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05660377358490566 + }, + { + "epoch": 0.7317852577758841, + "grad_norm": 232.9425916066108, + "learning_rate": 3.946705729286974e-07, + "loss": 0.2694, + "step": 3435, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, + "success_rate.epoch.env.agentgym:sciworld": 0.9727272727272728, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9008746355685131, + "success_rate.epoch.env.math": 0.9603297269448737, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7940298507462686, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8648204707749553, + "success_rate.epoch.global": 0.8810495626822158, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9935897435897436, + "tokens_p.mean_in_band": 0.6338975694444444, + "tokens_rate.above_band": 0.9285714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07142857142857142 + }, + { + "epoch": 0.7328504473796336, + "grad_norm": 141.46887205025453, + "learning_rate": 3.9465353020244336e-07, + "loss": 0.4536, + "step": 3440, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, + "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9008746355685131, + "success_rate.epoch.env.math": 0.9603705609881626, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7937453462397617, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8648144723464557, + "success_rate.epoch.global": 0.8809315866084425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997093023255814, + "tokens_p.mean_in_band": 0.4880642361111111, + "tokens_rate.above_band": 0.9828571428571429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017142857142857144 + }, + { + "epoch": 0.733915636983383, + "grad_norm": 246.5183921455988, + "learning_rate": 3.946364622209456e-07, + "loss": 0.6791, + "step": 3445, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8995215311004785, + "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9000969932104753, + "success_rate.epoch.env.math": 0.9604113110539846, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.7932316846411306, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8648338109630417, + "success_rate.epoch.global": 0.8806686046511628, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7083333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.999771897810219, + "tokens_p.mean_in_band": 0.4527622767857143, + "tokens_rate.above_band": 0.9750889679715302, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02491103202846975 + }, + { + "epoch": 0.7349808265871325, + "grad_norm": 178.60062676322593, + "learning_rate": 3.94619368996643e-07, + "loss": 0.3199, + "step": 3450, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8995215311004785, + "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9000969932104753, + "success_rate.epoch.env.math": 0.9604925602873269, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7934621099554234, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8646287594990675, + "success_rate.epoch.global": 0.8806966618287373, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987555309734514, + "tokens_p.mean_in_band": 0.7517361111111112, + "tokens_rate.above_band": 0.9617021276595744, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03829787234042553 + }, + { + "epoch": 0.736046016190882, + "grad_norm": 312.11935835420775, + "learning_rate": 3.946022505419931e-07, + "loss": 0.3558, + "step": 3455, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8995215311004785, + "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9001937984496124, + "success_rate.epoch.env.math": 0.9605734767025089, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7934742306266221, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8646460178922964, + "success_rate.epoch.global": 0.8807246376811594, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9903846153846154, + "tokens_p.mean_in_band": 0.680084228515625, + "tokens_rate.above_band": 0.8666666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13333333333333333 + }, + { + "epoch": 0.7371112057946314, + "grad_norm": 283.3552891098637, + "learning_rate": 3.945851068694716e-07, + "loss": 0.3878, + "step": 3460, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9001937984496124, + "success_rate.epoch.env.math": 0.960613810741688, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7934863064396743, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8647667394217831, + "success_rate.epoch.global": 0.8807525325615051, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966044142614601, + "tokens_p.mean_in_band": 0.6845703125, + "tokens_rate.above_band": 0.9865996649916248, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01340033500837521 + }, + { + "epoch": 0.7381763953983809, + "grad_norm": 57.614269926790676, + "learning_rate": 3.945679379915728e-07, + "loss": 0.3129, + "step": 3465, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9730941704035875, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9005791505791506, + "success_rate.epoch.env.math": 0.9606741573033708, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7936390532544378, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8648321615069284, + "success_rate.epoch.global": 0.8809248554913295, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967672413793104, + "tokens_p.mean_in_band": 0.818359375, + "tokens_rate.above_band": 0.9775280898876404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02247191011235955 + }, + { + "epoch": 0.7392415850021303, + "grad_norm": 208.87748558830458, + "learning_rate": 3.9455074392080924e-07, + "loss": 0.4107, + "step": 3470, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9730941704035875, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9005791505791506, + "success_rate.epoch.env.math": 0.9602649006622517, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7935745937961596, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8648030816141303, + "success_rate.epoch.global": 0.8808080808080808, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983850129198967, + "tokens_p.mean_in_band": 0.353515625, + "tokens_rate.above_band": 0.9699248120300752, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03007518796992481 + }, + { + "epoch": 0.7403067746058798, + "grad_norm": 97.87773724970161, + "learning_rate": 3.945335246697118e-07, + "loss": 0.4721, + "step": 3475, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9732142857142857, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9005791505791506, + "success_rate.epoch.env.math": 0.960285132382892, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7930780559646539, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.864770700632297, + "success_rate.epoch.global": 0.8805475504322766, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.625, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9979016786570744, + "tokens_p.mean_in_band": 0.5667067307692307, + "tokens_rate.above_band": 0.9697674418604652, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030232558139534883 + }, + { + "epoch": 0.7413719642096294, + "grad_norm": 116.52193815172264, + "learning_rate": 3.9451628025082966e-07, + "loss": 0.2187, + "step": 3480, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9004739336492891, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9006750241080038, + "success_rate.epoch.env.math": 0.9603457041179461, + "success_rate.epoch.env.sat": 0.12195121951219512, + "success_rate.epoch.env.science": 0.7929385803604266, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8645489888541344, + "success_rate.epoch.global": 0.880431654676259, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980377906976744, + "tokens_p.mean_in_band": 0.6788194444444444, + "tokens_rate.above_band": 0.9598214285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04017857142857143 + }, + { + "epoch": 0.7424371538133788, + "grad_norm": 146.8391565509642, + "learning_rate": 3.9449901067673057e-07, + "loss": 0.2319, + "step": 3485, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9004739336492891, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9007707129094412, + "success_rate.epoch.env.math": 0.9603859827323514, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7930275229357798, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8664699737028747, + "success_rate.epoch.global": 0.8804597701149425, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979807692307693, + "tokens_p.mean_in_band": 0.7337239583333334, + "tokens_rate.above_band": 0.9908536585365854, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009146341463414634 + }, + { + "epoch": 0.7435023434171283, + "grad_norm": 87.83358489027319, + "learning_rate": 3.9448171596000035e-07, + "loss": 0.5532, + "step": 3490, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9004739336492891, + "success_rate.epoch.env.agentgym:sciworld": 0.973568281938326, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9008662175168431, + "success_rate.epoch.env.math": 0.9604662949822605, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7932551319648093, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8665280077475415, + "success_rate.epoch.global": 0.8806312769010043, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998126102292769, + "tokens_p.mean_in_band": 0.8020833333333334, + "tokens_rate.above_band": 0.984375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015625 + }, + { + "epoch": 0.7445675330208777, + "grad_norm": 140.63870216443541, + "learning_rate": 3.9446439611324345e-07, + "loss": 0.3322, + "step": 3495, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.973568281938326, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9009615384615385, + "success_rate.epoch.env.math": 0.960546282245827, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7931918008784773, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8665808659020009, + "success_rate.epoch.global": 0.8806590257879656, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952431289640592, + "tokens_p.mean_in_band": 0.6541466346153846, + "tokens_rate.above_band": 0.9732510288065843, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026748971193415638 + }, + { + "epoch": 0.7456327226246272, + "grad_norm": 41.51084693612499, + "learning_rate": 3.9444705114908223e-07, + "loss": 0.2664, + "step": 3500, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.973568281938326, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9506172839506173, + "success_rate.epoch.env.logic": 0.9010566762728146, + "success_rate.epoch.env.math": 0.9605662285136501, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7933552391383717, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8666200849933446, + "success_rate.epoch.global": 0.8806866952789699, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958425720620843, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.986870897155361, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01312910284463895 + }, + { + "epoch": 0.7466979122283767, + "grad_norm": 76.42118417073381, + "learning_rate": 3.944296810801577e-07, + "loss": 0.2915, + "step": 3505, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9011516314779271, + "success_rate.epoch.env.math": 0.9606259464916709, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7930656934306569, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8684716907498451, + "success_rate.epoch.global": 0.8807142857142857, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0001061120543293, + "tokens_p.mean_in_band": 0.66845703125, + "tokens_rate.above_band": 0.9932546374367622, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006745362563237774 + }, + { + "epoch": 0.7477631018321261, + "grad_norm": 209.20302871249308, + "learning_rate": 3.9441228591912903e-07, + "loss": 0.3736, + "step": 3510, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9003831417624522, + "success_rate.epoch.env.math": 0.9606854838709677, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7929274516952242, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8684416240604446, + "success_rate.epoch.global": 0.8805991440798859, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0006471893491125, + "tokens_p.mean_in_band": 0.6270559210526315, + "tokens_rate.above_band": 0.9726618705035971, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027338129496402876 + }, + { + "epoch": 0.7488282914358756, + "grad_norm": 72.92206030945077, + "learning_rate": 3.943948656786737e-07, + "loss": 0.3632, + "step": 3515, + "success_rate.epoch.env.abd": 0.9837133550488599, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9003831417624522, + "success_rate.epoch.env.math": 0.9607448414695521, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7929403202328966, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8684578989520869, + "success_rate.epoch.global": 0.8806267806267807, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9939983443708609, + "tokens_p.mean_in_band": 0.29296875, + "tokens_rate.above_band": 0.993421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006578947368421052 + }, + { + "epoch": 0.749893481039625, + "grad_norm": 43.01160491240064, + "learning_rate": 3.943774203714874e-07, + "loss": 0.4027, + "step": 3520, + "success_rate.epoch.env.abd": 0.9837662337662337, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9005736137667304, + "success_rate.epoch.env.math": 0.9607843137254902, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7925899019251725, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8684517539220749, + "success_rate.epoch.global": 0.8805120910384068, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.990728021978022, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.883495145631068, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11650485436893204 + }, + { + "epoch": 0.7509586706433745, + "grad_norm": 52.05738905273373, + "learning_rate": 3.9435995001028417e-07, + "loss": 0.2509, + "step": 3525, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9008579599618685, + "success_rate.epoch.env.math": 0.9608433734939759, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7928156748911466, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8685082735029749, + "success_rate.epoch.global": 0.8806818181818182, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9951388888888889, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9440559440559441, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.055944055944055944 + }, + { + "epoch": 0.752023860247124, + "grad_norm": 53.87590904162749, + "learning_rate": 3.9434245460779636e-07, + "loss": 0.1966, + "step": 3530, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8967136150234741, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.90104662226451, + "success_rate.epoch.env.math": 0.9609022556390977, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7927536231884058, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8681406109158915, + "success_rate.epoch.global": 0.8805673758865248, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997877358490566, + "tokens_p.mean_in_band": 0.641015625, + "tokens_rate.above_band": 0.9464285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05357142857142857 + }, + { + "epoch": 0.7530890498508734, + "grad_norm": 77.96960488403518, + "learning_rate": 3.9432493417677435e-07, + "loss": 0.2345, + "step": 3535, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8967136150234741, + "success_rate.epoch.env.agentgym:sciworld": 0.9737991266375546, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9001901140684411, + "success_rate.epoch.env.math": 0.9609414121181773, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7926167209554832, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8681795076621658, + "success_rate.epoch.global": 0.8804532577903683, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8095238095238094, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9985699549887472, + "tokens_p.mean_in_band": 0.6447916666666667, + "tokens_rate.above_band": 0.9888724035608308, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01112759643916914 + }, + { + "epoch": 0.7541542394546229, + "grad_norm": 93.0625763483448, + "learning_rate": 3.94307388729987e-07, + "loss": 0.2656, + "step": 3540, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, + "success_rate.epoch.env.agentgym:sciworld": 0.9737991266375546, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8995260663507109, + "success_rate.epoch.env.math": 0.9609804902451226, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7924801156905278, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8681588957820228, + "success_rate.epoch.global": 0.8803394625176804, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982142857142857, + "tokens_p.mean_in_band": 0.6884428879310345, + "tokens_rate.above_band": 0.9476534296028881, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.052346570397111915 + }, + { + "epoch": 0.7552194290583724, + "grad_norm": 207.53699727916376, + "learning_rate": 3.9428981828022126e-07, + "loss": 0.3141, + "step": 3545, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8976744186046511, + "success_rate.epoch.env.agentgym:sciworld": 0.9739130434782609, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.8996212121212122, + "success_rate.epoch.env.math": 0.961, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.792854565138939, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8682706637544588, + "success_rate.epoch.global": 0.8805084745762712, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965902278177458, + "tokens_p.mean_in_band": 0.765625, + "tokens_rate.above_band": 0.9904988123515439, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009501187648456057 + }, + { + "epoch": 0.7562846186621218, + "grad_norm": 29.11987209177875, + "learning_rate": 3.9427222284028237e-07, + "loss": 0.2818, + "step": 3550, + "success_rate.epoch.env.abd": 0.9839228295819936, + "success_rate.epoch.env.agentgym:alfworld": 0.8976744186046511, + "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.8999055712936733, + "success_rate.epoch.env.math": 0.961038961038961, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7923576063446287, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.868269859554903, + "success_rate.epoch.global": 0.8803949224259521, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976158038147139, + "tokens_p.mean_in_band": 0.5552455357142857, + "tokens_rate.above_band": 0.963254593175853, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03674540682414698 + }, + { + "epoch": 0.7573498082658713, + "grad_norm": 94.83104027104345, + "learning_rate": 3.942546024229938e-07, + "loss": 0.4543, + "step": 3555, + "success_rate.epoch.env.abd": 0.9839228295819936, + "success_rate.epoch.env.agentgym:alfworld": 0.8976744186046511, + "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9000942507068803, + "success_rate.epoch.env.math": 0.9605788423153693, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7923713566030947, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8683047083357308, + "success_rate.epoch.global": 0.8802816901408451, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997504752851711, + "tokens_p.mean_in_band": 0.30282738095238093, + "tokens_rate.above_band": 0.926056338028169, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07394366197183098 + }, + { + "epoch": 0.7584149978696207, + "grad_norm": 42.85649698328146, + "learning_rate": 3.9423695704119713e-07, + "loss": 0.6038, + "step": 3560, + "success_rate.epoch.env.abd": 0.9839743589743589, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.89924670433145, + "success_rate.epoch.env.math": 0.96061814556331, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7923850574712644, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8682802280335982, + "success_rate.epoch.global": 0.880168776371308, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0011784785435631, + "tokens_p.mean_in_band": 0.60166015625, + "tokens_rate.above_band": 0.9505562422744128, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.049443757725587144 + }, + { + "epoch": 0.7594801874733702, + "grad_norm": 95.62170587598638, + "learning_rate": 3.942192867077522e-07, + "loss": 0.3187, + "step": 3565, + "success_rate.epoch.env.abd": 0.9840255591054313, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.8993414863593603, + "success_rate.epoch.env.math": 0.960179193628671, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7927572606669057, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8682874310717786, + "success_rate.epoch.global": 0.8801966292134832, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992655529953917, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.9730941704035875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026905829596412557 + }, + { + "epoch": 0.7605453770771198, + "grad_norm": 82.75140417138097, + "learning_rate": 3.94201591435537e-07, + "loss": 0.2974, + "step": 3570, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.8994360902255639, + "success_rate.epoch.env.math": 0.9602977667493796, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7929057685417413, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8683249364242638, + "success_rate.epoch.global": 0.8803646563814866, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9958389945652174, + "tokens_p.mean_in_band": 0.77109375, + "tokens_rate.above_band": 0.9865951742627346, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013404825737265416 + }, + { + "epoch": 0.7616105666808692, + "grad_norm": 89.97007715645253, + "learning_rate": 3.9418387123744775e-07, + "loss": 0.3633, + "step": 3575, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, + "success_rate.epoch.env.agentgym:sciworld": 0.9741379310344828, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.899624765478424, + "success_rate.epoch.env.math": 0.9603174603174603, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7929924919556668, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8683619409009342, + "success_rate.epoch.global": 0.8803921568627451, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961168639053254, + "tokens_p.mean_in_band": 0.6439732142857143, + "tokens_rate.above_band": 0.9602272727272727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03977272727272727 + }, + { + "epoch": 0.7626757562846187, + "grad_norm": 158.98763832371196, + "learning_rate": 3.941661261263988e-07, + "loss": 0.3576, + "step": 3580, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8986175115207373, + "success_rate.epoch.env.agentgym:sciworld": 0.9742489270386266, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.8987816307403936, + "success_rate.epoch.env.math": 0.9603764239722635, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7929310960371296, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8683378311168405, + "success_rate.epoch.global": 0.8802797202797202, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984730113636363, + "tokens_p.mean_in_band": 0.71240234375, + "tokens_rate.above_band": 0.9821428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017857142857142856 + }, + { + "epoch": 0.7637409458883682, + "grad_norm": 39.27060480238137, + "learning_rate": 3.9414835611532267e-07, + "loss": 0.3172, + "step": 3585, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8986175115207373, + "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.898876404494382, + "success_rate.epoch.env.math": 0.9604547701433515, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.7931526390870185, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8683936330298152, + "success_rate.epoch.global": 0.8804469273743016, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985702614379085, + "tokens_p.mean_in_band": 0.876953125, + "tokens_rate.above_band": 0.9956616052060737, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004338394793926247 + }, + { + "epoch": 0.7648061354921176, + "grad_norm": 57.03669704244732, + "learning_rate": 3.9413056121716995e-07, + "loss": 0.2352, + "step": 3590, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, + "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9516616314199395, + "success_rate.epoch.env.logic": 0.898876404494382, + "success_rate.epoch.env.math": 0.9600394671928959, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7933000712758375, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8681019283213043, + "success_rate.epoch.global": 0.8803347280334728, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980332167832168, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.984267453294002, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015732546705998034 + }, + { + "epoch": 0.7658713250958671, + "grad_norm": 124.93272759491029, + "learning_rate": 3.941127414449096e-07, + "loss": 0.7262, + "step": 3595, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, + "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9519519519519519, + "success_rate.epoch.env.logic": 0.8989710009354537, + "success_rate.epoch.env.math": 0.9601377952755905, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7930911680911681, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8681268684914051, + "success_rate.epoch.global": 0.8803621169916435, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981770833333333, + "tokens_p.mean_in_band": 0.146484375, + "tokens_rate.above_band": 0.995850622406639, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004149377593360996 + }, + { + "epoch": 0.7669365146996165, + "grad_norm": 149.49156079177754, + "learning_rate": 3.940948968115283e-07, + "loss": 0.4148, + "step": 3600, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8949771689497716, + "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9519519519519519, + "success_rate.epoch.env.logic": 0.8993476234855545, + "success_rate.epoch.env.math": 0.9601769911504425, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7929562433297759, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8677791861101316, + "success_rate.epoch.global": 0.880250347705146, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999371408045977, + "tokens_p.mean_in_band": 0.36607142857142855, + "tokens_rate.above_band": 0.9802816901408451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01971830985915493 + }, + { + "epoch": 0.768001704303366, + "grad_norm": 255.56765457368186, + "learning_rate": 3.9407702733003125e-07, + "loss": 0.3189, + "step": 3605, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8954545454545455, + "success_rate.epoch.env.agentgym:sciworld": 0.9745762711864406, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9519519519519519, + "success_rate.epoch.env.logic": 0.8985102420856611, + "success_rate.epoch.env.math": 0.9602746444335458, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7931034482758621, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8677785533295979, + "success_rate.epoch.global": 0.8802777777777778, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985227272727273, + "tokens_p.mean_in_band": 0.6287202380952381, + "tokens_rate.above_band": 0.975177304964539, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024822695035460994 + }, + { + "epoch": 0.7690668939071155, + "grad_norm": 41.88641026449317, + "learning_rate": 3.940591330134416e-07, + "loss": 0.2524, + "step": 3610, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9745762711864406, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9522388059701492, + "success_rate.epoch.env.logic": 0.8986046511627906, + "success_rate.epoch.env.math": 0.9602941176470589, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7933972310969116, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8675178138357101, + "success_rate.epoch.global": 0.8803051317614424, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996012180974478, + "tokens_p.mean_in_band": 0.691796875, + "tokens_rate.above_band": 0.994232987312572, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0057670126874279125 + }, + { + "epoch": 0.7701320835108649, + "grad_norm": 106.82604898741157, + "learning_rate": 3.940412138748005e-07, + "loss": 0.5378, + "step": 3615, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8979591836734694, + "success_rate.epoch.env.math": 0.9602941176470589, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.793690180786955, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8675130368116054, + "success_rate.epoch.global": 0.8803324099722992, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9999093326885881, + "tokens_p.mean_in_band": 0.6770833333333334, + "tokens_rate.above_band": 0.9913710450623202, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00862895493767977 + }, + { + "epoch": 0.7711972731146144, + "grad_norm": 90.23979785986623, + "learning_rate": 3.9402326992716743e-07, + "loss": 0.3911, + "step": 3620, + "success_rate.epoch.env.abd": 0.9841772151898734, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8980537534754403, + "success_rate.epoch.env.math": 0.960352422907489, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7933474876150035, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8675003470800907, + "success_rate.epoch.global": 0.8802213001383126, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9944526627218935, + "tokens_p.mean_in_band": 0.6388888888888888, + "tokens_rate.above_band": 0.949438202247191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05056179775280899 + }, + { + "epoch": 0.7722624627183639, + "grad_norm": 463.65678170863237, + "learning_rate": 3.940053011836197e-07, + "loss": 0.3113, + "step": 3625, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9525222551928784, + "success_rate.epoch.env.logic": 0.8982423681776133, + "success_rate.epoch.env.math": 0.9604105571847508, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7934936350777935, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8675579574095476, + "success_rate.epoch.global": 0.8803867403314917, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962962962962963, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.7733276523221133, + "grad_norm": 1015.7081195335469, + "learning_rate": 3.9398730765725285e-07, + "loss": 0.4532, + "step": 3630, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9525222551928784, + "success_rate.epoch.env.logic": 0.8984302862419206, + "success_rate.epoch.env.math": 0.9605070697220868, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7936395759717314, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8676357339153302, + "success_rate.epoch.global": 0.880551724137931, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.625, + "tokens_rate.above_band": 0.993660855784469, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006339144215530904 + }, + { + "epoch": 0.7743928419258628, + "grad_norm": 254.45479816578788, + "learning_rate": 3.939692893611804e-07, + "loss": 0.4188, + "step": 3635, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.8984302862419206, + "success_rate.epoch.env.math": 0.9605839416058394, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7936507936507936, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8676565117574598, + "success_rate.epoch.global": 0.8805785123966943, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977532679738562, + "tokens_p.mean_in_band": 0.66650390625, + "tokens_rate.above_band": 0.9828693790149893, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017130620985010708 + }, + { + "epoch": 0.7754580315296122, + "grad_norm": 129.43309277805292, + "learning_rate": 3.93951246308534e-07, + "loss": 0.2792, + "step": 3640, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.8985239852398524, + "success_rate.epoch.env.math": 0.9606796116504854, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7939415287072913, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8677001575846486, + "success_rate.epoch.global": 0.8807427785419533, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9939365671641791, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9710144927536232, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028985507246376812 + }, + { + "epoch": 0.7765232211333617, + "grad_norm": 76.38240990925108, + "learning_rate": 3.939331785124632e-07, + "loss": 0.3218, + "step": 3645, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.8986175115207373, + "success_rate.epoch.env.math": 0.9607938044530494, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7941590429275158, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8677388151577098, + "success_rate.epoch.global": 0.8809065934065934, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9912790697674418, + "tokens_p.mean_in_band": 0.8291015625, + "tokens_rate.above_band": 0.9148936170212766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0851063829787234 + }, + { + "epoch": 0.7775884107371112, + "grad_norm": 0.0, + "learning_rate": 3.9391508598613586e-07, + "loss": 0.1496, + "step": 3650, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8878923766816144, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.8987108655616943, + "success_rate.epoch.env.math": 0.9608506524891252, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7944483485593816, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8674706104846355, + "success_rate.epoch.global": 0.8809327846364884, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986270920502092, + "tokens_p.mean_in_band": 0.5, + "tokens_rate.above_band": 0.9958333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004166666666666667 + }, + { + "epoch": 0.7786536003408606, + "grad_norm": 138.6709908629782, + "learning_rate": 3.938969687427375e-07, + "loss": 0.321, + "step": 3655, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8883928571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.898989898989899, + "success_rate.epoch.env.math": 0.9609261939218524, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7942415730337079, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8675295450115901, + "success_rate.epoch.global": 0.880958904109589, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968184389140271, + "tokens_p.mean_in_band": 0.61474609375, + "tokens_rate.above_band": 0.9910313901345291, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008968609865470852 + }, + { + "epoch": 0.7797187899446102, + "grad_norm": 105.79139106977253, + "learning_rate": 3.9387882679547194e-07, + "loss": 0.3139, + "step": 3660, + "success_rate.epoch.env.abd": 0.9843260188087775, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.8990825688073395, + "success_rate.epoch.env.math": 0.9610014443909485, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7940350877192982, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8676338887941548, + "success_rate.epoch.global": 0.8809849521203831, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988567073170732, + "tokens_p.mean_in_band": 0.578125, + "tokens_rate.above_band": 0.9899396378269618, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01006036217303823 + }, + { + "epoch": 0.7807839795483597, + "grad_norm": 75.0752179318001, + "learning_rate": 3.9386066015756085e-07, + "loss": 0.2647, + "step": 3665, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.8992673992673993, + "success_rate.epoch.env.math": 0.9610576923076923, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7943237561317449, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8676865004287435, + "success_rate.epoch.global": 0.8811475409836066, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959558823529412, + "tokens_p.mean_in_band": 0.8546875, + "tokens_rate.above_band": 0.9444444444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05555555555555555 + }, + { + "epoch": 0.7818491691521091, + "grad_norm": 180.31670923683598, + "learning_rate": 3.9384246884224397e-07, + "loss": 0.383, + "step": 3670, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.8992673992673993, + "success_rate.epoch.env.math": 0.9610951008645533, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7943336831059811, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8677205428033484, + "success_rate.epoch.global": 0.8811732605729877, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973507785467128, + "tokens_p.mean_in_band": 0.5111177884615384, + "tokens_rate.above_band": 0.9780033840947546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021996615905245348 + }, + { + "epoch": 0.7829143587558586, + "grad_norm": 61.70226454700537, + "learning_rate": 3.93824252862779e-07, + "loss": 0.1727, + "step": 3675, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8893805309734514, + "success_rate.epoch.env.agentgym:sciworld": 0.9747899159663865, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.8993595608417201, + "success_rate.epoch.env.math": 0.961169702780441, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.7945492662473794, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8678096664741446, + "success_rate.epoch.global": 0.8813351498637603, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967261904761905, + "tokens_p.mean_in_band": 0.498046875, + "tokens_rate.above_band": 0.997624703087886, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0023752969121140144 + }, + { + "epoch": 0.783979548359608, + "grad_norm": 78.05984307283907, + "learning_rate": 3.938060122324416e-07, + "loss": 0.3464, + "step": 3680, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8898678414096917, + "success_rate.epoch.env.agentgym:sciworld": 0.9747899159663865, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.8995433789954338, + "success_rate.epoch.env.math": 0.9611883085769046, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7949075688873387, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8675835467915104, + "success_rate.epoch.global": 0.8813605442176871, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995492053789731, + "tokens_p.mean_in_band": 0.44375, + "tokens_rate.above_band": 0.964622641509434, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03537735849056604 + }, + { + "epoch": 0.7850447379633575, + "grad_norm": 169.5093349589927, + "learning_rate": 3.9378774696452543e-07, + "loss": 0.4289, + "step": 3685, + "success_rate.epoch.env.abd": 0.984472049689441, + "success_rate.epoch.env.agentgym:alfworld": 0.8898678414096917, + "success_rate.epoch.env.agentgym:sciworld": 0.9748953974895398, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.8998178506375227, + "success_rate.epoch.env.math": 0.9612625538020086, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7949790794979079, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8676357361258645, + "success_rate.epoch.global": 0.8815217391304347, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985039893617021, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9947089947089947, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005291005291005291 + }, + { + "epoch": 0.786109927567107, + "grad_norm": 524.3044770124952, + "learning_rate": 3.9376945707234207e-07, + "loss": 0.354, + "step": 3690, + "success_rate.epoch.env.abd": 0.984472049689441, + "success_rate.epoch.env.agentgym:alfworld": 0.8908296943231441, + "success_rate.epoch.env.agentgym:sciworld": 0.9748953974895398, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.899909008189263, + "success_rate.epoch.env.math": 0.9612995699952221, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7948450017415535, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.867795396946783, + "success_rate.epoch.global": 0.8815468113975576, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996159754224271, + "tokens_p.mean_in_band": 0.333984375, + "tokens_rate.above_band": 0.9938931297709923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0061068702290076335 + }, + { + "epoch": 0.7871751171708564, + "grad_norm": 98.16120192533465, + "learning_rate": 3.937511425692211e-07, + "loss": 0.2423, + "step": 3695, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8908296943231441, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9001814882032668, + "success_rate.epoch.env.math": 0.961354961832061, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.794987817612252, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8678520664360164, + "success_rate.epoch.global": 0.8817073170731707, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957959641255605, + "tokens_p.mean_in_band": 0.796875, + "tokens_rate.above_band": 0.9955357142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004464285714285714 + }, + { + "epoch": 0.7882403067746059, + "grad_norm": 69.69042512171194, + "learning_rate": 3.9373280346851e-07, + "loss": 0.3902, + "step": 3700, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8908296943231441, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9530791788856305, + "success_rate.epoch.env.logic": 0.8994565217391305, + "success_rate.epoch.env.math": 0.96141019533111, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.794577685088634, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8677664425202288, + "success_rate.epoch.global": 0.8814614343707713, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9988610478359908, + "tokens_p.mean_in_band": 0.5053125, + "tokens_rate.above_band": 0.981371087928465, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018628912071535022 + }, + { + "epoch": 0.7893054963783553, + "grad_norm": 68.02646839305561, + "learning_rate": 3.9371443978357404e-07, + "loss": 0.3503, + "step": 3705, + "success_rate.epoch.env.abd": 0.9845679012345679, + "success_rate.epoch.env.agentgym:alfworld": 0.8917748917748918, + "success_rate.epoch.env.agentgym:sciworld": 0.9752066115702479, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9530791788856305, + "success_rate.epoch.env.logic": 0.8986425339366516, + "success_rate.epoch.env.math": 0.9614469300333175, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7947203890239667, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.867817809543194, + "success_rate.epoch.global": 0.8814864864864865, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978649068322981, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.9889434889434889, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011056511056511056 + }, + { + "epoch": 0.7903706859821048, + "grad_norm": 24.68769935889459, + "learning_rate": 3.936960515277967e-07, + "loss": 0.3215, + "step": 3710, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9752066115702479, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.8986425339366516, + "success_rate.epoch.env.math": 0.961465271170314, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7944540727902947, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8678544632889186, + "success_rate.epoch.global": 0.8813765182186235, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968619246861925, + "tokens_p.mean_in_band": 0.54921875, + "tokens_rate.above_band": 0.9795081967213115, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020491803278688523 + }, + { + "epoch": 0.7914358755858543, + "grad_norm": 212.9112060964346, + "learning_rate": 3.936776387145792e-07, + "loss": 0.2509, + "step": 3715, + "success_rate.epoch.env.abd": 0.9846625766871165, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.8987341772151899, + "success_rate.epoch.env.math": 0.961520190023753, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7947386638975424, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8679072247176475, + "success_rate.epoch.global": 0.8815363881401618, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985632183908046, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.7925010651896037, + "grad_norm": 78.58180947171188, + "learning_rate": 3.9365920135734055e-07, + "loss": 0.5729, + "step": 3720, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.8988256549232159, + "success_rate.epoch.env.math": 0.9615931721194879, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7946058091286307, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8679185998002573, + "success_rate.epoch.global": 0.8815612382234186, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9899088541666666, + "tokens_p.mean_in_band": 0.609765625, + "tokens_rate.above_band": 0.9504950495049505, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04950495049504951 + }, + { + "epoch": 0.7935662547933532, + "grad_norm": 99.52998609777296, + "learning_rate": 3.936407394695179e-07, + "loss": 0.4686, + "step": 3725, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.898014440433213, + "success_rate.epoch.env.math": 0.9616658778987222, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.794818652849741, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8678955391488354, + "success_rate.epoch.global": 0.8815860215053763, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979134689178818, + "tokens_p.mean_in_band": 0.566015625, + "tokens_rate.above_band": 0.9848828420256992, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015117157974300832 + }, + { + "epoch": 0.7946314443971026, + "grad_norm": 119.00773937110698, + "learning_rate": 3.9362225306456595e-07, + "loss": 0.3206, + "step": 3730, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8981981981981982, + "success_rate.epoch.env.math": 0.9617383089277279, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.7947568126940324, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.867913207206861, + "success_rate.epoch.global": 0.8816107382550336, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9925, + "tokens_p.mean_in_band": 0.6253551136363636, + "tokens_rate.above_band": 0.872093023255814, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12790697674418605 + }, + { + "epoch": 0.7956966340008521, + "grad_norm": 129.37629931877842, + "learning_rate": 3.9360374215595766e-07, + "loss": 0.5105, + "step": 3735, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.8974820143884892, + "success_rate.epoch.env.math": 0.9613207547169811, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7946243969676086, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8675029363026625, + "success_rate.epoch.global": 0.8812332439678284, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.5666666666666667, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9972584575688074, + "tokens_p.mean_below_band": 5.699694156646729e-07, + "tokens_p.mean_in_band": 0.504856418918919, + "tokens_rate.above_band": 0.9582417582417583, + "tokens_rate.below_band": 0.001098901098901099, + "tokens_rate.in_band": 0.04065934065934066 + }, + { + "epoch": 0.7967618236046016, + "grad_norm": 25.051683918926575, + "learning_rate": 3.9358520675718355e-07, + "loss": 0.3241, + "step": 3740, + "success_rate.epoch.env.abd": 0.9848024316109423, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.8974820143884892, + "success_rate.epoch.env.math": 0.9609411764705882, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7945629731589814, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8674670573931125, + "success_rate.epoch.global": 0.8811244979919679, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9916424418604651, + "tokens_p.mean_in_band": 0.7161458333333334, + "tokens_rate.above_band": 0.8514851485148515, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1485148514851485 + }, + { + "epoch": 0.797827013208351, + "grad_norm": 215.1764072248656, + "learning_rate": 3.9356664688175215e-07, + "loss": 0.4316, + "step": 3745, + "success_rate.epoch.env.abd": 0.9848024316109423, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.8960573476702509, + "success_rate.epoch.env.math": 0.9609779031499764, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7945017182130584, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8673353123944966, + "success_rate.epoch.global": 0.8808823529411764, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.99902496099844, + "tokens_p.mean_in_band": 0.57421875, + "tokens_rate.above_band": 0.9567164179104478, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04328358208955224 + }, + { + "epoch": 0.7988922028121006, + "grad_norm": 59.39626896344402, + "learning_rate": 3.9354806254318967e-07, + "loss": 0.3688, + "step": 3750, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9754098360655737, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.8960573476702509, + "success_rate.epoch.env.math": 0.9610328638497653, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7948542024013722, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8673857389595793, + "success_rate.epoch.global": 0.881041388518024, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954578488372093, + "tokens_p.mean_in_band": 0.66640625, + "tokens_rate.above_band": 0.9717514124293786, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02824858757062147 + }, + { + "epoch": 0.7999573924158501, + "grad_norm": 228.32473794319367, + "learning_rate": 3.935294537550403e-07, + "loss": 0.4245, + "step": 3755, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.8961504028648165, + "success_rate.epoch.env.math": 0.9611241217798595, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7947224126113777, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8673996381732817, + "success_rate.epoch.global": 0.8810666666666667, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99775, + "tokens_p.mean_in_band": 0.7295619419642857, + "tokens_rate.above_band": 0.946969696969697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05303030303030303 + }, + { + "epoch": 0.8010225820195995, + "grad_norm": 150.2309611810832, + "learning_rate": 3.9351082053086603e-07, + "loss": 0.3381, + "step": 3760, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, + "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.8962432915921288, + "success_rate.epoch.env.math": 0.961178671655753, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7947314403010606, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8674481418277703, + "success_rate.epoch.global": 0.8810918774966711, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963235294117647, + "tokens_p.mean_in_band": 0.6863839285714286, + "tokens_rate.above_band": 0.974910394265233, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025089605734767026 + }, + { + "epoch": 0.802087771623349, + "grad_norm": 71.81975056934229, + "learning_rate": 3.934921628842465e-07, + "loss": 0.5153, + "step": 3765, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.8963360142984808, + "success_rate.epoch.env.math": 0.9611968209443665, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7943989071038251, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8667469037487247, + "success_rate.epoch.global": 0.8807180851063829, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9991147308781869, + "tokens_p.mean_in_band": 0.4549696180555556, + "tokens_rate.above_band": 0.9514824797843666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04851752021563342 + }, + { + "epoch": 0.8031529612270984, + "grad_norm": 131.7346705630221, + "learning_rate": 3.934734808287794e-07, + "loss": 0.2724, + "step": 3770, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8851063829787233, + "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.8956289027653881, + "success_rate.epoch.env.math": 0.9612330686595049, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7944084555063076, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8667314203803488, + "success_rate.epoch.global": 0.8806108897742364, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0004664179104477, + "tokens_p.mean_in_band": 0.5202907986111112, + "tokens_rate.above_band": 0.9811715481171548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01882845188284519 + }, + { + "epoch": 0.8042181508308479, + "grad_norm": 58.02425940249168, + "learning_rate": 3.9345477437808e-07, + "loss": 0.3586, + "step": 3775, + "success_rate.epoch.env.abd": 0.9849397590361446, + "success_rate.epoch.env.agentgym:alfworld": 0.885593220338983, + "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.8957219251336899, + "success_rate.epoch.env.math": 0.9612511671335201, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7947583390061266, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.866825885461263, + "success_rate.epoch.global": 0.8807692307692307, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985829355608592, + "tokens_p.mean_in_band": 0.7946428571428571, + "tokens_rate.above_band": 0.9835680751173709, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01643192488262911 + }, + { + "epoch": 0.8052833404345974, + "grad_norm": 147.11832637493086, + "learning_rate": 3.9343604354578144e-07, + "loss": 0.2905, + "step": 3780, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.885593220338983, + "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9538904899135446, + "success_rate.epoch.env.logic": 0.8958147818343722, + "success_rate.epoch.env.math": 0.9608391608391609, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7946276776606597, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8668053067907622, + "success_rate.epoch.global": 0.8806622516556292, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972722960151803, + "tokens_p.mean_in_band": 0.3949652777777778, + "tokens_rate.above_band": 0.9669724770642202, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03302752293577982 + }, + { + "epoch": 0.8063485300383468, + "grad_norm": 66.76721594519827, + "learning_rate": 3.9341728834553464e-07, + "loss": 0.3325, + "step": 3785, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.885593220338983, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9511494252873564, + "success_rate.epoch.env.logic": 0.8959074733096085, + "success_rate.epoch.env.math": 0.9609120521172638, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7944972826086957, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8665683681626817, + "success_rate.epoch.global": 0.8805555555555555, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995216670997921, + "tokens_p.mean_in_band": 0.5562855113636364, + "tokens_rate.above_band": 0.8882733148661126, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11172668513388735 + }, + { + "epoch": 0.8074137196420963, + "grad_norm": 336.6178712001766, + "learning_rate": 3.933985087910082e-07, + "loss": 0.5309, + "step": 3790, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9511494252873564, + "success_rate.epoch.env.logic": 0.8962765957446809, + "success_rate.epoch.env.math": 0.9609483960948396, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7936884967763828, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8665755872179947, + "success_rate.epoch.global": 0.8803170409511228, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9994365464632455, + "tokens_p.mean_in_band": 0.5403262867647058, + "tokens_rate.above_band": 0.9769647696476965, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023035230352303523 + }, + { + "epoch": 0.8084789092458458, + "grad_norm": 256.05312463976554, + "learning_rate": 3.9337970489588857e-07, + "loss": 0.4742, + "step": 3795, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.8964601769911504, + "success_rate.epoch.env.math": 0.9609846725499304, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7939681463910538, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.866637784223048, + "success_rate.epoch.global": 0.8804749340369393, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977292387543253, + "tokens_p.mean_in_band": 0.439453125, + "tokens_rate.above_band": 0.9863481228668942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013651877133105802 + }, + { + "epoch": 0.8095440988495952, + "grad_norm": 86.39443396100542, + "learning_rate": 3.933608766738799e-07, + "loss": 0.2105, + "step": 3800, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.896551724137931, + "success_rate.epoch.env.math": 0.9610750695088045, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7941076870978666, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8666750626498273, + "success_rate.epoch.global": 0.8806324110671937, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979319852941176, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9927007299270073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0072992700729927005 + }, + { + "epoch": 0.8106092884533447, + "grad_norm": 263.1065856297251, + "learning_rate": 3.9334202413870406e-07, + "loss": 0.3527, + "step": 3805, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.8967343336275375, + "success_rate.epoch.env.math": 0.961129106894956, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7937795807978364, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8666667481567116, + "success_rate.epoch.global": 0.8805263157894737, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.98828125, + "tokens_p.mean_in_band": 0.6473214285714286, + "tokens_rate.above_band": 0.8627450980392157, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13725490196078433 + }, + { + "epoch": 0.8116744780570941, + "grad_norm": 117.03578843605969, + "learning_rate": 3.933231473041006e-07, + "loss": 0.365, + "step": 3810, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9757085020242915, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.8968253968253969, + "success_rate.epoch.env.math": 0.9611470860314524, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7931848852901484, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8662931194806004, + "success_rate.epoch.global": 0.8801576872536137, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9983606557377049, + "tokens_p.mean_in_band": 0.6310292119565217, + "tokens_rate.above_band": 0.9298780487804879, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0701219512195122 + }, + { + "epoch": 0.8127396676608436, + "grad_norm": 235.85528454256533, + "learning_rate": 3.9330424618382685e-07, + "loss": 0.1631, + "step": 3815, + "success_rate.epoch.env.abd": 0.985207100591716, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9757085020242915, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.8969162995594714, + "success_rate.epoch.env.math": 0.961218836565097, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7934636118598383, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8663372354516325, + "success_rate.epoch.global": 0.8803149606299212, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9880725190839694, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9924242424242424, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007575757575757576 + }, + { + "epoch": 0.8138048572645931, + "grad_norm": 93.67931665339518, + "learning_rate": 3.9328532079165786e-07, + "loss": 0.491, + "step": 3820, + "success_rate.epoch.env.abd": 0.9852507374631269, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9757085020242915, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.8970070422535211, + "success_rate.epoch.env.math": 0.9612724757952974, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7934746047763203, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8663553274254635, + "success_rate.epoch.global": 0.8803407601572739, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9911123853211009, + "tokens_p.mean_in_band": 0.6265625, + "tokens_rate.above_band": 0.9159663865546218, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08403361344537816 + }, + { + "epoch": 0.8148700468683425, + "grad_norm": 125.4854821723632, + "learning_rate": 3.9326637114138625e-07, + "loss": 0.4263, + "step": 3825, + "success_rate.epoch.env.abd": 0.9853372434017595, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8971880492091389, + "success_rate.epoch.env.math": 0.9613259668508287, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7935440484196369, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8664123792735982, + "success_rate.epoch.global": 0.8804973821989529, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9908900176678446, + "tokens_p.mean_in_band": 0.88125, + "tokens_rate.above_band": 0.9912434325744308, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008756567425569177 + }, + { + "epoch": 0.815935236472092, + "grad_norm": 82.51051557376586, + "learning_rate": 3.9324739724682237e-07, + "loss": 0.4545, + "step": 3830, + "success_rate.epoch.env.abd": 0.9853801169590644, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8973684210526316, + "success_rate.epoch.env.math": 0.9613437643810401, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.7938213566151779, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8665626320986436, + "success_rate.epoch.global": 0.8806535947712418, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991883116883117, + "tokens_p.mean_in_band": 0.84921875, + "tokens_rate.above_band": 0.9908088235294118, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009191176470588236 + }, + { + "epoch": 0.8170004260758414, + "grad_norm": 197.3993672538796, + "learning_rate": 3.9322839912179434e-07, + "loss": 0.2086, + "step": 3835, + "success_rate.epoch.env.abd": 0.9854651162790697, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8973684210526316, + "success_rate.epoch.env.math": 0.9613793103448276, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7940288493794029, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8683214760185389, + "success_rate.epoch.global": 0.8808093994778068, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996926883780332, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.9974522292993631, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0025477707006369425 + }, + { + "epoch": 0.818065615679591, + "grad_norm": 328.06152320211413, + "learning_rate": 3.932093767801478e-07, + "loss": 0.36, + "step": 3840, + "success_rate.epoch.env.abd": 0.9854651162790697, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8976377952755905, + "success_rate.epoch.env.math": 0.9614325068870524, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7942359249329759, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8684250580563859, + "success_rate.epoch.global": 0.8809647979139504, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992897727272727, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.995475113122172, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004524886877828055 + }, + { + "epoch": 0.8191308052833405, + "grad_norm": 108.94169171376168, + "learning_rate": 3.931903302357461e-07, + "loss": 0.3125, + "step": 3845, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8978165938864628, + "success_rate.epoch.env.math": 0.9614678899082569, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7945113788487282, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8685063862698439, + "success_rate.epoch.global": 0.8811197916666667, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957414829659319, + "tokens_p.mean_in_band": 0.8453125, + "tokens_rate.above_band": 0.9900793650793651, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00992063492063492 + }, + { + "epoch": 0.8201959948870899, + "grad_norm": 74.44546940479921, + "learning_rate": 3.931712595024703e-07, + "loss": 0.3465, + "step": 3850, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8979057591623036, + "success_rate.epoch.env.math": 0.9610627576729271, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7949231796927188, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8685150984411621, + "success_rate.epoch.global": 0.8811443433029909, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9909274193548387, + "tokens_p.mean_in_band": 0.6448863636363636, + "tokens_rate.above_band": 0.9337349397590361, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06626506024096386 + }, + { + "epoch": 0.8212611844908394, + "grad_norm": 69.07461298775165, + "learning_rate": 3.931521645942189e-07, + "loss": 0.2801, + "step": 3855, + "success_rate.epoch.env.abd": 0.9855491329479769, + "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, + "success_rate.epoch.env.agentgym:sciworld": 0.9759036144578314, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.8979057591623036, + "success_rate.epoch.env.math": 0.9611339734796525, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7947263017356475, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686039688587598, + "success_rate.epoch.global": 0.8811688311688312, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979042473919523, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9955489614243324, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004451038575667656 + }, + { + "epoch": 0.8223263740945889, + "grad_norm": 201.19100104991378, + "learning_rate": 3.931330455249082e-07, + "loss": 0.3657, + "step": 3860, + "success_rate.epoch.env.abd": 0.9855491329479769, + "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, + "success_rate.epoch.env.agentgym:sciworld": 0.9759036144578314, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9515669515669516, + "success_rate.epoch.env.logic": 0.8979947689625108, + "success_rate.epoch.env.math": 0.9611872146118722, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7944018660446518, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8685999866205609, + "success_rate.epoch.global": 0.8810635538261997, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982585139318886, + "tokens_p.mean_in_band": 0.5830965909090909, + "tokens_rate.above_band": 0.9670658682634731, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03293413173652695 + }, + { + "epoch": 0.8233915636983383, + "grad_norm": 102.19170892886017, + "learning_rate": 3.9311390230847195e-07, + "loss": 0.3352, + "step": 3865, + "success_rate.epoch.env.abd": 0.9855907780979827, + "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, + "success_rate.epoch.env.agentgym:sciworld": 0.9759036144578314, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9515669515669516, + "success_rate.epoch.env.logic": 0.8979947689625108, + "success_rate.epoch.env.math": 0.9612579762989972, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7944111776447106, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686110519330327, + "success_rate.epoch.global": 0.8810880829015544, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995049504950495, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.926605504587156, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07339449541284404 + }, + { + "epoch": 0.8244567533020878, + "grad_norm": 79.29959450182675, + "learning_rate": 3.930947349588618e-07, + "loss": 0.3906, + "step": 3870, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9515669515669516, + "success_rate.epoch.env.logic": 0.8982608695652174, + "success_rate.epoch.env.math": 0.9612932604735883, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7942154255319149, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686649455718068, + "success_rate.epoch.global": 0.8811125485122898, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966475095785441, + "tokens_p.mean_in_band": 0.621875, + "tokens_rate.above_band": 0.9936548223350253, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006345177664974619 + }, + { + "epoch": 0.8255219429058372, + "grad_norm": 119.10976458053719, + "learning_rate": 3.930755434900465e-07, + "loss": 0.2456, + "step": 3875, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9517045454545454, + "success_rate.epoch.env.logic": 0.8983492615117289, + "success_rate.epoch.env.math": 0.9613636363636363, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7944887118193891, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.868716731754682, + "success_rate.epoch.global": 0.881266149870801, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959747229916898, + "tokens_p.mean_in_band": 0.7857142857142857, + "tokens_rate.above_band": 0.99039780521262, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009602194787379973 + }, + { + "epoch": 0.8265871325095867, + "grad_norm": 32.38899961535907, + "learning_rate": 3.9305632791601284e-07, + "loss": 0.3963, + "step": 3880, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9518413597733711, + "success_rate.epoch.env.logic": 0.8984375, + "success_rate.epoch.env.math": 0.9614162505674081, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7941663904540934, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.868712672267916, + "success_rate.epoch.global": 0.8811612903225806, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999060621242485, + "tokens_p.mean_in_band": 0.4990234375, + "tokens_rate.above_band": 0.9950149551345963, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004985044865403789 + }, + { + "epoch": 0.8276523221133362, + "grad_norm": 124.78146306378007, + "learning_rate": 3.93037088250765e-07, + "loss": 0.4274, + "step": 3885, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8847736625514403, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9518413597733711, + "success_rate.epoch.env.logic": 0.8986135181975736, + "success_rate.epoch.env.math": 0.9614512471655329, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7941760423560555, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687760185031835, + "success_rate.epoch.global": 0.8811855670103093, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975235849056604, + "tokens_p.mean_in_band": 0.62939453125, + "tokens_rate.above_band": 0.9706959706959707, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029304029304029304 + }, + { + "epoch": 0.8287175117170856, + "grad_norm": 116.54112186974726, + "learning_rate": 3.930178245083246e-07, + "loss": 0.6208, + "step": 3890, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9518413597733711, + "success_rate.epoch.env.logic": 0.8986135181975736, + "success_rate.epoch.env.math": 0.9614861803352968, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7939233817701453, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8688028985212967, + "success_rate.epoch.global": 0.8810810810810811, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976615646258503, + "tokens_p.mean_in_band": 0.5653782894736842, + "tokens_rate.above_band": 0.9586956521739131, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041304347826086954 + }, + { + "epoch": 0.8297827013208351, + "grad_norm": 192.17809651681378, + "learning_rate": 3.9299853670273095e-07, + "loss": 0.3392, + "step": 3895, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952112676056338, + "success_rate.epoch.env.logic": 0.8986135181975736, + "success_rate.epoch.env.math": 0.9615558570782451, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7935356200527705, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687986468220729, + "success_rate.epoch.global": 0.8809768637532134, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9964378238341969, + "tokens_p.mean_in_band": 0.5504261363636364, + "tokens_rate.above_band": 0.9722921914357683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027707808564231738 + }, + { + "epoch": 0.8308478909245846, + "grad_norm": 128.3557990554127, + "learning_rate": 3.9297922484804087e-07, + "loss": 0.5831, + "step": 3900, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952112676056338, + "success_rate.epoch.env.logic": 0.8987012987012987, + "success_rate.epoch.env.math": 0.9615906009941256, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7926267281105991, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687697392359254, + "success_rate.epoch.global": 0.8806161745827985, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9958126550868487, + "tokens_p.mean_in_band": 0.5268229166666667, + "tokens_rate.above_band": 0.9641148325358851, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03588516746411483 + }, + { + "epoch": 0.831913080528334, + "grad_norm": 74.77893847425175, + "learning_rate": 3.929598889583288e-07, + "loss": 0.4409, + "step": 3905, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952112676056338, + "success_rate.epoch.env.logic": 0.8987889273356401, + "success_rate.epoch.env.math": 0.9616771866546439, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7922419460881, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687505967152306, + "success_rate.epoch.global": 0.8805128205128205, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9925986842105263, + "tokens_p.mean_in_band": 0.5979567307692307, + "tokens_rate.above_band": 0.8976377952755905, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10236220472440945 + }, + { + "epoch": 0.8329782701320835, + "grad_norm": 40.35857743506659, + "learning_rate": 3.9294052904768646e-07, + "loss": 0.2629, + "step": 3910, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952247191011236, + "success_rate.epoch.env.logic": 0.8990509059534081, + "success_rate.epoch.env.math": 0.9613135402609086, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7923102201774564, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687597895578022, + "success_rate.epoch.global": 0.8805377720870678, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0001097775175645, + "tokens_p.mean_in_band": 0.609375, + "tokens_rate.above_band": 0.9964994165694282, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003500583430571762 + }, + { + "epoch": 0.8340434597358329, + "grad_norm": 110.93402585348511, + "learning_rate": 3.929211451302233e-07, + "loss": 0.3101, + "step": 3915, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952247191011236, + "success_rate.epoch.env.logic": 0.8984509466437177, + "success_rate.epoch.env.math": 0.9609164420485176, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7922546767312111, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686640985606817, + "success_rate.epoch.global": 0.880306905370844, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6944444444444443, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9947321428571428, + "tokens_p.mean_in_band": 0.5212204391891891, + "tokens_rate.above_band": 0.9497964721845319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.050203527815468114 + }, + { + "epoch": 0.8351086493395824, + "grad_norm": 86.40069497217755, + "learning_rate": 3.9290173722006613e-07, + "loss": 0.2324, + "step": 3920, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952247191011236, + "success_rate.epoch.env.logic": 0.8986254295532646, + "success_rate.epoch.env.math": 0.9609865470852018, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7924590163934426, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687086313686105, + "success_rate.epoch.global": 0.8804597701149425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9937977099236641, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9776119402985075, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022388059701492536 + }, + { + "epoch": 0.8361738389433319, + "grad_norm": 136.23354918135206, + "learning_rate": 3.928823053313593e-07, + "loss": 0.3131, + "step": 3925, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.976, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.8987124463519314, + "success_rate.epoch.env.math": 0.9605734767025089, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7924713584288052, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687043644522966, + "success_rate.epoch.global": 0.8803571428571428, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0000736160188457, + "tokens_p.mean_in_band": 0.6852463942307693, + "tokens_rate.above_band": 0.9849187935034803, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015081206496519721 + }, + { + "epoch": 0.8372390285470814, + "grad_norm": 107.97998022558767, + "learning_rate": 3.9286284947826466e-07, + "loss": 0.3716, + "step": 3930, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8861788617886179, + "success_rate.epoch.env.agentgym:sciworld": 0.9760956175298805, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.8987993138936535, + "success_rate.epoch.env.math": 0.9606263982102908, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7926749509483323, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.868839300385584, + "success_rate.epoch.global": 0.8805095541401274, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977800546448088, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9891891891891892, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010810810810810811 + }, + { + "epoch": 0.8383042181508309, + "grad_norm": 90.9543254503011, + "learning_rate": 3.9284336967496144e-07, + "loss": 0.5249, + "step": 3935, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8866396761133604, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.8987993138936535, + "success_rate.epoch.env.math": 0.9606791778373548, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7928781443972558, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8689253417393826, + "success_rate.epoch.global": 0.8806615776081425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974922839506173, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.997946611909651, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002053388090349076 + }, + { + "epoch": 0.8393694077545804, + "grad_norm": 359.79030197159335, + "learning_rate": 3.9282386593564645e-07, + "loss": 0.321, + "step": 3940, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8830645161290323, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.898972602739726, + "success_rate.epoch.env.math": 0.9606967396159, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7929572872513857, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686248720570572, + "success_rate.epoch.global": 0.8805590851334181, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983753822629969, + "tokens_p.mean_in_band": 0.6539713541666666, + "tokens_rate.above_band": 0.9646017699115044, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035398230088495575 + }, + { + "epoch": 0.8404345973583298, + "grad_norm": 97.41800296755565, + "learning_rate": 3.928043382745338e-07, + "loss": 0.2942, + "step": 3945, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8830645161290323, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9527777777777777, + "success_rate.epoch.env.logic": 0.8982905982905983, + "success_rate.epoch.env.math": 0.9607318161535029, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7932269619016606, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686145591512464, + "success_rate.epoch.global": 0.8805837563451777, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000073877068558, + "tokens_p.mean_in_band": 0.5984002976190477, + "tokens_rate.above_band": 0.9757785467128027, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02422145328719723 + }, + { + "epoch": 0.8414997869620793, + "grad_norm": 101.13322897540706, + "learning_rate": 3.927847867058552e-07, + "loss": 0.4425, + "step": 3950, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8835341365461847, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9527777777777777, + "success_rate.epoch.env.logic": 0.8983774551665243, + "success_rate.epoch.env.math": 0.9607843137254902, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7929128738621587, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8686413670444796, + "success_rate.epoch.global": 0.8804816223067173, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9952008928571429, + "tokens_p.mean_in_band": 0.6100260416666666, + "tokens_rate.above_band": 0.958904109589041, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0410958904109589 + }, + { + "epoch": 0.8425649765658287, + "grad_norm": 45.307086293373196, + "learning_rate": 3.9276521124385966e-07, + "loss": 0.2964, + "step": 3955, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8849206349206349, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9527777777777777, + "success_rate.epoch.env.logic": 0.8976982097186701, + "success_rate.epoch.env.math": 0.9608017817371938, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7928571428571428, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8687021843111417, + "success_rate.epoch.global": 0.8803797468354431, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978929731925265, + "tokens_p.mean_in_band": 0.4717741935483871, + "tokens_rate.above_band": 0.9754358161648178, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02456418383518225 + }, + { + "epoch": 0.8436301661695782, + "grad_norm": 184.76332978411142, + "learning_rate": 3.927456119028136e-07, + "loss": 0.234, + "step": 3960, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9529085872576177, + "success_rate.epoch.env.logic": 0.8977853492333902, + "success_rate.epoch.env.math": 0.9608540925266904, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7928015564202334, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8684037264318415, + "success_rate.epoch.global": 0.8802781289506953, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9985655737704918, + "tokens_p.mean_below_band": 4.6798959374427795e-08, + "tokens_p.mean_in_band": 0.58203125, + "tokens_rate.above_band": 0.9775641025641025, + "tokens_rate.below_band": 0.003205128205128205, + "tokens_rate.in_band": 0.019230769230769232 + }, + { + "epoch": 0.8446953557733277, + "grad_norm": 156.50849508386275, + "learning_rate": 3.927259886970009e-07, + "loss": 0.3372, + "step": 3965, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9529085872576177, + "success_rate.epoch.env.logic": 0.8977853492333902, + "success_rate.epoch.env.math": 0.9608888888888889, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7932707861533485, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.868480156107479, + "success_rate.epoch.global": 0.880429292929293, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9926581325301205, + "tokens_p.mean_in_band": 0.72265625, + "tokens_rate.above_band": 0.9880952380952381, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011904761904761904 + }, + { + "epoch": 0.8457605453770771, + "grad_norm": 61.34322900107282, + "learning_rate": 3.9270634164072287e-07, + "loss": 0.3148, + "step": 3970, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9529085872576177, + "success_rate.epoch.env.logic": 0.8979591836734694, + "success_rate.epoch.env.math": 0.9609409675987572, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.793148028442146, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8685435501152289, + "success_rate.epoch.global": 0.880453972257251, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99811872909699, + "tokens_p.mean_in_band": 0.5481770833333334, + "tokens_rate.above_band": 0.9900662251655629, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009933774834437087 + }, + { + "epoch": 0.8468257349808266, + "grad_norm": 141.95414518239207, + "learning_rate": 3.92686670748298e-07, + "loss": 0.4391, + "step": 3975, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9530386740331491, + "success_rate.epoch.env.logic": 0.8980458793542906, + "success_rate.epoch.env.math": 0.9601240584847143, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7934151065203358, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8685132729716379, + "success_rate.epoch.global": 0.8803526448362721, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955533596837944, + "tokens_p.mean_below_band": 4.602043190971017e-10, + "tokens_p.mean_in_band": 0.72607421875, + "tokens_rate.above_band": 0.9656488549618321, + "tokens_rate.below_band": 0.003816793893129771, + "tokens_rate.in_band": 0.030534351145038167 + }, + { + "epoch": 0.847890924584576, + "grad_norm": 169.26328746822526, + "learning_rate": 3.9266697603406245e-07, + "loss": 0.2742, + "step": 3980, + "success_rate.epoch.env.abd": 0.9858757062146892, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9530386740331491, + "success_rate.epoch.env.logic": 0.8982188295165394, + "success_rate.epoch.env.math": 0.9601769911504425, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.7935483870967742, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8685956596945463, + "success_rate.epoch.global": 0.8805031446540881, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989608076009501, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9952718676122931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004728132387706856 + }, + { + "epoch": 0.8489561141883255, + "grad_norm": 100.70757867506678, + "learning_rate": 3.9264725751236945e-07, + "loss": 0.4345, + "step": 3985, + "success_rate.epoch.env.abd": 0.9858757062146892, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9505494505494505, + "success_rate.epoch.env.logic": 0.8974576271186441, + "success_rate.epoch.env.math": 0.9602297834732656, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7931034482758621, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8679421440881406, + "success_rate.epoch.global": 0.8800251256281407, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.36666666666666664, + "success_rate.window.global": 0.5, + "tokens_p.mean_above_band": 0.9952293882978723, + "tokens_p.mean_in_band": 0.686374470338983, + "tokens_rate.above_band": 0.9409409409409409, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05905905905905906 + }, + { + "epoch": 0.850021303792075, + "grad_norm": 116.7782777363338, + "learning_rate": 3.9262751519758984e-07, + "loss": 0.3873, + "step": 3990, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9505494505494505, + "success_rate.epoch.env.logic": 0.8976311336717429, + "success_rate.epoch.env.math": 0.9602649006622517, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7930479562278725, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8679632787844298, + "success_rate.epoch.global": 0.8800501882057716, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964953271028038, + "tokens_p.mean_in_band": 0.3697916666666667, + "tokens_rate.above_band": 0.9727272727272728, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02727272727272727 + }, + { + "epoch": 0.8510864933958244, + "grad_norm": 43.19740481601878, + "learning_rate": 3.926077491041116e-07, + "loss": 0.2786, + "step": 3995, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, + "success_rate.epoch.env.agentgym:sciworld": 0.9763779527559056, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9505494505494505, + "success_rate.epoch.env.logic": 0.8976311336717429, + "success_rate.epoch.env.math": 0.9603174603174603, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7934468358496627, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8680128066983087, + "success_rate.epoch.global": 0.8802005012531329, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9934593023255814, + "tokens_p.mean_in_band": 0.78466796875, + "tokens_rate.above_band": 0.9555555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044444444444444446 + }, + { + "epoch": 0.8521516829995739, + "grad_norm": 1092.510596362934, + "learning_rate": 3.9258795924634016e-07, + "loss": 0.2843, + "step": 4000, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9764705882352941, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8977176669484361, + "success_rate.epoch.env.math": 0.9603174603174603, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7937780628608082, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.868125878947933, + "success_rate.epoch.global": 0.8803504380475594, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964699074074074, + "tokens_p.mean_in_band": 0.7330729166666666, + "tokens_rate.above_band": 0.989010989010989, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01098901098901099 + }, + { + "epoch": 0.8532168726033234, + "grad_norm": 72.34301840217742, + "learning_rate": 3.925681456386981e-07, + "loss": 0.413, + "step": 4005, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, + "success_rate.epoch.env.agentgym:sciworld": 0.9764705882352941, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8962900505902193, + "success_rate.epoch.env.math": 0.9603349493168797, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7940422805893658, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8681049363737113, + "success_rate.epoch.global": 0.88025, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.3333333333333333, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988313990973565, + "tokens_p.mean_in_band": 0.5166311553030303, + "tokens_rate.above_band": 0.9591836734693877, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04081632653061224 + }, + { + "epoch": 0.8542820622070728, + "grad_norm": 106.61756173721486, + "learning_rate": 3.925483082956257e-07, + "loss": 0.3229, + "step": 4010, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, + "success_rate.epoch.env.agentgym:sciworld": 0.9765625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9509536784741145, + "success_rate.epoch.env.logic": 0.8962900505902193, + "success_rate.epoch.env.math": 0.9604047514298284, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7936660268714012, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8680976151466802, + "success_rate.epoch.global": 0.8801498127340824, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971543874172185, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9885433715220949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011456628477905073 + }, + { + "epoch": 0.8553472518108223, + "grad_norm": 58.6507490021795, + "learning_rate": 3.9252844723158e-07, + "loss": 0.2494, + "step": 4015, + "success_rate.epoch.env.abd": 0.9859943977591037, + "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, + "success_rate.epoch.env.agentgym:sciworld": 0.9765625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9510869565217391, + "success_rate.epoch.env.logic": 0.8963774220724515, + "success_rate.epoch.env.math": 0.9604221635883905, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7937420178799489, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8681297418990707, + "success_rate.epoch.global": 0.8801745635910224, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9916914682539683, + "tokens_p.mean_in_band": 0.61328125, + "tokens_rate.above_band": 0.9882352941176471, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011764705882352941 + }, + { + "epoch": 0.8564124414145718, + "grad_norm": 137.65795026171776, + "learning_rate": 3.925085624610358e-07, + "loss": 0.4212, + "step": 4020, + "success_rate.epoch.env.abd": 0.9859943977591037, + "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, + "success_rate.epoch.env.agentgym:sciworld": 0.9765625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9510869565217391, + "success_rate.epoch.env.logic": 0.896551724137931, + "success_rate.epoch.env.math": 0.9604916593503073, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.793939393939394, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8682178978184395, + "success_rate.epoch.global": 0.8803237858032379, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964345637583892, + "tokens_p.mean_in_band": 0.75390625, + "tokens_rate.above_band": 0.9490445859872612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.050955414012738856 + }, + { + "epoch": 0.8574776310183213, + "grad_norm": 203.70698780465727, + "learning_rate": 3.924886539984848e-07, + "loss": 0.3429, + "step": 4025, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, + "success_rate.epoch.env.agentgym:sciworld": 0.9765625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9510869565217391, + "success_rate.epoch.env.logic": 0.896551724137931, + "success_rate.epoch.env.math": 0.9605781865965834, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7942019751513221, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8682531914777278, + "success_rate.epoch.global": 0.8804726368159204, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9917929292929293, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.9801980198019802, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019801980198019802 + }, + { + "epoch": 0.8585428206220708, + "grad_norm": 89.44674044654744, + "learning_rate": 3.9246872185843627e-07, + "loss": 0.2971, + "step": 4030, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, + "success_rate.epoch.env.agentgym:sciworld": 0.9765625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.8966386554621849, + "success_rate.epoch.env.math": 0.9601924759405074, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7944638880050907, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.868273875873324, + "success_rate.epoch.global": 0.8804968944099378, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964303607214429, + "tokens_p.mean_in_band": 0.4097222222222222, + "tokens_rate.above_band": 0.9822834645669292, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017716535433070866 + }, + { + "epoch": 0.8596080102258202, + "grad_norm": 578.7139223438281, + "learning_rate": 3.9244876605541657e-07, + "loss": 0.2879, + "step": 4035, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, + "success_rate.epoch.env.agentgym:sciworld": 0.9765625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.8966386554621849, + "success_rate.epoch.env.math": 0.959825327510917, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7941550190597204, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8682535513612627, + "success_rate.epoch.global": 0.880272952853598, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.7833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9986111111111111, + "tokens_p.mean_in_band": 0.44389204545454547, + "tokens_rate.above_band": 0.9703504043126685, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029649595687331536 + }, + { + "epoch": 0.8606731998295697, + "grad_norm": 74.01880630776523, + "learning_rate": 3.924287866039694e-07, + "loss": 0.3464, + "step": 4040, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8841698841698842, + "success_rate.epoch.env.agentgym:sciworld": 0.9766536964980544, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.8966386554621849, + "success_rate.epoch.env.math": 0.959895379250218, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7944162436548223, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8683327719766976, + "success_rate.epoch.global": 0.8804213135068154, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967657930107527, + "tokens_p.mean_in_band": 0.7330729166666666, + "tokens_rate.above_band": 0.992, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008 + }, + { + "epoch": 0.8617383894333192, + "grad_norm": 92.83759227187693, + "learning_rate": 3.924087835186555e-07, + "loss": 0.1937, + "step": 4045, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.8959731543624161, + "success_rate.epoch.env.math": 0.9599303135888502, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7943599493029151, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8683190564204171, + "success_rate.epoch.global": 0.8803217821782178, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9970163316582915, + "tokens_p.mean_in_band": 0.6305803571428571, + "tokens_rate.above_band": 0.9770867430441899, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022913256955810146 + }, + { + "epoch": 0.8628035790370686, + "grad_norm": 129.2356792180417, + "learning_rate": 3.923887568140532e-07, + "loss": 0.2815, + "step": 4050, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.8960603520536463, + "success_rate.epoch.env.math": 0.9595827900912647, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7945552389996834, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8683166807600233, + "success_rate.epoch.global": 0.8803461063040791, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975887345679012, + "tokens_p.mean_in_band": 0.662890625, + "tokens_rate.above_band": 0.9418604651162791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05813953488372093 + }, + { + "epoch": 0.8638687686408181, + "grad_norm": 447.11940720490844, + "learning_rate": 3.9236870650475755e-07, + "loss": 0.2807, + "step": 4055, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9730769230769231, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.8961474036850922, + "success_rate.epoch.env.math": 0.9596529284164859, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7946852261942423, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.868021320997719, + "success_rate.epoch.global": 0.8803703703703704, + "success_rate.window.env.agentgym:sciworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970817120622568, + "tokens_p.mean_in_band": 0.5904947916666666, + "tokens_rate.above_band": 0.9941972920696325, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005802707930367505 + }, + { + "epoch": 0.8649339582445675, + "grad_norm": 418.50623340237587, + "learning_rate": 3.9234863260538133e-07, + "loss": 0.4336, + "step": 4060, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8812260536398467, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.8963210702341137, + "success_rate.epoch.env.math": 0.9596879063719116, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7943127962085308, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8677076880063215, + "success_rate.epoch.global": 0.8801479654747225, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9975684438040345, + "tokens_p.mean_in_band": 0.659912109375, + "tokens_rate.above_band": 0.9774647887323944, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022535211267605635 + }, + { + "epoch": 0.865999147848317, + "grad_norm": 78.41672080751667, + "learning_rate": 3.9232853513055403e-07, + "loss": 0.3121, + "step": 4065, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8816793893129771, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.8964076858813701, + "success_rate.epoch.env.math": 0.9596879063719116, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7945724203218681, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8675240904139364, + "success_rate.epoch.global": 0.8801724137931034, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998042656587473, + "tokens_p.mean_in_band": 0.7019230769230769, + "tokens_rate.above_band": 0.9726890756302521, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0273109243697479 + }, + { + "epoch": 0.8670643374520665, + "grad_norm": 60.0318132659138, + "learning_rate": 3.923084140949227e-07, + "loss": 0.3531, + "step": 4070, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8816793893129771, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.8967527060782681, + "success_rate.epoch.env.math": 0.9597053726169844, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7948960302457467, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8675864628108318, + "success_rate.epoch.global": 0.8803198031980319, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9891732283464567, + "tokens_p.mean_in_band": 0.8234375, + "tokens_rate.above_band": 0.927007299270073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.072992700729927 + }, + { + "epoch": 0.8681295270558159, + "grad_norm": 139.29665278406733, + "learning_rate": 3.9228826951315135e-07, + "loss": 0.3499, + "step": 4075, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8816793893129771, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.8967527060782681, + "success_rate.epoch.env.math": 0.9593777009507347, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7942101950912523, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8674943258271278, + "success_rate.epoch.global": 0.87997542997543, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.5416666666666667, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9901315789473685, + "tokens_p.mean_in_band": 0.5724909855769231, + "tokens_rate.above_band": 0.8142857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.18571428571428572 + }, + { + "epoch": 0.8691947166595654, + "grad_norm": 85.80021342783449, + "learning_rate": 3.9226810139992115e-07, + "loss": 0.3224, + "step": 4080, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8821292775665399, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.8961794019933554, + "success_rate.epoch.env.math": 0.959412780656304, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7944042753850991, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8675074171893459, + "success_rate.epoch.global": 0.88, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998291015625, + "tokens_p.mean_in_band": 0.4763327205882353, + "tokens_rate.above_band": 0.978343949044586, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02165605095541401 + }, + { + "epoch": 0.8702599062633148, + "grad_norm": 190.94003553549646, + "learning_rate": 3.9224790976993063e-07, + "loss": 0.4686, + "step": 4085, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.896351575456053, + "success_rate.epoch.env.math": 0.9594652867615352, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7939698492462312, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.867540795087502, + "success_rate.epoch.global": 0.8799019607843137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973908918406073, + "tokens_p.mean_in_band": 0.53359375, + "tokens_rate.above_band": 0.9723247232472325, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027675276752767528 + }, + { + "epoch": 0.8713250958670643, + "grad_norm": 164.87786367897124, + "learning_rate": 3.922276946378952e-07, + "loss": 0.2685, + "step": 4090, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9517426273458445, + "success_rate.epoch.env.logic": 0.896351575456053, + "success_rate.epoch.env.math": 0.9595350839431769, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.794228356336261, + "success_rate.epoch.env.webshop": 0.9782608695652174, + "success_rate.epoch.env_macro_mean": 0.8676263514674247, + "success_rate.epoch.global": 0.8800489596083231, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975837628865979, + "tokens_p.mean_in_band": 0.8454241071428571, + "tokens_rate.above_band": 0.9910600255427842, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008939974457215836 + }, + { + "epoch": 0.8723902854708138, + "grad_norm": 90.08712578404828, + "learning_rate": 3.922074560185474e-07, + "loss": 0.2735, + "step": 4095, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.952, + "success_rate.epoch.env.logic": 0.896351575456053, + "success_rate.epoch.env.math": 0.959552495697074, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7944218113444061, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8677212877097381, + "success_rate.epoch.global": 0.8801955990220048, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9999055177626606, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9992447129909365, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0007552870090634441 + }, + { + "epoch": 0.8734554750745632, + "grad_norm": 158.03905301086172, + "learning_rate": 3.921871939266372e-07, + "loss": 0.4088, + "step": 4100, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8956089478044739, + "success_rate.epoch.env.math": 0.959604641168887, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7934918648310388, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8675855814262231, + "success_rate.epoch.global": 0.8797313797313797, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.2, + "success_rate.window.env_macro_mean": 0.55, + "success_rate.window.global": 0.5, + "tokens_p.mean_above_band": 0.9980680868838764, + "tokens_p.mean_below_band": 1.8189894035458565e-09, + "tokens_p.mean_in_band": 0.49518229166666666, + "tokens_rate.above_band": 0.9747557003257329, + "tokens_rate.below_band": 0.0008143322475570033, + "tokens_rate.in_band": 0.024429967426710098 + }, + { + "epoch": 0.8745206646783127, + "grad_norm": 111.59912226078207, + "learning_rate": 3.9216690837693136e-07, + "loss": 0.3482, + "step": 4105, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8830188679245283, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8950413223140495, + "success_rate.epoch.env.math": 0.9596739596739596, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7935564591804817, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8675898383047734, + "success_rate.epoch.global": 0.8797560975609756, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971556886227545, + "tokens_p.mean_in_band": 0.5600328947368421, + "tokens_rate.above_band": 0.977751756440281, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02224824355971897 + }, + { + "epoch": 0.8755858542820623, + "grad_norm": 90.48979363370822, + "learning_rate": 3.921465993842138e-07, + "loss": 0.4444, + "step": 4110, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8830188679245283, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8952145214521452, + "success_rate.epoch.env.math": 0.9597602739726028, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.793125, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.867574206873524, + "success_rate.epoch.global": 0.8796589524969549, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9912790697674418, + "tokens_p.mean_in_band": 0.406982421875, + "tokens_rate.above_band": 0.8431372549019608, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1568627450980392 + }, + { + "epoch": 0.8766510438858117, + "grad_norm": 92.5598991521915, + "learning_rate": 3.9212626696328564e-07, + "loss": 0.3172, + "step": 4115, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8830188679245283, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8938271604938272, + "success_rate.epoch.env.math": 0.9598118854211202, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933832709113608, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8674762542736659, + "success_rate.epoch.global": 0.8795620437956204, + "success_rate.window.env.logic": 0.3333333333333333, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997539592760181, + "tokens_p.mean_in_band": 0.5785590277777778, + "tokens_rate.above_band": 0.9608695652173913, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0391304347826087 + }, + { + "epoch": 0.8777162334895612, + "grad_norm": 320.6005692861649, + "learning_rate": 3.9210591112896503e-07, + "loss": 0.2694, + "step": 4120, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8940016433853739, + "success_rate.epoch.env.math": 0.9598804950917627, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7934477379095164, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676037871658405, + "success_rate.epoch.global": 0.8797083839611178, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972596153846154, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9984639016897081, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0015360983102918587 + }, + { + "epoch": 0.8787814230933106, + "grad_norm": 226.12339653147313, + "learning_rate": 3.9208553189608706e-07, + "loss": 0.283, + "step": 4125, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8942622950819672, + "success_rate.epoch.env.math": 0.9598976109215017, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7932108377452507, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676075023805737, + "success_rate.epoch.global": 0.8796116504854369, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9880257009345794, + "tokens_p.mean_in_band": 0.3138020833333333, + "tokens_rate.above_band": 0.9224137931034483, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07758620689655173 + }, + { + "epoch": 0.8798466126970601, + "grad_norm": 168.1893708708438, + "learning_rate": 3.920651292795041e-07, + "loss": 0.3633, + "step": 4130, + "success_rate.epoch.env.abd": 0.9864130434782609, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8943488943488943, + "success_rate.epoch.env.math": 0.9599829714772243, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933395580454404, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676415865430186, + "success_rate.epoch.global": 0.8797575757575757, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978243670886076, + "tokens_p.mean_in_band": 0.734375, + "tokens_rate.above_band": 0.9753086419753086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024691358024691357 + }, + { + "epoch": 0.8809118023008096, + "grad_norm": 115.04204821439724, + "learning_rate": 3.9204470329408526e-07, + "loss": 0.4687, + "step": 4135, + "success_rate.epoch.env.abd": 0.9864130434782609, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8943488943488943, + "success_rate.epoch.env.math": 0.96, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7929857231533209, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676109677821692, + "success_rate.epoch.global": 0.8795399515738499, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9892578125, + "tokens_p.mean_in_band": 0.5427631578947368, + "tokens_rate.above_band": 0.8347826086956521, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16521739130434782 + }, + { + "epoch": 0.881976991904559, + "grad_norm": 233.01581167450817, + "learning_rate": 3.9202425395471694e-07, + "loss": 0.3323, + "step": 4140, + "success_rate.epoch.env.abd": 0.986449864498645, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8944353518821604, + "success_rate.epoch.env.math": 0.9600340136054422, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7930607187112764, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676320848473552, + "success_rate.epoch.global": 0.8795646916565901, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9897629310344828, + "tokens_p.mean_in_band": 0.678515625, + "tokens_rate.above_band": 0.9206349206349206, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07936507936507936 + }, + { + "epoch": 0.8830421815083085, + "grad_norm": 121.88659449962579, + "learning_rate": 3.920037812763025e-07, + "loss": 0.3967, + "step": 4145, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8946078431372549, + "success_rate.epoch.env.math": 0.9600679694137638, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933168316831684, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676889478013816, + "success_rate.epoch.global": 0.8797101449275362, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994739057239057, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9983193277310924, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016806722689075631 + }, + { + "epoch": 0.884107371112058, + "grad_norm": 98.22379595810378, + "learning_rate": 3.9198328527376226e-07, + "loss": 0.1935, + "step": 4150, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.9732824427480916, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8946078431372549, + "success_rate.epoch.env.math": 0.9601357082273113, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7932632880098888, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8677109664545746, + "success_rate.epoch.global": 0.8797346200241255, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994645305003427, + "tokens_p.mean_in_band": 0.6650390625, + "tokens_rate.above_band": 0.9972658920027341, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002734107997265892 + }, + { + "epoch": 0.8851725607158074, + "grad_norm": 99.45895646299643, + "learning_rate": 3.9196276596203355e-07, + "loss": 0.3137, + "step": 4155, + "success_rate.epoch.env.abd": 0.9865591397849462, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8946078431372549, + "success_rate.epoch.env.math": 0.96015260703688, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7930289944478717, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8677070433833173, + "success_rate.epoch.global": 0.8796385542168674, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979452054794521, + "tokens_p.mean_in_band": 0.5353422619047619, + "tokens_rate.above_band": 0.9455958549222798, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.054404145077720206 + }, + { + "epoch": 0.8862377503195569, + "grad_norm": 358.70068808547074, + "learning_rate": 3.9194222335607065e-07, + "loss": 0.4291, + "step": 4160, + "success_rate.epoch.env.abd": 0.9865591397849462, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8939641109298532, + "success_rate.epoch.env.math": 0.9602368866328257, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7932203389830509, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676735790127467, + "success_rate.epoch.global": 0.8796630565583634, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9920804794520548, + "tokens_p.mean_in_band": 0.8014322916666666, + "tokens_rate.above_band": 0.8902439024390244, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10975609756097561 + }, + { + "epoch": 0.8873029399233063, + "grad_norm": 56.52206180161196, + "learning_rate": 3.919216574708449e-07, + "loss": 0.4367, + "step": 4165, + "success_rate.epoch.env.abd": 0.9865951742627346, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8942229454841334, + "success_rate.epoch.env.math": 0.9603040540540541, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933477055743764, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8677180701986213, + "success_rate.epoch.global": 0.8798076923076923, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9941123188405797, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.971830985915493, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028169014084507043 + }, + { + "epoch": 0.8883681295270558, + "grad_norm": 359.63707897897535, + "learning_rate": 3.919010683213447e-07, + "loss": 0.2945, + "step": 4170, + "success_rate.epoch.env.abd": 0.9866310160427807, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8944805194805194, + "success_rate.epoch.env.math": 0.9598986914309835, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933579335793358, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.867708823031196, + "success_rate.epoch.global": 0.8797118847539016, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9921052631578947, + "tokens_p.mean_in_band": 0.6541466346153846, + "tokens_rate.above_band": 0.8796296296296297, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12037037037037036 + }, + { + "epoch": 0.8894333191308053, + "grad_norm": 114.01964139845309, + "learning_rate": 3.9188045592257505e-07, + "loss": 0.3021, + "step": 4175, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, + "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.8944805194805194, + "success_rate.epoch.env.math": 0.9595278246205734, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933681301811483, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8676906378545223, + "success_rate.epoch.global": 0.8796163069544365, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955357142857143, + "tokens_p.mean_in_band": 0.6779119318181818, + "tokens_rate.above_band": 0.9502262443438914, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.049773755656108594 + }, + { + "epoch": 0.8904985087345547, + "grad_norm": 230.81344888148251, + "learning_rate": 3.918598202895582e-07, + "loss": 0.345, + "step": 4180, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, + "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.8944805194805194, + "success_rate.epoch.env.math": 0.9595789473684211, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7936848559166155, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8677669827586881, + "success_rate.epoch.global": 0.8797604790419161, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974688473520249, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9968944099378882, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003105590062111801 + }, + { + "epoch": 0.8915636983383042, + "grad_norm": 145.8650421568688, + "learning_rate": 3.9183916143733335e-07, + "loss": 0.2868, + "step": 4185, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.8944805194805194, + "success_rate.epoch.env.math": 0.9596299411269975, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7933884297520661, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8677651384155124, + "success_rate.epoch.global": 0.8796650717703349, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997101814516129, + "tokens_p.mean_in_band": 0.5989583333333334, + "tokens_rate.above_band": 0.9323308270676691, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06766917293233082 + }, + { + "epoch": 0.8926288879420536, + "grad_norm": 308.1724686277715, + "learning_rate": 3.918184793809564e-07, + "loss": 0.2782, + "step": 4190, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.8937550689375506, + "success_rate.epoch.env.math": 0.9596808063838723, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.793398533007335, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8677450275410145, + "success_rate.epoch.global": 0.8795698924731182, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979619565217391, + "tokens_p.mean_in_band": 0.5003255208333334, + "tokens_rate.above_band": 0.968421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031578947368421054 + }, + { + "epoch": 0.8936940775458031, + "grad_norm": 69.32629351418618, + "learning_rate": 3.917977741355004e-07, + "loss": 0.4975, + "step": 4195, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.8939271255060729, + "success_rate.epoch.env.math": 0.9596977329974811, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.793587786259542, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8678002250207946, + "success_rate.epoch.global": 0.8797136038186157, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987980769230769, + "tokens_p.mean_in_band": 0.6298828125, + "tokens_rate.above_band": 0.9701492537313433, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029850746268656716 + }, + { + "epoch": 0.8947592671495527, + "grad_norm": 41.96870777450887, + "learning_rate": 3.9177704571605503e-07, + "loss": 0.1808, + "step": 4200, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.8940129449838188, + "success_rate.epoch.env.math": 0.959748427672956, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7939652544955806, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8678469506925454, + "success_rate.epoch.global": 0.8798569725864124, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.991688829787234, + "tokens_p.mean_in_band": 0.857421875, + "tokens_rate.above_band": 0.94, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06 + }, + { + "epoch": 0.8958244567533021, + "grad_norm": 43.17995073444214, + "learning_rate": 3.917562941377272e-07, + "loss": 0.2579, + "step": 4205, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8843283582089553, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9530026109660574, + "success_rate.epoch.env.logic": 0.8940986257073565, + "success_rate.epoch.env.math": 0.9598326359832636, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7940907706366128, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8679243745209013, + "success_rate.epoch.global": 0.88, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994019138755981, + "tokens_p.mean_in_band": 0.8352864583333334, + "tokens_rate.above_band": 0.9928741092636579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007125890736342043 + }, + { + "epoch": 0.8968896463570516, + "grad_norm": 56.92204749227752, + "learning_rate": 3.9173551941564027e-07, + "loss": 0.2152, + "step": 4210, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8847583643122676, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8940986257073565, + "success_rate.epoch.env.math": 0.9598494353826851, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.794224924012158, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8679883152401942, + "success_rate.epoch.global": 0.8800237812128419, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967813670411985, + "tokens_p.mean_in_band": 0.548828125, + "tokens_rate.above_band": 0.9888888888888889, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011111111111111112 + }, + { + "epoch": 0.8979548359608011, + "grad_norm": 150.98657235749974, + "learning_rate": 3.9171472156493495e-07, + "loss": 0.1373, + "step": 4215, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8851851851851852, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8940986257073565, + "success_rate.epoch.env.math": 0.9594820384294068, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.7942961165048543, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8680001894594974, + "success_rate.epoch.global": 0.8799287410926366, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997718253968254, + "tokens_p.mean_in_band": 0.46337890625, + "tokens_rate.above_band": 0.9752321981424149, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02476780185758514 + }, + { + "epoch": 0.8990200255645505, + "grad_norm": 34.085881201807204, + "learning_rate": 3.9169390060076844e-07, + "loss": 0.289, + "step": 4220, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8856088560885609, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8940986257073565, + "success_rate.epoch.env.math": 0.9595664860358483, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.7944831767202183, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8677665415142912, + "success_rate.epoch.global": 0.8799525504151838, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982049608355091, + "tokens_p.mean_in_band": 0.7213541666666666, + "tokens_rate.above_band": 0.9551122194513716, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04488778054862843 + }, + { + "epoch": 0.9000852151683, + "grad_norm": 204.394889676229, + "learning_rate": 3.9167305653831494e-07, + "loss": 0.373, + "step": 4225, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8856088560885609, + "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.8941841680129241, + "success_rate.epoch.env.math": 0.9596001665972511, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.7946698970321018, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8676478812468633, + "success_rate.epoch.global": 0.8799763033175355, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.98820816008316, + "tokens_p.mean_in_band": 0.5863589638157894, + "tokens_rate.above_band": 0.8350694444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16493055555555555 + }, + { + "epoch": 0.9011504047720494, + "grad_norm": 104.2274901005957, + "learning_rate": 3.916521893927654e-07, + "loss": 0.4514, + "step": 4230, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9735849056603774, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.8943548387096775, + "success_rate.epoch.env.math": 0.9596337910944652, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.794313369630974, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.867347145797434, + "success_rate.epoch.global": 0.8797633136094675, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9973611111111111, + "tokens_p.mean_in_band": 0.4739583333333333, + "tokens_rate.above_band": 0.9375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0625 + }, + { + "epoch": 0.9022155943757989, + "grad_norm": 340.6410492277857, + "learning_rate": 3.916312991793277e-07, + "loss": 0.4968, + "step": 4235, + "success_rate.epoch.env.abd": 0.9868766404199475, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9738805970149254, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.894524959742351, + "success_rate.epoch.env.math": 0.9596505823627288, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7944377267230955, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8671202586158372, + "success_rate.epoch.global": 0.8797872340425532, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979596219931272, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9603960396039604, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039603960396039604 + }, + { + "epoch": 0.9032807839795484, + "grad_norm": 29.060378578750147, + "learning_rate": 3.916103859132265e-07, + "loss": 0.2905, + "step": 4240, + "success_rate.epoch.env.abd": 0.9868766404199475, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9738805970149254, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9507772020725389, + "success_rate.epoch.env.logic": 0.8946945337620579, + "success_rate.epoch.env.math": 0.959717607973422, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7946239806704923, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8671703227068359, + "success_rate.epoch.global": 0.8799291617473436, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970414201183432, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9883040935672515, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011695906432748537 + }, + { + "epoch": 0.9043459735832978, + "grad_norm": 0.0, + "learning_rate": 3.915894496097032e-07, + "loss": 0.286, + "step": 4245, + "success_rate.epoch.env.abd": 0.9869109947643979, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9483204134366925, + "success_rate.epoch.env.logic": 0.8948635634028892, + "success_rate.epoch.env.math": 0.9597343295973433, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7948717948717948, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8669983435547468, + "success_rate.epoch.global": 0.8799528301886792, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_p.mean_below_band": 1.2014061212539673e-07, + "tokens_p.mean_in_band": 0.5553546167695473, + "tokens_rate.above_band": 0.8762677484787018, + "tokens_rate.below_band": 0.0005070993914807302, + "tokens_rate.in_band": 0.12322515212981744 + }, + { + "epoch": 0.9054111631870473, + "grad_norm": 184.09510660925153, + "learning_rate": 3.9156849028401606e-07, + "loss": 0.696, + "step": 4250, + "success_rate.epoch.env.abd": 0.9869451697127938, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9484536082474226, + "success_rate.epoch.env.logic": 0.8949478748997595, + "success_rate.epoch.env.math": 0.9598177299088649, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7946940006029545, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.867012642400081, + "success_rate.epoch.global": 0.8799764428739694, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975316210635002, + "tokens_p.mean_in_band": 0.7434895833333334, + "tokens_rate.above_band": 0.9969119917653114, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003088008234688626 + }, + { + "epoch": 0.9064763527907967, + "grad_norm": 30.279198121952643, + "learning_rate": 3.9154750795144e-07, + "loss": 0.2131, + "step": 4255, + "success_rate.epoch.env.abd": 0.9869451697127938, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9485861182519281, + "success_rate.epoch.env.logic": 0.8950320512820513, + "success_rate.epoch.env.math": 0.9598676044683492, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7947019867549668, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8670376012271986, + "success_rate.epoch.global": 0.88, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981664540816326, + "tokens_p.mean_in_band": 0.7180397727272727, + "tokens_rate.above_band": 0.9727047146401985, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02729528535980149 + }, + { + "epoch": 0.9075415423945462, + "grad_norm": 174.59313184097746, + "learning_rate": 3.9152650262726684e-07, + "loss": 0.4507, + "step": 4260, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9485861182519281, + "success_rate.epoch.env.logic": 0.8952, + "success_rate.epoch.env.math": 0.9595208591491119, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7948255114320096, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8670387417159211, + "success_rate.epoch.global": 0.8800235017626322, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9932725694444444, + "tokens_p.mean_in_band": 0.6432291666666666, + "tokens_rate.above_band": 0.96, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04 + }, + { + "epoch": 0.9086067319982957, + "grad_norm": 40.598275032733184, + "learning_rate": 3.915054743268052e-07, + "loss": 0.2617, + "step": 4265, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8827838827838828, + "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9485861182519281, + "success_rate.epoch.env.logic": 0.8952837729816147, + "success_rate.epoch.env.math": 0.9595375722543352, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7950105199879771, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8671472157805767, + "success_rate.epoch.global": 0.8801643192488263, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994071815718157, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.9096719216020451, + "grad_norm": 242.4380537843047, + "learning_rate": 3.914844230653802e-07, + "loss": 0.2221, + "step": 4270, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8827838827838828, + "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.948849104859335, + "success_rate.epoch.env.logic": 0.8952837729816147, + "success_rate.epoch.env.math": 0.9591752577319588, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7953181272509003, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8671661502667544, + "success_rate.epoch.global": 0.8801875732708089, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975016937669376, + "tokens_p.mean_in_band": 0.72998046875, + "tokens_rate.above_band": 0.9892761394101877, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010723860589812333 + }, + { + "epoch": 0.9107371112057946, + "grad_norm": 669.6763207594614, + "learning_rate": 3.9146334885833396e-07, + "loss": 0.3902, + "step": 4275, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, + "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.948849104859335, + "success_rate.epoch.env.logic": 0.8954509177972865, + "success_rate.epoch.env.math": 0.9591920857378401, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7953251423434222, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672663207894588, + "success_rate.epoch.global": 0.8802107728337236, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972956730769231, + "tokens_p.mean_in_band": 0.5191761363636364, + "tokens_rate.above_band": 0.9497716894977168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0502283105022831 + }, + { + "epoch": 0.911802300809544, + "grad_norm": 240.13102506303147, + "learning_rate": 3.914422517210251e-07, + "loss": 0.3916, + "step": 4280, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, + "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.948849104859335, + "success_rate.epoch.env.logic": 0.8947368421052632, + "success_rate.epoch.env.math": 0.9592760180995475, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7955701885662975, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672313119615097, + "success_rate.epoch.global": 0.8802339181286549, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994140625, + "tokens_p.mean_in_band": 0.7431640625, + "tokens_rate.above_band": 0.9230769230769231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07692307692307693 + }, + { + "epoch": 0.9128674904132935, + "grad_norm": 84.75150776611382, + "learning_rate": 3.9142113166882925e-07, + "loss": 0.354, + "step": 4285, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, + "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.8948207171314742, + "success_rate.epoch.env.math": 0.9593596059113301, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7954545454545454, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672478852982043, + "success_rate.epoch.global": 0.8802570093457944, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971498371335505, + "tokens_p.mean_in_band": 0.6361607142857143, + "tokens_rate.above_band": 0.956386292834891, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04361370716510903 + }, + { + "epoch": 0.9139326800170431, + "grad_norm": 156.27392671996975, + "learning_rate": 3.9139998871713836e-07, + "loss": 0.3656, + "step": 4290, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, + "success_rate.epoch.env.agentgym:sciworld": 0.974169741697417, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.8949044585987261, + "success_rate.epoch.env.math": 0.9593762823143209, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7952849895553566, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672502970795133, + "success_rate.epoch.global": 0.8801633605600934, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9907142857142858, + "tokens_p.mean_in_band": 0.651611328125, + "tokens_rate.above_band": 0.9162303664921466, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08376963350785341 + }, + { + "epoch": 0.9149978696207925, + "grad_norm": 63.44082478644645, + "learning_rate": 3.913788228813614e-07, + "loss": 0.2266, + "step": 4295, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.974169741697417, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.8951548848292296, + "success_rate.epoch.env.math": 0.9594428512904547, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7951088577393379, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673017105517736, + "success_rate.epoch.global": 0.8801864801864802, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965029761904762, + "tokens_p.mean_in_band": 0.6805555555555556, + "tokens_rate.above_band": 0.9790209790209791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02097902097902098 + }, + { + "epoch": 0.916063059224542, + "grad_norm": 111.16298476786717, + "learning_rate": 3.913576341769238e-07, + "loss": 0.3859, + "step": 4300, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.974169741697417, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.8953211736716892, + "success_rate.epoch.env.math": 0.95949263502455, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7954139368671829, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673490879794463, + "success_rate.epoch.global": 0.880325960419092, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9920833333333333, + "tokens_p.mean_in_band": 0.7415364583333334, + "tokens_rate.above_band": 0.9259259259259259, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07407407407407407 + }, + { + "epoch": 0.9171282488282915, + "grad_norm": 349.41621481729527, + "learning_rate": 3.9133642261926775e-07, + "loss": 0.4682, + "step": 4305, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.8953211736716892, + "success_rate.epoch.env.math": 0.9595753368721928, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7955357142857142, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673879071276658, + "success_rate.epoch.global": 0.8804651162790698, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979987684729064, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.9181934384320409, + "grad_norm": 404.63530297591666, + "learning_rate": 3.91315188223852e-07, + "loss": 0.4303, + "step": 4310, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9465648854961832, + "success_rate.epoch.env.logic": 0.894695170229612, + "success_rate.epoch.env.math": 0.9596083231334149, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7954815695600476, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8671516040770321, + "success_rate.epoch.global": 0.8802555168408827, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.65, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9928125, + "tokens_p.mean_in_band": 0.6816625702247191, + "tokens_rate.above_band": 0.910010111223458, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08998988877654196 + }, + { + "epoch": 0.9192586280357904, + "grad_norm": 70.38997713588995, + "learning_rate": 3.9129393100615224e-07, + "loss": 0.526, + "step": 4315, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.8948616600790514, + "success_rate.epoch.env.math": 0.9596412556053812, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7957850994360345, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.867209656287525, + "success_rate.epoch.global": 0.8803944315545243, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972826086956522, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9928057553956835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007194244604316547 + }, + { + "epoch": 0.9203238176395399, + "grad_norm": 281.8248787932557, + "learning_rate": 3.912726509816604e-07, + "loss": 0.3975, + "step": 4320, + "success_rate.epoch.env.abd": 0.987146529562982, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.8951104100946372, + "success_rate.epoch.env.math": 0.959674134419552, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7954343314556774, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672063825012092, + "success_rate.epoch.global": 0.8803012746234067, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939088983050848, + "tokens_p.mean_in_band": 0.471484375, + "tokens_rate.above_band": 0.921875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.078125 + }, + { + "epoch": 0.9213890072432893, + "grad_norm": 192.9832077656174, + "learning_rate": 3.9125134816588524e-07, + "loss": 0.2983, + "step": 4325, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.8952755905511811, + "success_rate.epoch.env.math": 0.959674134419552, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7956766360675156, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672663563109105, + "success_rate.epoch.global": 0.8804398148148148, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992647058823529, + "tokens_p.mean_in_band": 0.7731370192307693, + "tokens_rate.above_band": 0.9786184210526315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02138157894736842 + }, + { + "epoch": 0.9224541968470388, + "grad_norm": 103.33526123252042, + "learning_rate": 3.912300225743521e-07, + "loss": 0.252, + "step": 4330, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, + "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.8953579858379229, + "success_rate.epoch.env.math": 0.9597397315982107, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7956830277942046, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672803912374643, + "success_rate.epoch.global": 0.8804624277456647, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9922680412371134, + "tokens_p.mean_in_band": 0.5939670138888888, + "tokens_rate.above_band": 0.8434782608695652, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1565217391304348 + }, + { + "epoch": 0.9235193864507882, + "grad_norm": 61.21320026286401, + "learning_rate": 3.91208674222603e-07, + "loss": 0.4061, + "step": 4335, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8840579710144928, + "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9468354430379747, + "success_rate.epoch.env.logic": 0.8954402515723271, + "success_rate.epoch.env.math": 0.9597724502234863, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.7956894006495424, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673420185117284, + "success_rate.epoch.global": 0.8804849884526559, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980837264150944, + "tokens_p.mean_in_band": 0.62255859375, + "tokens_rate.above_band": 0.9906542056074766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009345794392523364 + }, + { + "epoch": 0.9245845760545377, + "grad_norm": 48.43378313483629, + "learning_rate": 3.911873031261963e-07, + "loss": 0.4339, + "step": 4340, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8840579710144928, + "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9468354430379747, + "success_rate.epoch.env.logic": 0.8955223880597015, + "success_rate.epoch.env.math": 0.9593826157595451, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7954009433962265, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8670195031474527, + "success_rate.epoch.global": 0.880161476355248, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.52, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9958333333333333, + "tokens_p.mean_in_band": 0.5508928571428572, + "tokens_rate.above_band": 0.825, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.175 + }, + { + "epoch": 0.9256497656582872, + "grad_norm": 127.43720098759341, + "learning_rate": 3.911659093007073e-07, + "loss": 0.3267, + "step": 4345, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8844765342960289, + "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9468354430379747, + "success_rate.epoch.env.logic": 0.8956862745098039, + "success_rate.epoch.env.math": 0.9593991067803491, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7954077126876656, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8670829519345297, + "success_rate.epoch.global": 0.880184331797235, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971931137724551, + "tokens_p.mean_in_band": 0.47794117647058826, + "tokens_rate.above_band": 0.9515669515669516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04843304843304843 + }, + { + "epoch": 0.9267149552620366, + "grad_norm": 48.717733298436826, + "learning_rate": 3.9114449276172745e-07, + "loss": 0.3, + "step": 4350, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8853046594982079, + "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.946969696969697, + "success_rate.epoch.env.logic": 0.8956862745098039, + "success_rate.epoch.env.math": 0.9594155844155844, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7954745812518366, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8671780178739212, + "success_rate.epoch.global": 0.8802071346375144, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995101880877743, + "tokens_p.mean_in_band": 0.5290178571428571, + "tokens_rate.above_band": 0.9891472868217054, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010852713178294573 + }, + { + "epoch": 0.9277801448657861, + "grad_norm": 108.72267923029256, + "learning_rate": 3.911230535248652e-07, + "loss": 0.4403, + "step": 4355, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8853046594982079, + "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.946969696969697, + "success_rate.epoch.env.logic": 0.8959311424100157, + "success_rate.epoch.env.math": 0.9594977723774808, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7955346650998825, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672535089758759, + "success_rate.epoch.global": 0.8803448275862069, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966755319148937, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9791666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020833333333333332 + }, + { + "epoch": 0.9288453344695355, + "grad_norm": 101.6346237726961, + "learning_rate": 3.9110159160574513e-07, + "loss": 0.3878, + "step": 4360, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8861209964412812, + "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.946969696969697, + "success_rate.epoch.env.logic": 0.8959311424100157, + "success_rate.epoch.env.math": 0.959546925566343, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.795834555588149, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673594526686216, + "success_rate.epoch.global": 0.8804822043628013, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975269784172662, + "tokens_p.mean_in_band": 0.765625, + "tokens_rate.above_band": 0.9985632183908046, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0014367816091954023 + }, + { + "epoch": 0.929910524073285, + "grad_norm": 82.60250224563595, + "learning_rate": 3.9108010702000866e-07, + "loss": 0.404, + "step": 4365, + "success_rate.epoch.env.abd": 0.9873096446700508, + "success_rate.epoch.env.agentgym:alfworld": 0.8861209964412812, + "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.947103274559194, + "success_rate.epoch.env.logic": 0.89609375, + "success_rate.epoch.env.math": 0.9596122778675282, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7956610964526531, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.867379486243681, + "success_rate.epoch.global": 0.8805045871559632, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982199367088608, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0125 + }, + { + "epoch": 0.9309757136770345, + "grad_norm": 32.798710622569736, + "learning_rate": 3.910585997833135e-07, + "loss": 0.2729, + "step": 4370, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.8962558502340093, + "success_rate.epoch.env.math": 0.959628582963262, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7956076134699853, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8674425573734996, + "success_rate.epoch.global": 0.8805269186712485, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995300751879699, + "tokens_p.mean_in_band": 0.521875, + "tokens_rate.above_band": 0.9851851851851852, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014814814814814815 + }, + { + "epoch": 0.9320409032807839, + "grad_norm": 74.0998821619742, + "learning_rate": 3.9103706991133397e-07, + "loss": 0.261, + "step": 4375, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, + "success_rate.epoch.env.agentgym:sciworld": 0.9711191335740073, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.8964174454828661, + "success_rate.epoch.env.math": 0.9596774193548387, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7957870099473376, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.867158128330988, + "success_rate.epoch.global": 0.8805355303810505, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9956293706293706, + "tokens_p.mean_in_band": 0.62890625, + "tokens_rate.above_band": 0.9862068965517241, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013793103448275862 + }, + { + "epoch": 0.9331060928845335, + "grad_norm": 207.2327266580442, + "learning_rate": 3.91015517419761e-07, + "loss": 0.3245, + "step": 4380, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, + "success_rate.epoch.env.agentgym:sciworld": 0.9711191335740073, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.8965785381026439, + "success_rate.epoch.env.math": 0.9596936719064894, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7961448598130841, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672432344578194, + "success_rate.epoch.global": 0.8806720768087781, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974464980544747, + "tokens_p.mean_in_band": 0.716796875, + "tokens_rate.above_band": 0.9922779922779923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007722007722007722 + }, + { + "epoch": 0.934171282488283, + "grad_norm": 127.374653904594, + "learning_rate": 3.909939423243018e-07, + "loss": 0.2777, + "step": 4385, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, + "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.8967391304347826, + "success_rate.epoch.env.math": 0.959323399113975, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7960910151691949, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8672562202072047, + "success_rate.epoch.global": 0.8805799748829775, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961376404494382, + "tokens_p.mean_in_band": 0.5571732954545454, + "tokens_rate.above_band": 0.89, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11 + }, + { + "epoch": 0.9352364720920324, + "grad_norm": 187.94048768209956, + "learning_rate": 3.9097234464068015e-07, + "loss": 0.3394, + "step": 4390, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, + "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8969790859798605, + "success_rate.epoch.env.math": 0.9593888218737434, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7962099125364431, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673068125544959, + "success_rate.epoch.global": 0.8807161591971718, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989265267175572, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9924242424242424, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007575757575757576 + }, + { + "epoch": 0.9363016616957819, + "grad_norm": 216.5611589905703, + "learning_rate": 3.909507243846363e-07, + "loss": 0.1997, + "step": 4395, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, + "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.897138437741686, + "success_rate.epoch.env.math": 0.9594377510040161, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7965065502183406, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.867352714243041, + "success_rate.epoch.global": 0.8808520332611914, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9928125, + "tokens_p.mean_in_band": 0.8154296875, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 0.9373668512995313, + "grad_norm": 131.85835247237105, + "learning_rate": 3.9092908157192694e-07, + "loss": 0.3984, + "step": 4400, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, + "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8972972972972973, + "success_rate.epoch.env.math": 0.9590690208667737, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7968023255813953, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8673991755929911, + "success_rate.epoch.global": 0.8808738195471613, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954861111111111, + "tokens_p.mean_in_band": 0.283203125, + "tokens_rate.above_band": 0.9782608695652174, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021739130434782608 + }, + { + "epoch": 0.9384320409032808, + "grad_norm": 232.27565672408662, + "learning_rate": 3.9090741621832517e-07, + "loss": 0.2866, + "step": 4405, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, + "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8972972972972973, + "success_rate.epoch.env.math": 0.9591509811774129, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7970383275261324, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8674642765806588, + "success_rate.epoch.global": 0.8810092055915445, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995816256830601, + "tokens_p.mean_in_band": 0.794921875, + "tokens_rate.above_band": 0.9891891891891892, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010810810810810811 + }, + { + "epoch": 0.9394972305070303, + "grad_norm": 128.26299991951106, + "learning_rate": 3.908857283396206e-07, + "loss": 0.2535, + "step": 4410, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.89745566692367, + "success_rate.epoch.env.math": 0.9591836734693877, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7972149695387293, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8675128780927112, + "success_rate.epoch.global": 0.8811442842547395, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966577540106952, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9842105263157894, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015789473684210527 + }, + { + "epoch": 0.9405624201107797, + "grad_norm": 72.79858714010084, + "learning_rate": 3.908640179516192e-07, + "loss": 0.2646, + "step": 4415, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9476309226932669, + "success_rate.epoch.env.logic": 0.8969230769230769, + "success_rate.epoch.env.math": 0.9592, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7971602434077079, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8674848337326778, + "success_rate.epoch.global": 0.8810522735003968, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994762569832403, + "tokens_p.mean_in_band": 0.5894886363636364, + "tokens_rate.above_band": 0.9701897018970189, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02981029810298103 + }, + { + "epoch": 0.9416276097145292, + "grad_norm": 172.1469900938378, + "learning_rate": 3.908422850701432e-07, + "loss": 0.234, + "step": 4420, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9476309226932669, + "success_rate.epoch.env.logic": 0.897239263803681, + "success_rate.epoch.env.math": 0.9592651757188498, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7972777295105705, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8675301836147067, + "success_rate.epoch.global": 0.8811869973949484, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9918154761904762, + "tokens_p.mean_in_band": 0.7373046875, + "tokens_rate.above_band": 0.9545454545454546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045454545454545456 + }, + { + "epoch": 0.9426927993182787, + "grad_norm": 46.584986789611, + "learning_rate": 3.9082052971103155e-07, + "loss": 0.2454, + "step": 4425, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.887719298245614, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9476309226932669, + "success_rate.epoch.env.logic": 0.8966309341500766, + "success_rate.epoch.env.math": 0.9593139210211408, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7972230257448655, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8675102805662178, + "success_rate.epoch.global": 0.8810951465097862, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986056430446194, + "tokens_p.mean_in_band": 0.7485608552631579, + "tokens_rate.above_band": 0.9525, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0475 + }, + { + "epoch": 0.9437579889220281, + "grad_norm": 133.07355614365466, + "learning_rate": 3.907987518901393e-07, + "loss": 0.2971, + "step": 4430, + "success_rate.epoch.env.abd": 0.9874371859296482, + "success_rate.epoch.env.agentgym:alfworld": 0.887719298245614, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9478908188585607, + "success_rate.epoch.env.logic": 0.8967100229533282, + "success_rate.epoch.env.math": 0.959378733572282, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7970511708586296, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8675342430208893, + "success_rate.epoch.global": 0.8811165103401515, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985795454545454, + "tokens_p.mean_in_band": 0.47574013157894735, + "tokens_rate.above_band": 0.9418960244648318, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0581039755351682 + }, + { + "epoch": 0.9448231785257776, + "grad_norm": 85.39737039498038, + "learning_rate": 3.90776951623338e-07, + "loss": 0.1705, + "step": 4435, + "success_rate.epoch.env.abd": 0.9874371859296482, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9478908188585607, + "success_rate.epoch.env.logic": 0.8968678380443086, + "success_rate.epoch.env.math": 0.9594272076372315, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7972855905284435, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8676099974474365, + "success_rate.epoch.global": 0.8812507054972345, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948642061281338, + "tokens_p.mean_in_band": 0.779296875, + "tokens_rate.above_band": 0.9944598337950139, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00554016620498615 + }, + { + "epoch": 0.945888368129527, + "grad_norm": 180.09039020012654, + "learning_rate": 3.907551289265156e-07, + "loss": 0.7679, + "step": 4440, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948019801980198, + "success_rate.epoch.env.logic": 0.8968678380443086, + "success_rate.epoch.env.math": 0.9594916600476568, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.797231035477358, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8676254852865752, + "success_rate.epoch.global": 0.8812718457548765, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0003378378378378, + "tokens_p.mean_in_band": 0.6696428571428571, + "tokens_rate.above_band": 0.9906291834002677, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009370816599732263 + }, + { + "epoch": 0.9469535577332765, + "grad_norm": 418.2889210377602, + "learning_rate": 3.9073328381557616e-07, + "loss": 0.5479, + "step": 4445, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948019801980198, + "success_rate.epoch.env.logic": 0.8971036585365854, + "success_rate.epoch.env.math": 0.9595238095238096, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7971181556195965, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8676875458986402, + "success_rate.epoch.global": 0.8812929383939633, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980730563002681, + "tokens_p.mean_in_band": 0.53359375, + "tokens_rate.above_band": 0.9867724867724867, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013227513227513227 + }, + { + "epoch": 0.948018747337026, + "grad_norm": 134.60925717271044, + "learning_rate": 3.907114163064404e-07, + "loss": 0.3851, + "step": 4450, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9484029484029484, + "success_rate.epoch.env.logic": 0.8964992389649924, + "success_rate.epoch.env.math": 0.9595879556259905, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7971766061653702, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8676785753076505, + "success_rate.epoch.global": 0.8813139835752053, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990942028985508, + "tokens_p.mean_in_band": 0.6746651785714286, + "tokens_rate.above_band": 0.9857142857142858, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014285714285714285 + }, + { + "epoch": 0.9490839369407754, + "grad_norm": 171.1407566587678, + "learning_rate": 3.906895264150451e-07, + "loss": 0.3907, + "step": 4455, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8885017421602788, + "success_rate.epoch.env.agentgym:sciworld": 0.9715302491103203, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9484029484029484, + "success_rate.epoch.env.logic": 0.8967350037965072, + "success_rate.epoch.env.math": 0.9596039603960396, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7971223021582734, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8677412114280691, + "success_rate.epoch.global": 0.8813349814585909, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980274086378738, + "tokens_p.mean_in_band": 0.587890625, + "tokens_rate.above_band": 0.9804560260586319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019543973941368076 + }, + { + "epoch": 0.9501491265445249, + "grad_norm": 120.35469660672833, + "learning_rate": 3.906676141573435e-07, + "loss": 0.3803, + "step": 4460, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.8885017421602788, + "success_rate.epoch.env.agentgym:sciworld": 0.9716312056737588, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9484029484029484, + "success_rate.epoch.env.logic": 0.8967350037965072, + "success_rate.epoch.env.math": 0.9596518987341772, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7973555619430871, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.867781634616889, + "success_rate.epoch.global": 0.8814681782467167, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975868725868726, + "tokens_p.mean_in_band": 0.8385416666666666, + "tokens_rate.above_band": 0.9885496183206107, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011450381679389313 + }, + { + "epoch": 0.9512143161482743, + "grad_norm": 179.98681087678486, + "learning_rate": 3.9064567954930506e-07, + "loss": 0.1955, + "step": 4465, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9716312056737588, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9485294117647058, + "success_rate.epoch.env.logic": 0.8968133535660091, + "success_rate.epoch.env.math": 0.9597156398104265, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7975301550832855, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.867857115896554, + "success_rate.epoch.global": 0.8816010763538513, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995535714285714, + "tokens_p.mean_in_band": 0.708984375, + "tokens_rate.above_band": 0.9955555555555555, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0044444444444444444 + }, + { + "epoch": 0.9522795057520239, + "grad_norm": 190.29942577057642, + "learning_rate": 3.906237226069156e-07, + "loss": 0.3317, + "step": 4470, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, + "success_rate.epoch.env.agentgym:sciworld": 0.9717314487632509, + "success_rate.epoch.env.agentgym:textcraft": 0.9833333333333333, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9486552567237164, + "success_rate.epoch.env.logic": 0.8968133535660091, + "success_rate.epoch.env.math": 0.9597156398104265, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7977044476327116, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8679835320546453, + "success_rate.epoch.global": 0.8817204301075269, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971496683250415, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9934102141680395, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006589785831960461 + }, + { + "epoch": 0.9533446953557734, + "grad_norm": 65.00760099139049, + "learning_rate": 3.906017433461772e-07, + "loss": 0.2975, + "step": 4475, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, + "success_rate.epoch.env.agentgym:sciworld": 0.9719298245614035, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948780487804878, + "success_rate.epoch.env.logic": 0.8968915845337376, + "success_rate.epoch.env.math": 0.9597633136094674, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7975336965873243, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8680337124768646, + "success_rate.epoch.global": 0.8817408816289998, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986940298507463, + "tokens_p.mean_in_band": 0.55859375, + "tokens_rate.above_band": 0.9766763848396501, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023323615160349854 + }, + { + "epoch": 0.9544098849595228, + "grad_norm": 309.78135810933054, + "learning_rate": 3.905797417831081e-07, + "loss": 0.4408, + "step": 4480, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, + "success_rate.epoch.env.agentgym:sciworld": 0.972027972027972, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.896969696969697, + "success_rate.epoch.env.math": 0.9598108747044918, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7977077363896848, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8681183166359002, + "success_rate.epoch.global": 0.8818730442556996, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0003551136363635, + "tokens_p.mean_in_band": 0.75390625, + "tokens_rate.above_band": 0.9956483899042646, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004351610095735422 + }, + { + "epoch": 0.9554750745632723, + "grad_norm": 31.83015682785597, + "learning_rate": 3.90557717933743e-07, + "loss": 0.2196, + "step": 4485, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, + "success_rate.epoch.env.agentgym:sciworld": 0.9721254355400697, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.8970476911430735, + "success_rate.epoch.env.math": 0.9599056603773585, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.797823596792669, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8681534169778389, + "success_rate.epoch.global": 0.882004911810672, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946428571428572, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.9565402641670218, + "grad_norm": 310.69052213980814, + "learning_rate": 3.905356718141327e-07, + "loss": 0.4136, + "step": 4490, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8862068965517241, + "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.8965256797583081, + "success_rate.epoch.env.math": 0.959937156323645, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7979393245563824, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8678581129810982, + "success_rate.epoch.global": 0.8819134701159679, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978971962616823, + "tokens_p.mean_in_band": 0.45951021634615385, + "tokens_rate.above_band": 0.9762773722627737, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023722627737226276 + }, + { + "epoch": 0.9576054537707712, + "grad_norm": 70.41276784357738, + "learning_rate": 3.905136034403441e-07, + "loss": 0.26, + "step": 4495, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.8967596081386586, + "success_rate.epoch.env.math": 0.9599686028257457, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7981703830760435, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8679387925260724, + "success_rate.epoch.global": 0.8820449988861662, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990150429799427, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.9586706433745207, + "grad_norm": 192.62387585988628, + "learning_rate": 3.9049151282846067e-07, + "loss": 0.355, + "step": 4500, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.8968373493975904, + "success_rate.epoch.env.math": 0.9600470035252644, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7981153626499143, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8679479853926471, + "success_rate.epoch.global": 0.8820649755229194, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938226744186046, + "tokens_p.mean_in_band": 0.57109375, + "tokens_rate.above_band": 0.945054945054945, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.054945054945054944 + }, + { + "epoch": 0.9597358329782701, + "grad_norm": 212.75497605821852, + "learning_rate": 3.904693999945818e-07, + "loss": 0.2537, + "step": 4505, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.8969924812030076, + "success_rate.epoch.env.math": 0.960093896713615, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7978329056173368, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8679406733891191, + "success_rate.epoch.global": 0.881973771949322, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9881578947368421, + "tokens_p.mean_in_band": 0.561141304347826, + "tokens_rate.above_band": 0.8050847457627118, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.19491525423728814 + }, + { + "epoch": 0.9608010225820196, + "grad_norm": 63.52016339492654, + "learning_rate": 3.9044726495482316e-07, + "loss": 0.1731, + "step": 4510, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.8970698722764838, + "success_rate.epoch.env.math": 0.9601095033242081, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7979510529311327, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8679685460255705, + "success_rate.epoch.global": 0.88199378330373, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_p.mean_in_band": 0.5911458333333334, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 0.9618662121857691, + "grad_norm": 143.65631179993076, + "learning_rate": 3.904251077253166e-07, + "loss": 0.3271, + "step": 4515, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9490291262135923, + "success_rate.epoch.env.logic": 0.8972243060765192, + "success_rate.epoch.env.math": 0.960171807887544, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7978390673869775, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8679893432568213, + "success_rate.epoch.global": 0.8820137502772233, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972457627118644, + "tokens_p.mean_in_band": 0.6216947115384616, + "tokens_rate.above_band": 0.9577922077922078, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04220779220779221 + }, + { + "epoch": 0.9629314017895185, + "grad_norm": 46.06446517638285, + "learning_rate": 3.904029283222102e-07, + "loss": 0.2877, + "step": 4520, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.8973013493253373, + "success_rate.epoch.env.math": 0.9602028872415138, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7981833664490491, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8680416921499255, + "success_rate.epoch.global": 0.8821444395214887, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977605863192183, + "tokens_p.mean_in_band": 0.662109375, + "tokens_rate.above_band": 0.9903225806451613, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00967741935483871 + }, + { + "epoch": 0.963996591393268, + "grad_norm": 103.31495680208405, + "learning_rate": 3.9038072676166814e-07, + "loss": 0.1764, + "step": 4525, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.897378277153558, + "success_rate.epoch.env.math": 0.9602958349552355, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7983550765740216, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8680727453923726, + "success_rate.epoch.global": 0.8822748395662757, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9926470588235294, + "tokens_p.mean_in_band": 0.840625, + "tokens_rate.above_band": 0.9444444444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05555555555555555 + }, + { + "epoch": 0.9650617809970174, + "grad_norm": 122.40428764586784, + "learning_rate": 3.903585030598707e-07, + "loss": 0.4268, + "step": 4530, + "success_rate.epoch.env.abd": 0.9875930521091811, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.8974550898203593, + "success_rate.epoch.env.math": 0.9603112840466926, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.79841449603624, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8681005057760899, + "success_rate.epoch.global": 0.8822944297082228, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969347133757962, + "tokens_p.mean_in_band": 0.3919270833333333, + "tokens_rate.above_band": 0.9849435382685069, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015056461731493099 + }, + { + "epoch": 0.9661269706007669, + "grad_norm": 162.64072871959314, + "learning_rate": 3.903362572330144e-07, + "loss": 0.1943, + "step": 4535, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9725085910652921, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.8968609865470852, + "success_rate.epoch.env.math": 0.9603421461897356, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7986425339366516, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8680814425941882, + "success_rate.epoch.global": 0.8823139765952749, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959302325581395, + "tokens_p.mean_in_band": 0.7574869791666666, + "tokens_rate.above_band": 0.947136563876652, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05286343612334802 + }, + { + "epoch": 0.9671921602045164, + "grad_norm": 97.4979954005614, + "learning_rate": 3.9031398929731187e-07, + "loss": 0.3347, + "step": 4540, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9725085910652921, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.8970149253731343, + "success_rate.epoch.env.math": 0.9603883495145631, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.798079638520192, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8680484650245895, + "success_rate.epoch.global": 0.8821129245699162, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.7999999999999999, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9964788732394366, + "tokens_p.mean_below_band": 3.510081114654895e-12, + "tokens_p.mean_in_band": 0.5606971153846154, + "tokens_rate.above_band": 0.9102564102564102, + "tokens_rate.below_band": 0.00641025641025641, + "tokens_rate.in_band": 0.08333333333333333 + }, + { + "epoch": 0.9682573498082658, + "grad_norm": 88.78830832046516, + "learning_rate": 3.9029169926899173e-07, + "loss": 0.1327, + "step": 4545, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9726962457337884, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.8971684053651267, + "success_rate.epoch.env.math": 0.9604804339403332, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.798079638520192, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8680878485778859, + "success_rate.epoch.global": 0.8822427847543511, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0008423180592991, + "tokens_p.mean_in_band": 0.8212890625, + "tokens_rate.above_band": 0.9946380697050938, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005361930294906166 + }, + { + "epoch": 0.9693225394120153, + "grad_norm": 71.49164727229137, + "learning_rate": 3.9026938716429883e-07, + "loss": 0.2396, + "step": 4550, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.886986301369863, + "success_rate.epoch.env.agentgym:sciworld": 0.9727891156462585, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.8965773809523809, + "success_rate.epoch.env.math": 0.9605110336817654, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7982505643340858, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.867832301001599, + "success_rate.epoch.global": 0.8821522887323944, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.000680126002291, + "tokens_p.mean_in_band": 0.48931308962264153, + "tokens_rate.above_band": 0.9427645788336934, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05723542116630669 + }, + { + "epoch": 0.9703877290157648, + "grad_norm": 340.58904556963864, + "learning_rate": 3.9024705299949416e-07, + "loss": 0.5943, + "step": 4555, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.8877551020408163, + "success_rate.epoch.env.agentgym:sciworld": 0.9727891156462585, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.8965773809523809, + "success_rate.epoch.env.math": 0.9605720912253576, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7983643542019176, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8679319768867263, + "success_rate.epoch.global": 0.8822818201802594, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968580163043478, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9986431478968792, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013568521031207597 + }, + { + "epoch": 0.9714529186195143, + "grad_norm": 46.45539765360912, + "learning_rate": 3.902246967908546e-07, + "loss": 0.3957, + "step": 4560, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, + "success_rate.epoch.env.agentgym:sciworld": 0.9727891156462585, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.8966542750929368, + "success_rate.epoch.env.math": 0.9606329602470088, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7982530290222598, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8676635684076338, + "success_rate.epoch.global": 0.882191480017567, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955929487179487, + "tokens_p.mean_in_band": 0.58203125, + "tokens_rate.above_band": 0.9811320754716981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018867924528301886 + }, + { + "epoch": 0.9725181082232638, + "grad_norm": 251.31241027828054, + "learning_rate": 3.902023185546732e-07, + "loss": 0.526, + "step": 4565, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, + "success_rate.epoch.env.agentgym:sciworld": 0.9728813559322034, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.896807720861173, + "success_rate.epoch.env.math": 0.9602776706517547, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7982554867754643, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.867653827881464, + "success_rate.epoch.global": 0.8821013380127221, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983584630350194, + "tokens_p.mean_in_band": 0.3706597222222222, + "tokens_rate.above_band": 0.982791586998088, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017208413001912046 + }, + { + "epoch": 0.9735832978270132, + "grad_norm": 19.46919745696725, + "learning_rate": 3.9017991830725907e-07, + "loss": 0.2465, + "step": 4570, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.8962194217939214, + "success_rate.epoch.env.math": 0.9603235747303543, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.797920179876335, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8675823656226446, + "success_rate.epoch.global": 0.8819018404907976, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9970509105960265, + "tokens_p.mean_below_band": 1.2759119272232056e-07, + "tokens_p.mean_in_band": 0.6207853618421053, + "tokens_rate.above_band": 0.9393468118195957, + "tokens_rate.below_band": 0.0015552099533437014, + "tokens_rate.in_band": 0.05909797822706065 + }, + { + "epoch": 0.9746484874307627, + "grad_norm": 264.0910856776589, + "learning_rate": 3.901574960649373e-07, + "loss": 0.2539, + "step": 4575, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9495192307692307, + "success_rate.epoch.env.logic": 0.8962962962962963, + "success_rate.epoch.env.math": 0.9603846153846154, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7981471083660865, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8676265914457513, + "success_rate.epoch.global": 0.8820310790107244, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972474093264249, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9974160206718347, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002583979328165375 + }, + { + "epoch": 0.9757136770345122, + "grad_norm": 67.71812632946968, + "learning_rate": 3.9013505184404924e-07, + "loss": 0.2042, + "step": 4580, + "success_rate.epoch.env.abd": 0.9877149877149877, + "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.8965262379896526, + "success_rate.epoch.env.math": 0.9604150653343582, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7980364656381487, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8676539609518116, + "success_rate.epoch.global": 0.8820507214691736, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967657342657342, + "tokens_p.mean_in_band": 0.578125, + "tokens_rate.above_band": 0.9821428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017857142857142856 + }, + { + "epoch": 0.9767788666382616, + "grad_norm": 104.68609114344093, + "learning_rate": 3.901125856609519e-07, + "loss": 0.304, + "step": 4585, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8851351351351351, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.8966789667896679, + "success_rate.epoch.env.math": 0.9604454685099847, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7979260089686099, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8677343532169339, + "success_rate.epoch.global": 0.8820703210307927, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976968174204355, + "tokens_p.mean_in_band": 0.44921875, + "tokens_rate.above_band": 0.9933444259567388, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0066555740432612314 + }, + { + "epoch": 0.9778440562420111, + "grad_norm": 108.78841907739165, + "learning_rate": 3.900900975320185e-07, + "loss": 0.5862, + "step": 4590, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8851351351351351, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8966789667896679, + "success_rate.epoch.env.math": 0.9604606525911709, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7975391498881432, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8674940312427128, + "success_rate.epoch.global": 0.881762652705061, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.625, + "success_rate.window.env_macro_mean": 0.5416666666666666, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9961947737068966, + "tokens_p.mean_below_band": 8.307397365570068e-07, + "tokens_p.mean_in_band": 0.4822353957286432, + "tokens_rate.above_band": 0.9488752556237219, + "tokens_rate.below_band": 0.0002556237218813906, + "tokens_rate.in_band": 0.05086912065439673 + }, + { + "epoch": 0.9789092458457606, + "grad_norm": 63.496836056276955, + "learning_rate": 3.900675874736383e-07, + "loss": 0.306, + "step": 4595, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8851351351351351, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8968312453942521, + "success_rate.epoch.env.math": 0.9605061349693251, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7972633342641721, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.867486935366237, + "success_rate.epoch.global": 0.8816735672259751, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.989375, + "tokens_p.mean_in_band": 0.5602678571428571, + "tokens_rate.above_band": 0.8771929824561403, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12280701754385964 + }, + { + "epoch": 0.97997443544951, + "grad_norm": 61.82864261391061, + "learning_rate": 3.900450555022164e-07, + "loss": 0.2415, + "step": 4600, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8969830757910228, + "success_rate.epoch.env.math": 0.9605817068503636, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7973199329983249, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8675828359248992, + "success_rate.epoch.global": 0.8818023508924684, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992559523809523, + "tokens_p.mean_in_band": 0.7760416666666666, + "tokens_rate.above_band": 0.9949238578680203, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005076142131979695 + }, + { + "epoch": 0.9810396250532595, + "grad_norm": 124.27735162344112, + "learning_rate": 3.900225016341739e-07, + "loss": 0.3287, + "step": 4605, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.8971344599559148, + "success_rate.epoch.env.math": 0.9605967865340475, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7971014492753623, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8675895261508871, + "success_rate.epoch.global": 0.8817134159599913, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9964755639097744, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.9708029197080292, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029197080291970802 + }, + { + "epoch": 0.9821048146570089, + "grad_norm": 396.5823067791187, + "learning_rate": 3.8999992588594804e-07, + "loss": 0.2374, + "step": 4610, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9730639730639731, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.8972853998532648, + "success_rate.epoch.env.math": 0.9606720122184039, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7969359331476323, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8676033124731578, + "success_rate.epoch.global": 0.8817332754126846, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969800420168067, + "tokens_p.mean_in_band": 0.6180555555555556, + "tokens_rate.above_band": 0.9635627530364372, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03643724696356275 + }, + { + "epoch": 0.9831700042607584, + "grad_norm": 117.84430981737731, + "learning_rate": 3.899773282739917e-07, + "loss": 0.2983, + "step": 4615, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9731543624161074, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.8973607038123167, + "success_rate.epoch.env.math": 0.9607469512195121, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7971054828833843, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8676406017502529, + "success_rate.epoch.global": 0.8818615751789977, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946646341463414, + "tokens_p.mean_in_band": 0.8854166666666666, + "tokens_rate.above_band": 0.9820359281437125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017964071856287425 + }, + { + "epoch": 0.9842351938645079, + "grad_norm": 33.14586394289726, + "learning_rate": 3.89954708814774e-07, + "loss": 0.3016, + "step": 4620, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9731543624161074, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.8973607038123167, + "success_rate.epoch.env.math": 0.9608216051730696, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7967185761957731, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8676606858172906, + "success_rate.epoch.global": 0.8817728651928912, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9969919786096256, + "tokens_p.mean_in_band": 0.4407552083333333, + "tokens_rate.above_band": 0.9396984924623115, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06030150753768844 + }, + { + "epoch": 0.9853003834682573, + "grad_norm": 56.180461328817415, + "learning_rate": 3.8993206752477977e-07, + "loss": 0.5396, + "step": 4625, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9731543624161074, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.8975859546452085, + "success_rate.epoch.env.math": 0.9608662613981763, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7968880244512364, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8677033511262852, + "success_rate.epoch.global": 0.8819008443386014, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960526315789474, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.979381443298969, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020618556701030927 + }, + { + "epoch": 0.9863655730720068, + "grad_norm": 249.9598395808165, + "learning_rate": 3.8990940442050986e-07, + "loss": 0.2886, + "step": 4630, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.8976608187134503, + "success_rate.epoch.env.math": 0.960910815939279, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7965584235359423, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8677005137273103, + "success_rate.epoch.global": 0.8818122837370242, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9964247881355932, + "tokens_p.mean_in_band": 0.5497159090909091, + "tokens_rate.above_band": 0.9554655870445344, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044534412955465584 + }, + { + "epoch": 0.9874307626757562, + "grad_norm": 165.16161308596384, + "learning_rate": 3.8988671951848085e-07, + "loss": 0.3718, + "step": 4635, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.8977355734112491, + "success_rate.epoch.env.math": 0.9609700644183403, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7962860310421286, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8676879328803145, + "success_rate.epoch.global": 0.8817239144523655, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.994724025974026, + "tokens_p.mean_in_band": 0.5266927083333334, + "tokens_rate.above_band": 0.927710843373494, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07228915662650602 + }, + { + "epoch": 0.9884959522795057, + "grad_norm": 76.65230786453098, + "learning_rate": 3.8986401283522554e-07, + "loss": 0.4237, + "step": 4640, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.973421926910299, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9477434679334917, + "success_rate.epoch.env.logic": 0.8978847556528081, + "success_rate.epoch.env.math": 0.9609996213555472, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7959014123511493, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8676912920897037, + "success_rate.epoch.global": 0.8816357358653432, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.888888888888889, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.998995983935743, + "tokens_p.mean_in_band": 0.489013671875, + "tokens_rate.above_band": 0.9841897233201581, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015810276679841896 + }, + { + "epoch": 0.9895611418832552, + "grad_norm": 110.83515572846225, + "learning_rate": 3.898412843872922e-07, + "loss": 0.3044, + "step": 4645, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.973421926910299, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9481132075471698, + "success_rate.epoch.env.logic": 0.8978847556528081, + "success_rate.epoch.env.math": 0.9610291335603481, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7961272475795297, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.867782397630917, + "success_rate.epoch.global": 0.8817633110584178, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998046875, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 0.9906263314870047, + "grad_norm": 142.16975994360337, + "learning_rate": 3.898185341912453e-07, + "loss": 0.4906, + "step": 4650, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9481132075471698, + "success_rate.epoch.env.logic": 0.8979591836734694, + "success_rate.epoch.env.math": 0.9606953892668179, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7964088397790056, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8677924233392027, + "success_rate.epoch.global": 0.8817829457364341, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996641074856046, + "tokens_p.mean_in_band": 0.50703125, + "tokens_rate.above_band": 0.9904942965779467, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009505703422053232 + }, + { + "epoch": 0.9916915210907542, + "grad_norm": 34.441900158692974, + "learning_rate": 3.897957622636649e-07, + "loss": 0.1985, + "step": 4655, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9803921568627451, + "success_rate.epoch.env.ded": 0.9482352941176471, + "success_rate.epoch.env.logic": 0.8981077147016011, + "success_rate.epoch.env.math": 0.9607547169811321, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7965212589729431, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8678682889182577, + "success_rate.epoch.global": 0.8819100881910088, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973653395784543, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9976635514018691, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002336448598130841 + }, + { + "epoch": 0.9927567106945037, + "grad_norm": 105.86226379307871, + "learning_rate": 3.8977296862114704e-07, + "loss": 0.4194, + "step": 4660, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9803921568627451, + "success_rate.epoch.env.ded": 0.9482352941176471, + "success_rate.epoch.env.logic": 0.8982558139534884, + "success_rate.epoch.env.math": 0.9607695209354961, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7965821389195149, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8679216187232842, + "success_rate.epoch.global": 0.8819295229909755, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975453172205438, + "tokens_p.mean_in_band": 0.6919642857142857, + "tokens_rate.above_band": 0.9792899408284024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020710059171597635 + }, + { + "epoch": 0.9938219002982531, + "grad_norm": 119.19625646197002, + "learning_rate": 3.897501532803037e-07, + "loss": 0.4187, + "step": 4665, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9803921568627451, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.8977519941986947, + "success_rate.epoch.env.math": 0.9604221635883905, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7967502065546681, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8678815594661976, + "success_rate.epoch.global": 0.8818415969092079, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9987026862026862, + "tokens_p.mean_in_band": 0.6143973214285714, + "tokens_rate.above_band": 0.975, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025 + }, + { + "epoch": 0.9948870899020026, + "grad_norm": 160.6021871896516, + "learning_rate": 3.8972731625776244e-07, + "loss": 0.343, + "step": 4670, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.8978260869565218, + "success_rate.epoch.env.math": 0.9604817463304479, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7963676389653275, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8679261982406973, + "success_rate.epoch.global": 0.8817538593481989, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942354368932039, + "tokens_p.mean_in_band": 0.3483664772727273, + "tokens_rate.above_band": 0.9035087719298246, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09649122807017543 + }, + { + "epoch": 0.995952279505752, + "grad_norm": 371.81707220231846, + "learning_rate": 3.897044575701667e-07, + "loss": 0.4426, + "step": 4675, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.8979000724112962, + "success_rate.epoch.env.math": 0.9601653513716648, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7962606543854825, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8678998169259036, + "success_rate.epoch.global": 0.8816663097022918, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9940378289473685, + "tokens_p.mean_in_band": 0.5005095108695652, + "tokens_rate.above_band": 0.8685714285714285, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13142857142857142 + }, + { + "epoch": 0.9970174691095015, + "grad_norm": 447.35902529599747, + "learning_rate": 3.8968157723417574e-07, + "loss": 0.4631, + "step": 4680, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, + "success_rate.epoch.env.agentgym:sciworld": 0.9735973597359736, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.8979000724112962, + "success_rate.epoch.env.math": 0.9602400600150037, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7964844822850865, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8679349044262746, + "success_rate.epoch.global": 0.8817928968763372, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977477477477478, + "tokens_p.mean_in_band": 0.78515625, + "tokens_rate.above_band": 0.9964093357271095, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003590664272890485 + }, + { + "epoch": 0.998082658713251, + "grad_norm": 170.68042219194936, + "learning_rate": 3.896586752664645e-07, + "loss": 0.6085, + "step": 4685, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8866666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9735973597359736, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.8972503617945007, + "success_rate.epoch.env.math": 0.9602996254681648, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7961031833150384, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8679157391775955, + "success_rate.epoch.global": 0.8815986321863646, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.5833333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9975667104111986, + "tokens_p.mean_in_band": 0.3960597826086957, + "tokens_rate.above_band": 0.9613120269133726, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03868797308662742 + }, + { + "epoch": 0.9991478483170004, + "grad_norm": 184.5098625520693, + "learning_rate": 3.89635751683724e-07, + "loss": 0.5809, + "step": 4690, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8866666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9735973597359736, + "success_rate.epoch.env.agentgym:textcraft": 0.9838709677419355, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.8973246565437455, + "success_rate.epoch.env.math": 0.9603293413173652, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.7958904109589041, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.867929889050977, + "success_rate.epoch.global": 0.8815115286080273, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995306324110672, + "tokens_p.mean_in_band": 0.671630859375, + "tokens_rate.above_band": 0.9405204460966543, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05947955390334572 + }, + { + "epoch": 1.00021303792075, + "grad_norm": 144.29975023452522, + "learning_rate": 3.8961280650266055e-07, + "loss": 0.5425, + "step": 4695, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979272959183674, + "tokens_p.mean_in_band": 0.2652994791666667, + "tokens_rate.above_band": 0.9702970297029703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0297029702970297 + }, + { + "epoch": 1.0012782275244994, + "grad_norm": 431.7095323972743, + "learning_rate": 3.895898397399966e-07, + "loss": 0.5209, + "step": 4700, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 1.0, + "success_rate.epoch.env.math": 1.0, + "success_rate.epoch.env.science": 1.0, + "success_rate.epoch.env_macro_mean": 1.0, + "success_rate.epoch.global": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992122755524862, + "tokens_p.mean_in_band": 0.8058035714285714, + "tokens_rate.above_band": 0.9975887013434378, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002411298656562177 + }, + { + "epoch": 1.0023434171282488, + "grad_norm": 251.24346855595547, + "learning_rate": 3.8956685141247e-07, + "loss": 1.0089, + "step": 4705, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.6666666666666666, + "success_rate.epoch.env.math": 0.8333333333333334, + "success_rate.epoch.env.science": 0.3333333333333333, + "success_rate.epoch.env_macro_mean": 0.8333333333333334, + "success_rate.epoch.global": 0.8, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.55, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9947363429438544, + "tokens_p.mean_below_band": 2.9976945370435715e-09, + "tokens_p.mean_in_band": 0.5964307598039216, + "tokens_rate.above_band": 0.9268635724331927, + "tokens_rate.below_band": 0.0014064697609001407, + "tokens_rate.in_band": 0.07172995780590717 + }, + { + "epoch": 1.0034086067319983, + "grad_norm": 105.24981153941961, + "learning_rate": 3.895438415368346e-07, + "loss": 1.0138, + "step": 4710, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.6666666666666666, + "success_rate.epoch.env.math": 0.9, + "success_rate.epoch.env.science": 0.375, + "success_rate.epoch.env_macro_mean": 0.8488095238095238, + "success_rate.epoch.global": 0.7666666666666667, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.7999999999999999, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9921171171171171, + "tokens_p.mean_in_band": 0.4274796195652174, + "tokens_rate.above_band": 0.8283582089552238, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.17164179104477612 + }, + { + "epoch": 1.0044737963357477, + "grad_norm": 216.0384760527122, + "learning_rate": 3.895208101298598e-07, + "loss": 0.9622, + "step": 4715, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8571428571428571, + "success_rate.epoch.env.science": 0.36363636363636365, + "success_rate.epoch.env_macro_mean": 0.8172541743970315, + "success_rate.epoch.global": 0.725, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.5208333333333334, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9988245784695201, + "tokens_p.mean_in_band": 0.5294596354166666, + "tokens_rate.above_band": 0.969811320754717, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03018867924528302 + }, + { + "epoch": 1.0055389859394972, + "grad_norm": 109.8191325563643, + "learning_rate": 3.894977572083308e-07, + "loss": 0.8322, + "step": 4720, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.4, + "success_rate.epoch.env.math": 0.8, + "success_rate.epoch.env.science": 0.5555555555555556, + "success_rate.epoch.env_macro_mean": 0.8222222222222222, + "success_rate.epoch.global": 0.72, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.4642857142857143, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9980439642324889, + "tokens_p.mean_in_band": 0.47028186274509803, + "tokens_rate.above_band": 0.9293628808864266, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07063711911357341 + }, + { + "epoch": 1.0066041755432467, + "grad_norm": 101.64979341645656, + "learning_rate": 3.894746827890482e-07, + "loss": 0.699, + "step": 4725, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.4, + "success_rate.epoch.env.math": 0.8125, + "success_rate.epoch.env.science": 0.5416666666666666, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861574074074074, + "success_rate.epoch.global": 0.7166666666666667, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9998349471830986, + "tokens_p.mean_in_band": 0.44388020833333336, + "tokens_rate.above_band": 0.9742710120068611, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025728987993138937 + }, + { + "epoch": 1.0076693651469961, + "grad_norm": 71.71592240162255, + "learning_rate": 3.8945158688882864e-07, + "loss": 0.6099, + "step": 4730, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.85, + "success_rate.epoch.env.science": 0.5714285714285714, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616402116402117, + "success_rate.epoch.global": 0.7285714285714285, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9951645496535797, + "tokens_p.mean_in_band": 0.6534778225806451, + "tokens_rate.above_band": 0.9331896551724138, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0668103448275862 + }, + { + "epoch": 1.0087345547507456, + "grad_norm": 123.03568597455691, + "learning_rate": 3.8942846952450415e-07, + "loss": 0.7393, + "step": 4735, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.76, + "success_rate.epoch.env.science": 0.5, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8437037037037037, + "success_rate.epoch.global": 0.675, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.4, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.4666666666666666, + "success_rate.window.global": 0.3, + "tokens_p.mean_above_band": 0.9960756933744221, + "tokens_p.mean_in_band": 0.47421875, + "tokens_rate.above_band": 0.941944847605225, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05805515239477504 + }, + { + "epoch": 1.009799744354495, + "grad_norm": 508.9809006199772, + "learning_rate": 3.8940533071292264e-07, + "loss": 0.6637, + "step": 4740, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.7931034482758621, + "success_rate.epoch.env.science": 0.5135135135135135, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8488833661247455, + "success_rate.epoch.global": 0.6888888888888889, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9948453608247423, + "tokens_p.mean_in_band": 0.53515625, + "tokens_rate.above_band": 0.9651741293532339, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03482587064676617 + }, + { + "epoch": 1.0108649339582445, + "grad_norm": 82.26163557012896, + "learning_rate": 3.8938217047094735e-07, + "loss": 0.6302, + "step": 4745, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.8181818181818182, + "success_rate.epoch.env.science": 0.5, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651515151515152, + "success_rate.epoch.global": 0.71, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977361505681818, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9943502824858758, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005649717514124294 + }, + { + "epoch": 1.011930123561994, + "grad_norm": 122.4384358762404, + "learning_rate": 3.8935898881545743e-07, + "loss": 0.6716, + "step": 4750, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8108108108108109, + "success_rate.epoch.env.science": 0.5, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8810810810810811, + "success_rate.epoch.global": 0.7090909090909091, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9906994047619048, + "tokens_p.mean_in_band": 0.54931640625, + "tokens_rate.above_band": 0.84, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16 + }, + { + "epoch": 1.0129953131657434, + "grad_norm": 108.56192483199875, + "learning_rate": 3.893357857633474e-07, + "loss": 0.6893, + "step": 4755, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9090909090909091, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8205128205128205, + "success_rate.epoch.env.science": 0.5111111111111111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.874071484071484, + "success_rate.epoch.global": 0.7083333333333334, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9989399592944369, + "tokens_p.mean_in_band": 0.4557179418103448, + "tokens_rate.above_band": 0.980705256154358, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01929474384564205 + }, + { + "epoch": 1.0140605027694929, + "grad_norm": 101.09932185155446, + "learning_rate": 3.8931256133152754e-07, + "loss": 0.6921, + "step": 4760, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9166666666666666, + "success_rate.epoch.env.logic": 0.45454545454545453, + "success_rate.epoch.env.math": 0.8292682926829268, + "success_rate.epoch.env.science": 0.52, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720480413895049, + "success_rate.epoch.global": 0.7076923076923077, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0006801470588236, + "tokens_p.mean_in_band": 0.46293826219512196, + "tokens_rate.above_band": 0.9764503159103963, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023549684089603676 + }, + { + "epoch": 1.0151256923732423, + "grad_norm": 69.95946542944019, + "learning_rate": 3.892893155369236e-07, + "loss": 0.5881, + "step": 4765, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9230769230769231, + "success_rate.epoch.env.logic": 0.46153846153846156, + "success_rate.epoch.env.math": 0.8444444444444444, + "success_rate.epoch.env.science": 0.5192307692307693, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8748290598290598, + "success_rate.epoch.global": 0.7142857142857143, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994926948051948, + "tokens_p.mean_in_band": 0.4560810810810811, + "tokens_rate.above_band": 0.9614984391259105, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03850156087408949 + }, + { + "epoch": 1.0161908819769918, + "grad_norm": 82.82324559157526, + "learning_rate": 3.8926604839647696e-07, + "loss": 0.5157, + "step": 4770, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5333333333333333, + "success_rate.epoch.env.math": 0.8541666666666666, + "success_rate.epoch.env.science": 0.5357142857142857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8851785714285715, + "success_rate.epoch.global": 0.7266666666666667, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973525747508306, + "tokens_p.mean_in_band": 0.4153645833333333, + "tokens_rate.above_band": 0.9709677419354839, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02903225806451613 + }, + { + "epoch": 1.0172560715807413, + "grad_norm": 95.11877465215886, + "learning_rate": 3.892427599271446e-07, + "loss": 0.6093, + "step": 4775, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8679245283018868, + "success_rate.epoch.env.science": 0.5517241379310345, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8848220094804351, + "success_rate.epoch.global": 0.7375, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9999257719714965, + "tokens_p.mean_in_band": 0.46902901785714285, + "tokens_rate.above_band": 0.967816091954023, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03218390804597701 + }, + { + "epoch": 1.0183212611844907, + "grad_norm": 118.84020836443663, + "learning_rate": 3.8921945014589894e-07, + "loss": 0.2974, + "step": 4780, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5555555555555556, + "success_rate.epoch.env.math": 0.8771929824561403, + "success_rate.epoch.env.science": 0.5806451612903226, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8941965127873447, + "success_rate.epoch.global": 0.7529411764705882, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974197247706422, + "tokens_p.mean_in_band": 0.6588541666666666, + "tokens_rate.above_band": 0.960352422907489, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039647577092511016 + }, + { + "epoch": 1.0193864507882404, + "grad_norm": 147.37522023409903, + "learning_rate": 3.89196119069728e-07, + "loss": 0.4447, + "step": 4785, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5789473684210527, + "success_rate.epoch.env.math": 0.8852459016393442, + "success_rate.epoch.env.science": 0.578125, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8970889698631825, + "success_rate.epoch.global": 0.7611111111111111, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986940298507463, + "tokens_p.mean_in_band": 0.5078125, + "tokens_rate.above_band": 0.9970238095238095, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002976190476190476 + }, + { + "epoch": 1.0204516403919899, + "grad_norm": 32.429467066288986, + "learning_rate": 3.891727667156353e-07, + "loss": 0.3183, + "step": 4790, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6190476190476191, + "success_rate.epoch.env.math": 0.890625, + "success_rate.epoch.env.science": 0.5909090909090909, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9029153138528139, + "success_rate.epoch.global": 0.7736842105263158, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992270318021201, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9860627177700348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013937282229965157 + }, + { + "epoch": 1.0215168299957393, + "grad_norm": 50.749797732298596, + "learning_rate": 3.8914939310063983e-07, + "loss": 0.4503, + "step": 4795, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6521739130434783, + "success_rate.epoch.env.math": 0.8955223880597015, + "success_rate.epoch.env.science": 0.6, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9076267729674609, + "success_rate.epoch.global": 0.78, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9955481843575419, + "tokens_p.mean_in_band": 0.5564236111111112, + "tokens_rate.above_band": 0.9754768392370572, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02452316076294278 + }, + { + "epoch": 1.0225820195994888, + "grad_norm": 90.48850129165183, + "learning_rate": 3.891259982417761e-07, + "loss": 0.5427, + "step": 4800, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.68, + "success_rate.epoch.env.math": 0.8873239436619719, + "success_rate.epoch.env.science": 0.5945945945945946, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9090489966827995, + "success_rate.epoch.global": 0.7761904761904762, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.991875, + "tokens_p.mean_in_band": 0.3108723958333333, + "tokens_rate.above_band": 0.8620689655172413, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13793103448275862 + }, + { + "epoch": 1.0236472092032383, + "grad_norm": 76.460497999413, + "learning_rate": 3.891025821560942e-07, + "loss": 0.5584, + "step": 4805, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6923076923076923, + "success_rate.epoch.env.math": 0.8947368421052632, + "success_rate.epoch.env.science": 0.5974025974025974, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9113018560386982, + "success_rate.epoch.global": 0.7818181818181819, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9943484042553191, + "tokens_p.mean_in_band": 0.5876736111111112, + "tokens_rate.above_band": 0.912621359223301, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08737864077669903 + }, + { + "epoch": 1.0247123988069877, + "grad_norm": 78.17021514718782, + "learning_rate": 3.890791448606596e-07, + "loss": 0.3499, + "step": 4810, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.7037037037037037, + "success_rate.epoch.env.math": 0.9, + "success_rate.epoch.env.science": 0.5875, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9119775132275132, + "success_rate.epoch.global": 0.7816593886462883, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.7777777777777778, + "tokens_p.mean_above_band": 0.9948224852071006, + "tokens_p.mean_in_band": 0.4973958333333333, + "tokens_rate.above_band": 0.9337016574585635, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06629834254143646 + }, + { + "epoch": 1.0257775884107372, + "grad_norm": 92.1443658600186, + "learning_rate": 3.8905568637255313e-07, + "loss": 0.5582, + "step": 4815, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.7037037037037037, + "success_rate.epoch.env.math": 0.8902439024390244, + "success_rate.epoch.env.science": 0.5903614457831325, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.912180905192586, + "success_rate.epoch.global": 0.7824267782426778, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980757389162561, + "tokens_p.mean_in_band": 0.5909090909090909, + "tokens_rate.above_band": 0.973621103117506, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026378896882494004 + }, + { + "epoch": 1.0268427780144866, + "grad_norm": 223.73644024574008, + "learning_rate": 3.890322067088712e-07, + "loss": 0.2925, + "step": 4820, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.7241379310344828, + "success_rate.epoch.env.math": 0.8941176470588236, + "success_rate.epoch.env.science": 0.5930232558139535, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.914877883390726, + "success_rate.epoch.global": 0.7871485943775101, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957096474953617, + "tokens_p.mean_in_band": 0.74765625, + "tokens_rate.above_band": 0.9817850637522769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018214936247723135 + }, + { + "epoch": 1.027907967618236, + "grad_norm": 70.5536113220452, + "learning_rate": 3.890087058867258e-07, + "loss": 0.4808, + "step": 4825, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.7333333333333333, + "success_rate.epoch.env.math": 0.8953488372093024, + "success_rate.epoch.env.science": 0.6067415730337079, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9179868188020788, + "success_rate.epoch.global": 0.7953667953667953, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998694120505345, + "tokens_p.mean_in_band": 0.818359375, + "tokens_rate.above_band": 0.9961277831558567, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003872216844143272 + }, + { + "epoch": 1.0289731572219856, + "grad_norm": 30.762366799003015, + "learning_rate": 3.8898518392324393e-07, + "loss": 0.4171, + "step": 4830, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.7419354838709677, + "success_rate.epoch.env.math": 0.898876404494382, + "success_rate.epoch.env.science": 0.6105263157894737, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9195782648599268, + "success_rate.epoch.global": 0.7955390334572491, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9913651315789473, + "tokens_p.mean_in_band": 0.4079861111111111, + "tokens_rate.above_band": 0.8941176470588236, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10588235294117647 + }, + { + "epoch": 1.030038346825735, + "grad_norm": 146.6021634374276, + "learning_rate": 3.889616408355683e-07, + "loss": 0.4162, + "step": 4835, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7575757575757576, + "success_rate.epoch.env.math": 0.9010989010989011, + "success_rate.epoch.env.science": 0.6, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9206043079727291, + "success_rate.epoch.global": 0.7921146953405018, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9976250791640279, + "tokens_p.mean_in_band": 0.55125, + "tokens_rate.above_band": 0.9844139650872819, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015586034912718205 + }, + { + "epoch": 1.0311035364294845, + "grad_norm": 24.174367018000552, + "learning_rate": 3.889380766408569e-07, + "loss": 0.3678, + "step": 4840, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7575757575757576, + "success_rate.epoch.env.math": 0.9052631578947369, + "success_rate.epoch.env.science": 0.6132075471698113, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9223414883692937, + "success_rate.epoch.global": 0.7958477508650519, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9166666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942010309278351, + "tokens_p.mean_in_band": 0.560546875, + "tokens_rate.above_band": 0.9238095238095239, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0761904761904762 + }, + { + "epoch": 1.032168726033234, + "grad_norm": 153.3969011372809, + "learning_rate": 3.8891449135628333e-07, + "loss": 0.3122, + "step": 4845, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7777777777777778, + "success_rate.epoch.env.math": 0.9072164948453608, + "success_rate.epoch.env.science": 0.6018518518518519, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9234214545527621, + "success_rate.epoch.global": 0.7959866220735786, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977448453608248, + "tokens_p.mean_in_band": 0.3815789473684211, + "tokens_rate.above_band": 0.9107981220657277, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0892018779342723 + }, + { + "epoch": 1.0332339156369834, + "grad_norm": 82.50481899376963, + "learning_rate": 3.8889088499903615e-07, + "loss": 0.3598, + "step": 4850, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7837837837837838, + "success_rate.epoch.env.math": 0.9090909090909091, + "success_rate.epoch.env.science": 0.6052631578947368, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.924550627182206, + "success_rate.epoch.global": 0.7961165048543689, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9926985981308412, + "tokens_p.mean_in_band": 0.466796875, + "tokens_rate.above_band": 0.8699186991869918, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13008130081300814 + }, + { + "epoch": 1.0342991052407329, + "grad_norm": 56.89804470088932, + "learning_rate": 3.8886725758631956e-07, + "loss": 0.5136, + "step": 4855, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.7948717948717948, + "success_rate.epoch.env.math": 0.9117647058823529, + "success_rate.epoch.env.science": 0.6153846153846154, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9272021116138763, + "success_rate.epoch.global": 0.8025078369905956, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999169921875, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.0353642948444823, + "grad_norm": 129.15884002225232, + "learning_rate": 3.8884360913535316e-07, + "loss": 0.4229, + "step": 4860, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8, + "success_rate.epoch.env.math": 0.9074074074074074, + "success_rate.epoch.env.science": 0.625, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9282407407407408, + "success_rate.epoch.global": 0.8054711246200608, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995398773006135, + "tokens_p.mean_in_band": 0.721875, + "tokens_rate.above_band": 0.9702380952380952, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02976190476190476 + }, + { + "epoch": 1.0364294844482318, + "grad_norm": 51.30541733272511, + "learning_rate": 3.888199396633717e-07, + "loss": 0.3944, + "step": 4865, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.813953488372093, + "success_rate.epoch.env.math": 0.9009009009009009, + "success_rate.epoch.env.science": 0.6341463414634146, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.929900073073641, + "success_rate.epoch.global": 0.8082595870206489, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989530988274706, + "tokens_p.mean_in_band": 0.49462890625, + "tokens_rate.above_band": 0.9867768595041322, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013223140495867768 + }, + { + "epoch": 1.0374946740519813, + "grad_norm": 97.00191312932553, + "learning_rate": 3.887962491876253e-07, + "loss": 0.4166, + "step": 4870, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.813953488372093, + "success_rate.epoch.env.math": 0.9026548672566371, + "success_rate.epoch.env.science": 0.640625, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.930723335562873, + "success_rate.epoch.global": 0.8108882521489972, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976181402439024, + "tokens_p.mean_in_band": 0.584375, + "tokens_rate.above_band": 0.9924357034795764, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007564296520423601 + }, + { + "epoch": 1.0385598636557307, + "grad_norm": 559.6773731772412, + "learning_rate": 3.887725377253794e-07, + "loss": 0.3291, + "step": 4875, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8, + "success_rate.epoch.env.math": 0.9051724137931034, + "success_rate.epoch.env.science": 0.6307692307692307, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9290487099107789, + "success_rate.epoch.global": 0.807799442896936, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9980066872427984, + "tokens_p.mean_in_band": 0.553886217948718, + "tokens_rate.above_band": 0.9739478957915831, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026052104208416832 + }, + { + "epoch": 1.0396250532594802, + "grad_norm": 97.8107143834208, + "learning_rate": 3.887488052939148e-07, + "loss": 0.4862, + "step": 4880, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8125, + "success_rate.epoch.env.math": 0.9067796610169492, + "success_rate.epoch.env.science": 0.6444444444444445, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9318269560006849, + "success_rate.epoch.global": 0.8130081300813008, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9933252427184466, + "tokens_p.mean_in_band": 0.725, + "tokens_rate.above_band": 0.911504424778761, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08849557522123894 + }, + { + "epoch": 1.0406902428632296, + "grad_norm": 64.6257125792207, + "learning_rate": 3.8872505191052755e-07, + "loss": 0.3941, + "step": 4885, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8163265306122449, + "success_rate.epoch.env.math": 0.907563025210084, + "success_rate.epoch.env.science": 0.6453900709219859, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9323825081289769, + "success_rate.epoch.global": 0.8126649076517151, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986620795107034, + "tokens_p.mean_in_band": 0.5478515625, + "tokens_rate.above_band": 0.9533527696793003, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04664723032069971 + }, + { + "epoch": 1.041755432466979, + "grad_norm": 65.91021777134186, + "learning_rate": 3.8870127759252883e-07, + "loss": 0.4176, + "step": 4890, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.82, + "success_rate.epoch.env.math": 0.9098360655737705, + "success_rate.epoch.env.science": 0.636986301369863, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9321367821489087, + "success_rate.epoch.global": 0.8097686375321337, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9957107843137255, + "tokens_p.mean_in_band": 0.530078125, + "tokens_rate.above_band": 0.884393063583815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11560693641618497 + }, + { + "epoch": 1.0428206220707286, + "grad_norm": 138.3148173872502, + "learning_rate": 3.886774823572453e-07, + "loss": 0.3484, + "step": 4895, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8235294117647058, + "success_rate.epoch.env.math": 0.912, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.64, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9392773773541037, + "success_rate.epoch.global": 0.8120300751879699, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9921465968586387, + "tokens_p.mean_below_band": 4.602043190971017e-10, + "tokens_p.mean_in_band": 0.7078125, + "tokens_rate.above_band": 0.9455445544554455, + "tokens_rate.below_band": 0.0049504950495049506, + "tokens_rate.in_band": 0.04950495049504951 + }, + { + "epoch": 1.043885811674478, + "grad_norm": 163.97837891873726, + "learning_rate": 3.8865366622201865e-07, + "loss": 0.4808, + "step": 4900, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8269230769230769, + "success_rate.epoch.env.math": 0.9069767441860465, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6516129032258065, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9401849512241239, + "success_rate.epoch.global": 0.8141809290953546, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9871134020618557, + "tokens_p.mean_in_band": 0.64501953125, + "tokens_rate.above_band": 0.9238095238095239, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0761904761904762 + }, + { + "epoch": 1.0449510012782275, + "grad_norm": 82.54680181501901, + "learning_rate": 3.8862982920420595e-07, + "loss": 0.4513, + "step": 4905, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.8148148148148148, + "success_rate.epoch.env.math": 0.9083969465648855, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6477987421383647, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9390312578955817, + "success_rate.epoch.global": 0.8114558472553699, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.000828313253012, + "tokens_p.mean_in_band": 0.4991179435483871, + "tokens_rate.above_band": 0.9639953542392566, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036004645760743324 + }, + { + "epoch": 1.046016190881977, + "grad_norm": 85.50658915223514, + "learning_rate": 3.886059713211795e-07, + "loss": 0.3659, + "step": 4910, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.8245614035087719, + "success_rate.epoch.env.math": 0.9097744360902256, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6481481481481481, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9402258170679224, + "success_rate.epoch.global": 0.8135198135198135, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967447916666666, + "tokens_p.mean_in_band": 0.6534090909090909, + "tokens_rate.above_band": 0.9751693002257337, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024830699774266364 + }, + { + "epoch": 1.0470813804857264, + "grad_norm": 72.46491896122384, + "learning_rate": 3.8858209259032677e-07, + "loss": 0.3633, + "step": 4915, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.819672131147541, + "success_rate.epoch.env.math": 0.9104477611940298, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6545454545454545, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9405639825841352, + "success_rate.epoch.global": 0.8154897494305239, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992008196721311, + "tokens_p.mean_in_band": 0.46510416666666665, + "tokens_rate.above_band": 0.9807073954983923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01929260450160772 + }, + { + "epoch": 1.0481465700894759, + "grad_norm": 204.420510141689, + "learning_rate": 3.885581930290503e-07, + "loss": 0.4072, + "step": 4920, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.828125, + "success_rate.epoch.env.math": 0.9117647058823529, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6506024096385542, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9412231889530791, + "success_rate.epoch.global": 0.8173719376391982, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993636877828054, + "tokens_p.mean_in_band": 0.728125, + "tokens_rate.above_band": 0.9888143176733781, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011185682326621925 + }, + { + "epoch": 1.0492117596932253, + "grad_norm": 84.8321401295415, + "learning_rate": 3.88534272654768e-07, + "loss": 0.4457, + "step": 4925, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.828125, + "success_rate.epoch.env.math": 0.9148936170212766, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6488095238095238, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8958901003448876, + "success_rate.epoch.global": 0.8169934640522876, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9953044041450777, + "tokens_p.mean_in_band": 0.516858552631579, + "tokens_rate.above_band": 0.910377358490566, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08962264150943396 + }, + { + "epoch": 1.0502769492969748, + "grad_norm": 104.41239993017396, + "learning_rate": 3.8851033148491277e-07, + "loss": 0.36, + "step": 4930, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8307692307692308, + "success_rate.epoch.env.math": 0.916083916083916, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6514285714285715, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8964767892040619, + "success_rate.epoch.global": 0.8166311300639659, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9047619047619048, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9944661458333334, + "tokens_p.mean_in_band": 0.6220703125, + "tokens_rate.above_band": 0.9230769230769231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07692307692307693 + }, + { + "epoch": 1.0513421389007243, + "grad_norm": 58.52264459697979, + "learning_rate": 3.8848636953693293e-07, + "loss": 0.3306, + "step": 4935, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8333333333333334, + "success_rate.epoch.env.math": 0.9183673469387755, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6611111111111111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.897797704940562, + "success_rate.epoch.global": 0.8204592901878914, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9919241573033708, + "tokens_p.mean_in_band": 0.8388671875, + "tokens_rate.above_band": 0.956989247311828, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043010752688172046 + }, + { + "epoch": 1.0524073285044737, + "grad_norm": 104.52778488318644, + "learning_rate": 3.8846238682829165e-07, + "loss": 0.3454, + "step": 4940, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8208955223880597, + "success_rate.epoch.env.math": 0.9205298013245033, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6593406593406593, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8967026314560168, + "success_rate.epoch.global": 0.820040899795501, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9987864077669902, + "tokens_p.mean_in_band": 0.5056818181818182, + "tokens_rate.above_band": 0.965625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034375 + }, + { + "epoch": 1.0534725181082232, + "grad_norm": 44.45873111286777, + "learning_rate": 3.8843838337646735e-07, + "loss": 0.383, + "step": 4945, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8235294117647058, + "success_rate.epoch.env.math": 0.9210526315789473, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6595744680851063, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8970108613083383, + "success_rate.epoch.global": 0.8196392785571143, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9900662251655629, + "tokens_p.mean_in_band": 0.5162259615384616, + "tokens_rate.above_band": 0.9207317073170732, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07926829268292683 + }, + { + "epoch": 1.0545377077119726, + "grad_norm": 147.0472720252295, + "learning_rate": 3.8841435919895364e-07, + "loss": 0.259, + "step": 4950, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8285714285714286, + "success_rate.epoch.env.math": 0.9225806451612903, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6614583333333334, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8977793972753649, + "success_rate.epoch.global": 0.8212180746561886, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978932584269663, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9816176470588235, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01838235294117647 + }, + { + "epoch": 1.055602897315722, + "grad_norm": 21.701993238466507, + "learning_rate": 3.883903143132592e-07, + "loss": 0.2104, + "step": 4955, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9642857142857143, + "success_rate.epoch.env.logic": 0.8309859154929577, + "success_rate.epoch.env.math": 0.9240506329113924, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6615384615384615, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8831085506874419, + "success_rate.epoch.global": 0.8208092485549133, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9921052631578947, + "tokens_p.mean_in_band": 0.7532784598214286, + "tokens_rate.above_band": 0.9223300970873787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07766990291262135 + }, + { + "epoch": 1.0566680869194716, + "grad_norm": 157.59482989962441, + "learning_rate": 3.8836624873690757e-07, + "loss": 0.3333, + "step": 4960, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9642857142857143, + "success_rate.epoch.env.logic": 0.8356164383561644, + "success_rate.epoch.env.math": 0.9245283018867925, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6716417910447762, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8844914162642528, + "success_rate.epoch.global": 0.8241965973534972, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998, + "tokens_p.mean_in_band": 0.633203125, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 1.057733276523221, + "grad_norm": 48.53878482620712, + "learning_rate": 3.883421624874378e-07, + "loss": 0.2178, + "step": 4965, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.8356164383561644, + "success_rate.epoch.env.math": 0.91875, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6842105263157895, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8852206853986, + "success_rate.epoch.global": 0.8256029684601113, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958333333333333, + "tokens_p.mean_in_band": 0.7588975694444444, + "tokens_rate.above_band": 0.9693877551020408, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030612244897959183 + }, + { + "epoch": 1.0587984661269707, + "grad_norm": 88.66200637255409, + "learning_rate": 3.8831805558240367e-07, + "loss": 0.2159, + "step": 4970, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.8356164383561644, + "success_rate.epoch.env.math": 0.9212121212121213, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6872037914691943, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8857166296136477, + "success_rate.epoch.global": 0.8287795992714025, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974376114081996, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.0598636557307202, + "grad_norm": 221.96347487104146, + "learning_rate": 3.882939280393741e-07, + "loss": 0.4282, + "step": 4975, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9666666666666667, + "success_rate.epoch.env.logic": 0.8356164383561644, + "success_rate.epoch.env.math": 0.9216867469879518, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6912442396313364, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8862315840886776, + "success_rate.epoch.global": 0.8297491039426523, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9967749110320284, + "tokens_p.mean_in_band": 0.6456801470588235, + "tokens_rate.above_band": 0.9706390328151986, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02936096718480138 + }, + { + "epoch": 1.0609288453344696, + "grad_norm": 89.16341137533541, + "learning_rate": 3.8826977987593314e-07, + "loss": 0.2891, + "step": 4980, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9666666666666667, + "success_rate.epoch.env.logic": 0.84, + "success_rate.epoch.env.math": 0.9221556886227545, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6936936936936937, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8868953983924045, + "success_rate.epoch.global": 0.8309859154929577, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976652298850575, + "tokens_p.mean_in_band": 0.6651785714285714, + "tokens_rate.above_band": 0.9802816901408451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01971830985915493 + }, + { + "epoch": 1.061994034938219, + "grad_norm": 91.32410024472037, + "learning_rate": 3.882456111096797e-07, + "loss": 0.3006, + "step": 4985, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.967741935483871, + "success_rate.epoch.env.logic": 0.84, + "success_rate.epoch.env.math": 0.9226190476190477, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6902654867256637, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8867236184692652, + "success_rate.epoch.global": 0.8304498269896193, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995547385620915, + "tokens_p.mean_in_band": 0.646728515625, + "tokens_rate.above_band": 0.9896507115135834, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01034928848641656 + }, + { + "epoch": 1.0630592245419686, + "grad_norm": 62.97554115020096, + "learning_rate": 3.8822142175822793e-07, + "loss": 0.2876, + "step": 4990, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.967741935483871, + "success_rate.epoch.env.logic": 0.84, + "success_rate.epoch.env.math": 0.9239766081871345, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6926406926406926, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8870629608768211, + "success_rate.epoch.global": 0.8316326530612245, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9937845303867403, + "tokens_p.mean_in_band": 0.5815972222222222, + "tokens_rate.above_band": 0.9526315789473684, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04736842105263158 + }, + { + "epoch": 1.064124414145718, + "grad_norm": 111.485715575201, + "learning_rate": 3.881972118392068e-07, + "loss": 0.2385, + "step": 4995, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8421052631578947, + "success_rate.epoch.env.math": 0.9252873563218391, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.6949152542372882, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8848310188227596, + "success_rate.epoch.global": 0.8311036789297659, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9884868421052632, + "tokens_p.mean_below_band": 9.424984455108643e-07, + "tokens_p.mean_in_band": 0.47788438688212925, + "tokens_rate.above_band": 0.7373134328358208, + "tokens_rate.below_band": 0.0009950248756218905, + "tokens_rate.in_band": 0.26169154228855723 + }, + { + "epoch": 1.0651896037494675, + "grad_norm": 44.55584774499437, + "learning_rate": 3.8817298137026026e-07, + "loss": 0.1276, + "step": 5000, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8441558441558441, + "success_rate.epoch.env.math": 0.9257142857142857, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7049180327868853, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8859655905445772, + "success_rate.epoch.global": 0.8338815789473685, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9897540983606558, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9838709677419355, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016129032258064516 + }, + { + "epoch": 1.066254793353217, + "grad_norm": 37.44887426044418, + "learning_rate": 3.881487303690473e-07, + "loss": 0.3014, + "step": 5005, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8518518518518519, + "success_rate.epoch.env.math": 0.9261363636363636, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7096774193548387, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8871362698342171, + "success_rate.epoch.global": 0.8365695792880259, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974112426035503, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9941176470588236, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0058823529411764705 + }, + { + "epoch": 1.0673199829569664, + "grad_norm": 63.786733274596294, + "learning_rate": 3.8812445885324205e-07, + "loss": 0.162, + "step": 5010, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8518518518518519, + "success_rate.epoch.env.math": 0.9269662921348315, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7086614173228346, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8871193540584411, + "success_rate.epoch.global": 0.835725677830941, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.7777777777777778, + "tokens_p.mean_above_band": 0.9939453125, + "tokens_p.mean_in_band": 0.5845424107142857, + "tokens_rate.above_band": 0.9195402298850575, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08045977011494253 + }, + { + "epoch": 1.0683851725607159, + "grad_norm": 97.63486070867486, + "learning_rate": 3.881001668405332e-07, + "loss": 0.2328, + "step": 5015, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8536585365853658, + "success_rate.epoch.env.math": 0.9297297297297298, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7109375, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8877417363316755, + "success_rate.epoch.global": 0.8383045525902669, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9896875, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9803921568627451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0196078431372549 + }, + { + "epoch": 1.0694503621644653, + "grad_norm": 156.34628875119938, + "learning_rate": 3.8807585434862475e-07, + "loss": 0.3879, + "step": 5020, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8554216867469879, + "success_rate.epoch.env.math": 0.93048128342246, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7115384615384616, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.888024978640113, + "success_rate.epoch.global": 0.839258114374034, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966384462151394, + "tokens_p.mean_in_band": 0.4739583333333333, + "tokens_rate.above_band": 0.9881889763779528, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011811023622047244 + }, + { + "epoch": 1.0705155517682148, + "grad_norm": 59.72047243352351, + "learning_rate": 3.8805152139523536e-07, + "loss": 0.2054, + "step": 5025, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.9315789473684211, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7132075471698113, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8884329713649475, + "success_rate.epoch.global": 0.8401826484018264, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9448818897637795, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05511811023622047 + }, + { + "epoch": 1.0715807413719642, + "grad_norm": 816.1240272641251, + "learning_rate": 3.8802716799809885e-07, + "loss": 0.3993, + "step": 5030, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8505747126436781, + "success_rate.epoch.env.math": 0.9270833333333334, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7111111111111111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8872365900383141, + "success_rate.epoch.global": 0.8365817091454273, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.5888888888888889, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 1.0000521702838063, + "tokens_p.mean_in_band": 0.4214564732142857, + "tokens_rate.above_band": 0.9553429027113237, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044657097288676235 + }, + { + "epoch": 1.0726459309757137, + "grad_norm": 154.32847542752535, + "learning_rate": 3.880027941749636e-07, + "loss": 0.1867, + "step": 5035, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.9285714285714286, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7111111111111111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8879689754689756, + "success_rate.epoch.global": 0.8387573964497042, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972278225806451, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.0737111205794632, + "grad_norm": 94.40867589017665, + "learning_rate": 3.8797839994359325e-07, + "loss": 0.3767, + "step": 5040, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.8586956521739131, + "success_rate.epoch.env.math": 0.9292929292929293, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7090909090909091, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8881642512077296, + "success_rate.epoch.global": 0.8381924198250729, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9947076612903226, + "tokens_p.mean_in_band": 0.5738636363636364, + "tokens_rate.above_band": 0.9712793733681462, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028720626631853787 + }, + { + "epoch": 1.0747763101832126, + "grad_norm": 79.92737662068915, + "learning_rate": 3.8795398532176604e-07, + "loss": 0.2173, + "step": 5045, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8602150537634409, + "success_rate.epoch.env.math": 0.9303482587064676, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7132616487455197, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8889395241033635, + "success_rate.epoch.global": 0.8405172413793104, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982638888888888, + "tokens_p.mean_in_band": 0.611328125, + "tokens_rate.above_band": 0.9908256880733946, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009174311926605505 + }, + { + "epoch": 1.075841499786962, + "grad_norm": 43.49103723119927, + "learning_rate": 3.879295503272753e-07, + "loss": 0.2848, + "step": 5050, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8617021276595744, + "success_rate.epoch.env.math": 0.9310344827586207, + "success_rate.epoch.env.sat": 0.25, + "success_rate.epoch.env.science": 0.7183098591549296, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8820202672873964, + "success_rate.epoch.global": 0.8413597733711048, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980945121951219, + "tokens_p.mean_in_band": 0.6932744565217391, + "tokens_rate.above_band": 0.9661266568483063, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033873343151693665 + }, + { + "epoch": 1.0769066893907115, + "grad_norm": 218.3599509629281, + "learning_rate": 3.879050949779289e-07, + "loss": 0.19, + "step": 5055, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8645833333333334, + "success_rate.epoch.env.math": 0.9313725490196079, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7222222222222222, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8781231431966726, + "success_rate.epoch.global": 0.8421787709497207, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9960164835164835, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.974304068522484, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02569593147751606 + }, + { + "epoch": 1.077971878994461, + "grad_norm": 0.0, + "learning_rate": 3.878806192915498e-07, + "loss": 0.1457, + "step": 5060, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.865979381443299, + "success_rate.epoch.env.math": 0.9320388349514563, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7241379310344828, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8787818719885165, + "success_rate.epoch.global": 0.8443526170798898, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947959183673469, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9983700081499592, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016299918500407497 + }, + { + "epoch": 1.0790370685982105, + "grad_norm": 56.46685685501144, + "learning_rate": 3.878561232859758e-07, + "loss": 0.1699, + "step": 5065, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.865979381443299, + "success_rate.epoch.env.math": 0.9333333333333333, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7226027397260274, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8787599908133731, + "success_rate.epoch.global": 0.845108695652174, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983498349834984, + "tokens_p.mean_in_band": 0.58203125, + "tokens_rate.above_band": 0.9869706840390879, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013029315960912053 + }, + { + "epoch": 1.08010225820196, + "grad_norm": 97.10832917549202, + "learning_rate": 3.8783160697905923e-07, + "loss": 0.2793, + "step": 5070, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.865979381443299, + "success_rate.epoch.env.math": 0.933649289099526, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7263513513513513, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8793953129951643, + "success_rate.epoch.global": 0.8471849865951743, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992919670442842, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9989711934156379, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00102880658436214 + }, + { + "epoch": 1.0811674478057094, + "grad_norm": 61.785518551137216, + "learning_rate": 3.878070703886676e-07, + "loss": 0.2096, + "step": 5075, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8585858585858586, + "success_rate.epoch.env.math": 0.9348837209302325, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7266666666666667, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8788640606577626, + "success_rate.epoch.global": 0.8465608465608465, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9948275862068966, + "tokens_p.mean_in_band": 0.5502232142857143, + "tokens_rate.above_band": 0.9539473684210527, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046052631578947366 + }, + { + "epoch": 1.0822326374094589, + "grad_norm": 2.873540350938449, + "learning_rate": 3.8778251353268283e-07, + "loss": 0.2163, + "step": 5080, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8627450980392157, + "success_rate.epoch.env.math": 0.9360730593607306, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7284768211920529, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8795148545131481, + "success_rate.epoch.global": 0.8485639686684073, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996408045977011, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9775280898876404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02247191011235955 + }, + { + "epoch": 1.0832978270132083, + "grad_norm": 96.3451665318494, + "learning_rate": 3.8775793642900186e-07, + "loss": 0.2786, + "step": 5085, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8627450980392157, + "success_rate.epoch.env.math": 0.9372197309417041, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7296416938110749, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8797249948949659, + "success_rate.epoch.global": 0.8492268041237113, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9920804794520548, + "tokens_p.mean_in_band": 0.621484375, + "tokens_rate.above_band": 0.9358974358974359, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0641025641025641 + }, + { + "epoch": 1.0843630166169578, + "grad_norm": 224.72004536662845, + "learning_rate": 3.877333390955363e-07, + "loss": 0.3518, + "step": 5090, + "success_rate.epoch.env.abd": 0.9767441860465116, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8640776699029126, + "success_rate.epoch.env.math": 0.9380530973451328, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7322580645161291, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8781682696844212, + "success_rate.epoch.global": 0.8498727735368957, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972519729425028, + "tokens_p.mean_below_band": 2.812594175338745e-07, + "tokens_p.mean_in_band": 0.10240558769463667, + "tokens_rate.above_band": 0.2765824758341129, + "tokens_rate.below_band": 0.002494543186778921, + "tokens_rate.in_band": 0.7209229809791082 + }, + { + "epoch": 1.0854282062207072, + "grad_norm": 63.17554418013749, + "learning_rate": 3.8770872155021253e-07, + "loss": 0.1439, + "step": 5095, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8666666666666667, + "success_rate.epoch.env.math": 0.9385964912280702, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7348242811501597, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8757040710638399, + "success_rate.epoch.global": 0.8505025125628141, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986313868613139, + "tokens_p.mean_in_band": 0.6607142857142857, + "tokens_rate.above_band": 0.9670588235294117, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03294117647058824 + }, + { + "epoch": 1.0864933958244567, + "grad_norm": 65.12427258618388, + "learning_rate": 3.8768408381097155e-07, + "loss": 0.3441, + "step": 5100, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8691588785046729, + "success_rate.epoch.env.math": 0.9388646288209607, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7398119122257053, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.876408432928062, + "success_rate.epoch.global": 0.8523573200992556, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962818696883853, + "tokens_p.mean_in_band": 0.7907366071428571, + "tokens_rate.above_band": 0.9805555555555555, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019444444444444445 + }, + { + "epoch": 1.0875585854282062, + "grad_norm": 230.26982974330005, + "learning_rate": 3.876594258957693e-07, + "loss": 0.3642, + "step": 5105, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8691588785046729, + "success_rate.epoch.env.math": 0.9399141630901288, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7376543209876543, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8763077004763453, + "success_rate.epoch.global": 0.8517156862745098, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975124378109452, + "tokens_p.mean_in_band": 0.5914306640625, + "tokens_rate.above_band": 0.9617224880382775, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03827751196172249 + }, + { + "epoch": 1.0886237750319556, + "grad_norm": 67.96402510385181, + "learning_rate": 3.876347478225762e-07, + "loss": 0.2941, + "step": 5110, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8703703703703703, + "success_rate.epoch.env.math": 0.9409282700421941, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7384615384615385, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8765834110483133, + "success_rate.epoch.global": 0.8535108958837773, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983898527865405, + "tokens_p.mean_in_band": 0.734375, + "tokens_rate.above_band": 0.9968553459119497, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0031446540880503146 + }, + { + "epoch": 1.089688964635705, + "grad_norm": 135.17207102464585, + "learning_rate": 3.876100496093775e-07, + "loss": 0.1945, + "step": 5115, + "success_rate.epoch.env.abd": 0.9777777777777777, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8703703703703703, + "success_rate.epoch.env.math": 0.9416666666666667, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7370030581039755, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.876680412689587, + "success_rate.epoch.global": 0.854066985645933, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9999553252323088, + "tokens_p.mean_in_band": 0.5703125, + "tokens_rate.above_band": 0.9985724482512491, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0014275517487508922 + }, + { + "epoch": 1.0907541542394545, + "grad_norm": 160.17105370646595, + "learning_rate": 3.8758533127417297e-07, + "loss": 0.4869, + "step": 5120, + "success_rate.epoch.env.abd": 0.9782608695652174, + "success_rate.epoch.env.agentgym:alfworld": 0.96, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8703703703703703, + "success_rate.epoch.env.math": 0.941908713692946, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7409638554216867, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8735809079920007, + "success_rate.epoch.global": 0.8546099290780141, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993977516059958, + "tokens_p.mean_in_band": 0.69453125, + "tokens_rate.above_band": 0.9946751863684771, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005324813631522897 + }, + { + "epoch": 1.091819343843204, + "grad_norm": 206.12375796428358, + "learning_rate": 3.8756059283497733e-07, + "loss": 0.3018, + "step": 5125, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.96, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8715596330275229, + "success_rate.epoch.env.math": 0.9385245901639344, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7425149700598802, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8737706942060314, + "success_rate.epoch.global": 0.8551401869158879, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976498983739838, + "tokens_p.mean_in_band": 0.625, + "tokens_rate.above_band": 0.9959514170040485, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004048582995951417 + }, + { + "epoch": 1.0928845334469535, + "grad_norm": 53.4426212461758, + "learning_rate": 3.875358343098197e-07, + "loss": 0.1871, + "step": 5130, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8738738738738738, + "success_rate.epoch.env.math": 0.94, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7432835820895523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8743249418651723, + "success_rate.epoch.global": 0.8568129330254042, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955778301886793, + "tokens_p.mean_in_band": 0.802734375, + "tokens_rate.above_band": 0.9814814814814815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018518518518518517 + }, + { + "epoch": 1.093949723050703, + "grad_norm": 110.0774637185527, + "learning_rate": 3.875110557167438e-07, + "loss": 0.2661, + "step": 5135, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8738738738738738, + "success_rate.epoch.env.math": 0.9407114624505929, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7419354838709677, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8743631642910306, + "success_rate.epoch.global": 0.8561643835616438, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942103794642857, + "tokens_p.mean_in_band": 0.614990234375, + "tokens_rate.above_band": 0.9655172413793104, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034482758620689655 + }, + { + "epoch": 1.0950149126544524, + "grad_norm": 238.6272099337383, + "learning_rate": 3.8748625707380824e-07, + "loss": 0.2169, + "step": 5140, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8738738738738738, + "success_rate.epoch.env.math": 0.94140625, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7435158501440923, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8745699964567152, + "success_rate.epoch.global": 0.8566591422121896, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9955658783783784, + "tokens_p.mean_in_band": 0.23177083333333334, + "tokens_rate.above_band": 0.9801324503311258, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019867549668874173 + }, + { + "epoch": 1.096080102258202, + "grad_norm": 110.67437073230069, + "learning_rate": 3.8746143839908606e-07, + "loss": 0.3265, + "step": 5145, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.8738738738738738, + "success_rate.epoch.env.math": 0.9420849420849421, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7435897435897436, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8729597889172358, + "success_rate.epoch.global": 0.8560267857142857, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9938991323210412, + "tokens_p.mean_in_band": 0.640126329787234, + "tokens_rate.above_band": 0.90748031496063, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09251968503937008 + }, + { + "epoch": 1.0971452918619513, + "grad_norm": 182.92325646847095, + "learning_rate": 3.874365997106649e-07, + "loss": 0.5109, + "step": 5150, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.8738738738738738, + "success_rate.epoch.env.math": 0.9425287356321839, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7478991596638656, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8733918988828143, + "success_rate.epoch.global": 0.8576158940397351, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954166666666666, + "tokens_p.mean_in_band": 0.80859375, + "tokens_rate.above_band": 0.9782608695652174, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021739130434782608 + }, + { + "epoch": 1.098210481465701, + "grad_norm": 124.94864449011877, + "learning_rate": 3.874117410266471e-07, + "loss": 0.389, + "step": 5155, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8672566371681416, + "success_rate.epoch.env.math": 0.9431818181818182, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.75, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8731724408412103, + "success_rate.epoch.global": 0.8580786026200873, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973897345823576, + "tokens_p.mean_in_band": 0.7161458333333334, + "tokens_rate.above_band": 0.9953379953379954, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004662004662004662 + }, + { + "epoch": 1.0992756710694505, + "grad_norm": 109.79976932617627, + "learning_rate": 3.8738686236514937e-07, + "loss": 0.2852, + "step": 5160, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8672566371681416, + "success_rate.epoch.env.math": 0.943609022556391, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.75, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8732112776025349, + "success_rate.epoch.global": 0.857451403887689, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9884020618556701, + "tokens_p.mean_in_band": 0.4296875, + "tokens_rate.above_band": 0.941747572815534, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05825242718446602 + }, + { + "epoch": 1.1003408606732, + "grad_norm": 84.99864019904389, + "learning_rate": 3.873619637443031e-07, + "loss": 0.2377, + "step": 5165, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8695652173913043, + "success_rate.epoch.env.math": 0.9438202247191011, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7506702412868632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706874269407492, + "success_rate.epoch.global": 0.8568376068376068, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992421540656206, + "tokens_p.mean_in_band": 0.390625, + "tokens_rate.above_band": 0.9915134370579916, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008486562942008486 + }, + { + "epoch": 1.1014060502769494, + "grad_norm": 60.99410794911759, + "learning_rate": 3.873370451822544e-07, + "loss": 0.2666, + "step": 5170, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9361702127659575, + "success_rate.epoch.env.logic": 0.8695652173913043, + "success_rate.epoch.env.math": 0.9446494464944649, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7506631299734748, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708883100800473, + "success_rate.epoch.global": 0.857293868921776, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936625874125874, + "tokens_p.mean_in_band": 0.6428571428571429, + "tokens_rate.above_band": 0.9761092150170648, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023890784982935155 + }, + { + "epoch": 1.1024712398806988, + "grad_norm": 40.62948199770087, + "learning_rate": 3.8731210669716355e-07, + "loss": 0.2366, + "step": 5175, + "success_rate.epoch.env.abd": 0.9791666666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.865546218487395, + "success_rate.epoch.env.math": 0.9448529411764706, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.75, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706423478482302, + "success_rate.epoch.global": 0.856694560669456, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994550511124474, + "tokens_p.mean_in_band": 0.5189393939393939, + "tokens_rate.above_band": 0.9805424528301887, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01945754716981132 + }, + { + "epoch": 1.1035364294844483, + "grad_norm": 40.615117543713055, + "learning_rate": 3.872871483072056e-07, + "loss": 0.2951, + "step": 5180, + "success_rate.epoch.env.abd": 0.9791666666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8666666666666667, + "success_rate.epoch.env.math": 0.9458483754512635, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7519582245430809, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712082246329168, + "success_rate.epoch.global": 0.8581780538302277, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980769230769231, + "tokens_p.mean_in_band": 0.7958096590909091, + "tokens_rate.above_band": 0.9672619047619048, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03273809523809524 + }, + { + "epoch": 1.1046016190881978, + "grad_norm": 112.40872957289658, + "learning_rate": 3.872621700305701e-07, + "loss": 0.2109, + "step": 5185, + "success_rate.epoch.env.abd": 0.98, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8688524590163934, + "success_rate.epoch.env.math": 0.9464285714285714, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7532467532467533, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716525746660114, + "success_rate.epoch.global": 0.8596311475409836, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970588235294118, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9941520467836257, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005847953216374269 + }, + { + "epoch": 1.1056668086919472, + "grad_norm": 77.06821168483752, + "learning_rate": 3.8723717188546095e-07, + "loss": 0.274, + "step": 5190, + "success_rate.epoch.env.abd": 0.98, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.8709677419354839, + "success_rate.epoch.env.math": 0.9469964664310954, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7506426735218509, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717757208842655, + "success_rate.epoch.global": 0.859026369168357, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9956995412844036, + "tokens_p.mean_in_band": 0.4481534090909091, + "tokens_rate.above_band": 0.9083333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09166666666666666 + }, + { + "epoch": 1.1067319982956967, + "grad_norm": 201.13386995976776, + "learning_rate": 3.872121538900967e-07, + "loss": 0.4417, + "step": 5195, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.873015873015873, + "success_rate.epoch.env.math": 0.9471830985915493, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.751269035532995, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721057532498308, + "success_rate.epoch.global": 0.8594377510040161, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977678571428571, + "tokens_p.mean_in_band": 0.740234375, + "tokens_rate.above_band": 0.9680851063829787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031914893617021274 + }, + { + "epoch": 1.1077971878994461, + "grad_norm": 84.52786372488357, + "learning_rate": 3.871871160627102e-07, + "loss": 0.3288, + "step": 5200, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.8661417322834646, + "success_rate.epoch.env.math": 0.9477351916376306, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7525, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716429275026196, + "success_rate.epoch.global": 0.8588469184890656, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.6111111111111112, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9920634920634921, + "tokens_p.mean_in_band": 0.5963541666666666, + "tokens_rate.above_band": 0.9130434782608695, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08695652173913043 + }, + { + "epoch": 1.1088623775031956, + "grad_norm": 304.79828297604735, + "learning_rate": 3.8716205842154896e-07, + "loss": 0.3263, + "step": 5205, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9423076923076923, + "success_rate.epoch.env.logic": 0.8682170542635659, + "success_rate.epoch.env.math": 0.9480968858131488, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7531172069825436, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8724249760729862, + "success_rate.epoch.global": 0.860236220472441, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990207765667575, + "tokens_p.mean_in_band": 0.5625, + "tokens_rate.above_band": 0.9993192648059904, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0006807351940095302 + }, + { + "epoch": 1.109927567106945, + "grad_norm": 89.70948254159615, + "learning_rate": 3.8713698098487466e-07, + "loss": 0.281, + "step": 5210, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8692307692307693, + "success_rate.epoch.env.math": 0.9484536082474226, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7506172839506173, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8725495316374197, + "success_rate.epoch.global": 0.8596491228070176, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984065686862628, + "tokens_p.mean_in_band": 0.6142578125, + "tokens_rate.above_band": 0.9928528886241811, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00714711137581894 + }, + { + "epoch": 1.1109927567106945, + "grad_norm": 158.5209917720987, + "learning_rate": 3.8711188377096365e-07, + "loss": 0.2265, + "step": 5215, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9393939393939394, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9454545454545454, + "success_rate.epoch.env.logic": 0.8721804511278195, + "success_rate.epoch.env.math": 0.9486301369863014, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.75, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8730416195546336, + "success_rate.epoch.global": 0.86003861003861, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997344944774851, + "tokens_p.mean_in_band": 0.7356770833333334, + "tokens_rate.above_band": 0.997457627118644, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002542372881355932 + }, + { + "epoch": 1.112057946314444, + "grad_norm": 25.825288991355624, + "learning_rate": 3.8708676679810666e-07, + "loss": 0.3018, + "step": 5220, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8731343283582089, + "success_rate.epoch.env.math": 0.9489795918367347, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7518248175182481, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8737293812852063, + "success_rate.epoch.global": 0.861376673040153, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974398569570871, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.1131231359181935, + "grad_norm": 45.0277663430148, + "learning_rate": 3.870616300846086e-07, + "loss": 0.2867, + "step": 5225, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8740740740740741, + "success_rate.epoch.env.math": 0.9491525423728814, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7541766109785203, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8740443348954141, + "success_rate.epoch.global": 0.8617424242424242, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915865384615384, + "tokens_p.mean_in_band": 0.24283854166666666, + "tokens_rate.above_band": 0.9719626168224299, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028037383177570093 + }, + { + "epoch": 1.114188325521943, + "grad_norm": 57.33559610050672, + "learning_rate": 3.8703647364878893e-07, + "loss": 0.3743, + "step": 5230, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8740740740740741, + "success_rate.epoch.env.math": 0.9491525423728814, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7494145199063232, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8736737908294401, + "success_rate.epoch.global": 0.8592870544090057, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9933176100628931, + "tokens_p.mean_in_band": 0.50633544921875, + "tokens_rate.above_band": 0.8412698412698413, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15873015873015872 + }, + { + "epoch": 1.1152535151256924, + "grad_norm": 39.400872988123666, + "learning_rate": 3.870112975089817e-07, + "loss": 0.3044, + "step": 5235, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.8759124087591241, + "success_rate.epoch.env.math": 0.9494949494949495, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7505827505827506, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8743701767399089, + "success_rate.epoch.global": 0.8605947955390335, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987549800796812, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9992038216560509, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0007961783439490446 + }, + { + "epoch": 1.1163187047294418, + "grad_norm": 67.95782985940595, + "learning_rate": 3.869861016835349e-07, + "loss": 0.3773, + "step": 5240, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8776978417266187, + "success_rate.epoch.env.math": 0.9504950495049505, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7505827505827506, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8747004486130556, + "success_rate.epoch.global": 0.861878453038674, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981773997569866, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9987864077669902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012135922330097086 + }, + { + "epoch": 1.1173838943331913, + "grad_norm": 850.734297224957, + "learning_rate": 3.8696088619081106e-07, + "loss": 0.4694, + "step": 5245, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8811188811188811, + "success_rate.epoch.env.math": 0.9506578947368421, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7517401392111369, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8729964872283005, + "success_rate.epoch.global": 0.8622262773722628, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971382783882784, + "tokens_p.mean_in_band": 0.5446428571428571, + "tokens_rate.above_band": 0.975, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025 + }, + { + "epoch": 1.1184490839369408, + "grad_norm": 93.74605823030838, + "learning_rate": 3.8693565104918715e-07, + "loss": 0.3742, + "step": 5250, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8827586206896552, + "success_rate.epoch.env.math": 0.9514563106796117, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7534562211981567, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8733741451828969, + "success_rate.epoch.global": 0.8634719710669078, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9930555555555556, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.989010989010989, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01098901098901099 + }, + { + "epoch": 1.1195142735406902, + "grad_norm": 167.60115870344111, + "learning_rate": 3.8691039627705433e-07, + "loss": 0.3261, + "step": 5255, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8843537414965986, + "success_rate.epoch.env.math": 0.9516129032258065, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7557077625570776, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8738685715819883, + "success_rate.epoch.global": 0.8646953405017921, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986538461538461, + "tokens_p.mean_in_band": 0.67109375, + "tokens_rate.above_band": 0.9923664122137404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007633587786259542 + }, + { + "epoch": 1.1205794631444397, + "grad_norm": 75.72491858043374, + "learning_rate": 3.868851218928181e-07, + "loss": 0.3269, + "step": 5260, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9459459459459459, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.88, + "success_rate.epoch.env.math": 0.9517684887459807, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7573696145124716, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8737744986256676, + "success_rate.epoch.global": 0.8650088809946714, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979771205357143, + "tokens_p.mean_in_band": 0.6905381944444444, + "tokens_rate.above_band": 0.9803063457330415, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019693654266958426 + }, + { + "epoch": 1.1216446527481891, + "grad_norm": 110.57320370533763, + "learning_rate": 3.8685982791489825e-07, + "loss": 0.2583, + "step": 5265, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8807947019867549, + "success_rate.epoch.env.math": 0.9525316455696202, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7584650112866818, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8741450195976032, + "success_rate.epoch.global": 0.8661971830985915, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961300309597523, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9938461538461538, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006153846153846154 + }, + { + "epoch": 1.1227098423519386, + "grad_norm": 154.89165743127634, + "learning_rate": 3.868345143617288e-07, + "loss": 0.362, + "step": 5270, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.881578947368421, + "success_rate.epoch.env.math": 0.9526813880126183, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7583892617449665, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8743979721968707, + "success_rate.epoch.global": 0.8664921465968587, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991871387283237, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9885714285714285, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011428571428571429 + }, + { + "epoch": 1.123775031955688, + "grad_norm": 68.2933313424731, + "learning_rate": 3.868091812517581e-07, + "loss": 0.171, + "step": 5275, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8838709677419355, + "success_rate.epoch.env.math": 0.9532710280373832, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7577777777777778, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8746043518724244, + "success_rate.epoch.global": 0.8667820069204152, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994921875, + "tokens_p.mean_in_band": 0.6536458333333334, + "tokens_rate.above_band": 0.9302325581395349, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06976744186046512 + }, + { + "epoch": 1.1248402215594375, + "grad_norm": 242.5255759945865, + "learning_rate": 3.867838286034488e-07, + "loss": 0.1494, + "step": 5280, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8846153846153846, + "success_rate.epoch.env.math": 0.9507692307692308, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7593818984547461, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.874590419170812, + "success_rate.epoch.global": 0.8670668953687821, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000490837696335, + "tokens_p.mean_in_band": 0.76123046875, + "tokens_rate.above_band": 0.9958289885297185, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004171011470281543 + }, + { + "epoch": 1.125905411163187, + "grad_norm": 310.19556807866786, + "learning_rate": 3.8675845643527765e-07, + "loss": 0.3814, + "step": 5285, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.8853503184713376, + "success_rate.epoch.env.math": 0.9512195121951219, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7614879649890591, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8725047360693384, + "success_rate.epoch.global": 0.8673469387755102, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989908854166667, + "tokens_p.mean_below_band": 4.602043190971017e-10, + "tokens_p.mean_in_band": 0.79931640625, + "tokens_rate.above_band": 0.9907120743034056, + "tokens_rate.below_band": 0.0010319917440660474, + "tokens_rate.in_band": 0.008255933952528379 + }, + { + "epoch": 1.1269706007669364, + "grad_norm": 71.57284706372906, + "learning_rate": 3.8673306476573575e-07, + "loss": 0.315, + "step": 5290, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8881987577639752, + "success_rate.epoch.env.math": 0.9518072289156626, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7620087336244541, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8729342791424038, + "success_rate.epoch.global": 0.8684654300168634, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986293859649122, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.128035790370686, + "grad_norm": 82.1475035259727, + "learning_rate": 3.867076536133284e-07, + "loss": 0.2763, + "step": 5295, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8881987577639752, + "success_rate.epoch.env.math": 0.9520958083832335, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7660944206008584, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8733319397282198, + "success_rate.epoch.global": 0.8695652173913043, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9943693693693694, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02631578947368421 + }, + { + "epoch": 1.1291009799744354, + "grad_norm": 89.6272378455988, + "learning_rate": 3.86682222996575e-07, + "loss": 0.3422, + "step": 5300, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8902439024390244, + "success_rate.epoch.env.math": 0.9525222551928784, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7633262260127932, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8733298143591601, + "success_rate.epoch.global": 0.8689883913764511, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9946732954545454, + "tokens_p.mean_in_band": 0.4893465909090909, + "tokens_rate.above_band": 0.8, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.2 + }, + { + "epoch": 1.1301661695781848, + "grad_norm": 95.10152157247057, + "learning_rate": 3.8665677293400924e-07, + "loss": 0.2529, + "step": 5305, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8909090909090909, + "success_rate.epoch.env.math": 0.9529411764705882, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7648305084745762, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8736800749572095, + "success_rate.epoch.global": 0.8700657894736842, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969512195121951, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.1312313591819343, + "grad_norm": 27.945044526773536, + "learning_rate": 3.866313034441789e-07, + "loss": 0.3504, + "step": 5310, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.891566265060241, + "success_rate.epoch.env.math": 0.9534883720930233, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7668067226890757, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.873991765853673, + "success_rate.epoch.global": 0.8711256117455138, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.994410569105691, + "tokens_p.mean_in_band": 0.8505859375, + "tokens_rate.above_band": 0.968503937007874, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031496062992125984 + }, + { + "epoch": 1.1322965487856838, + "grad_norm": 118.49802409828332, + "learning_rate": 3.86605814545646e-07, + "loss": 0.2682, + "step": 5315, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8928571428571429, + "success_rate.epoch.env.math": 0.9541547277936963, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7677824267782427, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8742583965433769, + "success_rate.epoch.global": 0.872168284789644, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988799283154122, + "tokens_p.mean_in_band": 0.74609375, + "tokens_rate.above_band": 0.9928825622775801, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0071174377224199285 + }, + { + "epoch": 1.1333617383894334, + "grad_norm": 77.98706334718261, + "learning_rate": 3.8658030625698663e-07, + "loss": 0.385, + "step": 5320, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.95, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.893491124260355, + "success_rate.epoch.env.math": 0.9541547277936963, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7654320987654321, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8742189151499634, + "success_rate.epoch.global": 0.8707865168539326, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.625, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9960466867469879, + "tokens_p.mean_in_band": 0.59765625, + "tokens_rate.above_band": 0.9485714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05142857142857143 + }, + { + "epoch": 1.1344269279931827, + "grad_norm": 153.89347713916143, + "learning_rate": 3.8655477859679114e-07, + "loss": 0.1973, + "step": 5325, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.8947368421052632, + "success_rate.epoch.env.math": 0.9544159544159544, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.764344262295082, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8729156489105798, + "success_rate.epoch.global": 0.8702229299363057, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974908759124088, + "tokens_p.mean_in_band": 0.5177734375, + "tokens_rate.above_band": 0.9762470308788599, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023752969121140142 + }, + { + "epoch": 1.1354921175969324, + "grad_norm": 233.52559795034185, + "learning_rate": 3.865292315836638e-07, + "loss": 0.4401, + "step": 5330, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.8953488372093024, + "success_rate.epoch.env.math": 0.9550561797752809, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7621951219512195, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8728341107396257, + "success_rate.epoch.global": 0.8696682464454977, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9917091836734694, + "tokens_p.mean_in_band": 0.5876736111111112, + "tokens_rate.above_band": 0.9158878504672897, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08411214953271028 + }, + { + "epoch": 1.1365573072006816, + "grad_norm": 72.88675988213836, + "learning_rate": 3.8650366523622307e-07, + "loss": 0.2221, + "step": 5335, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8977272727272727, + "success_rate.epoch.env.math": 0.9550561797752809, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7620967741935484, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.873104964235953, + "success_rate.epoch.global": 0.8699059561128527, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990768094534712, + "tokens_p.mean_in_band": 0.63671875, + "tokens_rate.above_band": 0.9941262848751835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005873715124816446 + }, + { + "epoch": 1.1376224968044313, + "grad_norm": 72.2643236243864, + "learning_rate": 3.8647807957310167e-07, + "loss": 0.3064, + "step": 5340, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8983050847457628, + "success_rate.epoch.env.math": 0.9555555555555556, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.762, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8732159458923712, + "success_rate.epoch.global": 0.8701399688958009, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965277777777778, + "tokens_p.mean_in_band": 0.5963541666666666, + "tokens_rate.above_band": 0.972972972972973, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02702702702702703 + }, + { + "epoch": 1.1386876864081807, + "grad_norm": 115.95460668351762, + "learning_rate": 3.8645247461294607e-07, + "loss": 0.3364, + "step": 5345, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.898876404494382, + "success_rate.epoch.env.math": 0.9558011049723757, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7628458498023716, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8734287772577687, + "success_rate.epoch.global": 0.8703703703703703, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.991042345276873, + "tokens_p.mean_in_band": 0.7278645833333334, + "tokens_rate.above_band": 0.9715189873417721, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028481012658227847 + }, + { + "epoch": 1.1397528760119302, + "grad_norm": 15.346054358634856, + "learning_rate": 3.8642685037441705e-07, + "loss": 0.414, + "step": 5350, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9564032697547684, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7637795275590551, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8737304054749373, + "success_rate.epoch.global": 0.8713629402756509, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998342803030303, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.1408180656156797, + "grad_norm": 101.31493930051695, + "learning_rate": 3.864012068761895e-07, + "loss": 0.2962, + "step": 5355, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9010989010989011, + "success_rate.epoch.env.math": 0.9565217391304348, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.763671875, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8739540728984022, + "success_rate.epoch.global": 0.871580547112462, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990873247663551, + "tokens_p.mean_in_band": 0.578125, + "tokens_rate.above_band": 0.9839080459770115, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016091954022988506 + }, + { + "epoch": 1.1418832552194291, + "grad_norm": 104.0451129278403, + "learning_rate": 3.86375544136952e-07, + "loss": 0.3797, + "step": 5360, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9571428571428572, + "success_rate.epoch.env.logic": 0.9010989010989011, + "success_rate.epoch.env.math": 0.956989247311828, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7620889748549323, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8739091389028338, + "success_rate.epoch.global": 0.8710407239819005, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967369477911646, + "tokens_p.mean_below_band": 3.655441105365753e-08, + "tokens_p.mean_in_band": 0.40625, + "tokens_rate.above_band": 0.9467680608365019, + "tokens_rate.below_band": 0.0038022813688212928, + "tokens_rate.in_band": 0.049429657794676805 + }, + { + "epoch": 1.1429484448231786, + "grad_norm": 80.17257523697818, + "learning_rate": 3.863498621754075e-07, + "loss": 0.2845, + "step": 5365, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9016393442622951, + "success_rate.epoch.env.math": 0.9574468085106383, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7634615384615384, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8742007107148605, + "success_rate.epoch.global": 0.8720059880239521, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979395604395604, + "tokens_p.mean_in_band": 0.720703125, + "tokens_rate.above_band": 0.978494623655914, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021505376344086023 + }, + { + "epoch": 1.144013634426928, + "grad_norm": 50.97903601685837, + "learning_rate": 3.8632416101027286e-07, + "loss": 0.2743, + "step": 5370, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9021739130434783, + "success_rate.epoch.env.math": 0.9578947368421052, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7609942638623327, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8741270731099607, + "success_rate.epoch.global": 0.8714710252600297, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981155778894473, + "tokens_p.mean_in_band": 0.325, + "tokens_rate.above_band": 0.9875930521091811, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01240694789081886 + }, + { + "epoch": 1.1450788240306775, + "grad_norm": 150.86810287871177, + "learning_rate": 3.8629844066027877e-07, + "loss": 0.2703, + "step": 5375, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8978494623655914, + "success_rate.epoch.env.math": 0.95822454308094, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7623574144486692, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.873928358468675, + "success_rate.epoch.global": 0.8716814159292036, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963884430176565, + "tokens_p.mean_in_band": 0.46875, + "tokens_rate.above_band": 0.978021978021978, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02197802197802198 + }, + { + "epoch": 1.146144013634427, + "grad_norm": 155.58822346589005, + "learning_rate": 3.862727011441701e-07, + "loss": 0.2506, + "step": 5380, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8983957219251337, + "success_rate.epoch.env.math": 0.9588688946015425, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7632575757575758, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8741767037441367, + "success_rate.epoch.global": 0.8726207906295754, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957107843137255, + "tokens_p.mean_in_band": 0.8151041666666666, + "tokens_rate.above_band": 0.9855072463768116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014492753623188406 + }, + { + "epoch": 1.1472092032381764, + "grad_norm": 170.61846308388598, + "learning_rate": 3.8624694248070574e-07, + "loss": 0.3202, + "step": 5385, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.898936170212766, + "success_rate.epoch.env.math": 0.9592875318066157, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7654784240150094, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8744657886305128, + "success_rate.epoch.global": 0.873546511627907, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955658783783784, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.961038961038961, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03896103896103896 + }, + { + "epoch": 1.148274392841926, + "grad_norm": 475.247067033273, + "learning_rate": 3.8622116468865823e-07, + "loss": 0.3574, + "step": 5390, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8994708994708994, + "success_rate.epoch.env.math": 0.9593908629441624, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7680890538033395, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8836188832792388, + "success_rate.epoch.global": 0.8744588744588745, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974724264705882, + "tokens_p.mean_in_band": 0.839453125, + "tokens_rate.above_band": 0.9819494584837545, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018050541516245487 + }, + { + "epoch": 1.1493395824456754, + "grad_norm": 663.6783173874302, + "learning_rate": 3.8619536778681434e-07, + "loss": 0.4983, + "step": 5395, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9010416666666666, + "success_rate.epoch.env.math": 0.9596977329974811, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7679558011049724, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8837774636929404, + "success_rate.epoch.global": 0.8746418338108882, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.993122009569378, + "tokens_p.mean_in_band": 0.7061941964285714, + "tokens_rate.above_band": 0.9372197309417041, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06278026905829596 + }, + { + "epoch": 1.1504047720494248, + "grad_norm": 231.91896777017487, + "learning_rate": 3.861695517939747e-07, + "loss": 0.3775, + "step": 5400, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9015544041450777, + "success_rate.epoch.env.math": 0.9601990049751243, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7692307692307693, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8840409849365234, + "success_rate.epoch.global": 0.8755334281650071, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999274661508704, + "tokens_p.mean_in_band": 0.7213541666666666, + "tokens_rate.above_band": 0.988527724665392, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011472275334608031 + }, + { + "epoch": 1.1514699616531743, + "grad_norm": 51.736370475595805, + "learning_rate": 3.861437167289537e-07, + "loss": 0.2492, + "step": 5405, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9025641025641026, + "success_rate.epoch.env.math": 0.9603960396039604, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7695099818511797, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8821558688861643, + "success_rate.epoch.global": 0.875, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9926215277777778, + "tokens_p.mean_in_band": 0.5592041015625, + "tokens_rate.above_band": 0.8181818181818182, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.18181818181818182 + }, + { + "epoch": 1.1525351512569237, + "grad_norm": 88.13301482797574, + "learning_rate": 3.8611786261057983e-07, + "loss": 0.3243, + "step": 5410, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9035532994923858, + "success_rate.epoch.env.math": 0.9605911330049262, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7719928186714542, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8824892440815755, + "success_rate.epoch.global": 0.8758765778401122, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9914010067114094, + "tokens_p.mean_in_band": 0.771484375, + "tokens_rate.above_band": 0.9738562091503268, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026143790849673203 + }, + { + "epoch": 1.1536003408606732, + "grad_norm": 116.91828344381891, + "learning_rate": 3.860919894576954e-07, + "loss": 0.3771, + "step": 5415, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.905, + "success_rate.epoch.env.math": 0.960880195599022, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7736185383244206, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8827948334229098, + "success_rate.epoch.global": 0.8767409470752089, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949127906976745, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.1546655304644227, + "grad_norm": 38.94428124621091, + "learning_rate": 3.8606609728915655e-07, + "loss": 0.3552, + "step": 5420, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.9054726368159204, + "success_rate.epoch.env.math": 0.9609756097560975, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7742504409171076, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8829760922741143, + "success_rate.epoch.global": 0.8769017980636238, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969858156028368, + "tokens_p.mean_in_band": 0.5529513888888888, + "tokens_rate.above_band": 0.9873949579831933, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012605042016806723 + }, + { + "epoch": 1.1557307200681721, + "grad_norm": 25.593705439809174, + "learning_rate": 3.860401861238333e-07, + "loss": 0.2794, + "step": 5425, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.9064039408866995, + "success_rate.epoch.env.math": 0.9610705596107056, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.775438596491228, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8816962684892178, + "success_rate.epoch.global": 0.8770604395604396, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990384615384615, + "tokens_p.mean_in_band": 0.6838727678571429, + "tokens_rate.above_band": 0.9653465346534653, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034653465346534656 + }, + { + "epoch": 1.1567959096719216, + "grad_norm": 76.90661965573406, + "learning_rate": 3.860142559806096e-07, + "loss": 0.3634, + "step": 5430, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.958904109589041, + "success_rate.epoch.env.logic": 0.9024390243902439, + "success_rate.epoch.env.math": 0.9611650485436893, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7766143106457243, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8817789439332308, + "success_rate.epoch.global": 0.8772169167803547, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981264551804424, + "tokens_p.mean_in_band": 0.6155894886363636, + "tokens_rate.above_band": 0.993637941006362, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006362058993637941 + }, + { + "epoch": 1.157861099275671, + "grad_norm": 78.61093805233676, + "learning_rate": 3.8598830687838304e-07, + "loss": 0.3668, + "step": 5435, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.958904109589041, + "success_rate.epoch.env.logic": 0.9033816425120773, + "success_rate.epoch.env.math": 0.9612590799031477, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7772020725388601, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8819266176945422, + "success_rate.epoch.global": 0.8773712737127372, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968932748538012, + "tokens_p.mean_in_band": 0.6066576086956522, + "tokens_rate.above_band": 0.9674681753889675, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03253182461103253 + }, + { + "epoch": 1.1589262888794205, + "grad_norm": 187.5140818528434, + "learning_rate": 3.859623388360652e-07, + "loss": 0.4417, + "step": 5440, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.9038461538461539, + "success_rate.epoch.env.math": 0.9615384615384616, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7753001715265866, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8819209706370945, + "success_rate.epoch.global": 0.8768506056527591, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0004460745440127, + "tokens_p.mean_in_band": 0.5065104166666666, + "tokens_rate.above_band": 0.99057344854674, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009426551453260016 + }, + { + "epoch": 1.15999147848317, + "grad_norm": 318.484063452464, + "learning_rate": 3.8593635187258134e-07, + "loss": 0.3525, + "step": 5445, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9605263157894737, + "success_rate.epoch.env.logic": 0.9056603773584906, + "success_rate.epoch.env.math": 0.9616306954436451, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7747440273037542, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8802414345116113, + "success_rate.epoch.global": 0.8763368983957219, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994612068965517, + "tokens_p.mean_in_band": 0.43136160714285715, + "tokens_rate.above_band": 0.9914529914529915, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008547008547008548 + }, + { + "epoch": 1.1610566680869194, + "grad_norm": 76.04912135143246, + "learning_rate": 3.8591034600687063e-07, + "loss": 0.1966, + "step": 5450, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.9069767441860465, + "success_rate.epoch.env.math": 0.9619047619047619, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7755102040816326, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8805476850402968, + "success_rate.epoch.global": 0.8771580345285525, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995005707762558, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.162121857690669, + "grad_norm": 17.680857182509204, + "learning_rate": 3.8588432125788597e-07, + "loss": 0.3327, + "step": 5455, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.9078341013824884, + "success_rate.epoch.env.math": 0.9622641509433962, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7753378378378378, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8806426286758678, + "success_rate.epoch.global": 0.8773087071240105, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945987654320988, + "tokens_p.mean_in_band": 0.6701388888888888, + "tokens_rate.above_band": 0.9, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1 + }, + { + "epoch": 1.1631870472944184, + "grad_norm": 32.80299912299739, + "learning_rate": 3.85858277644594e-07, + "loss": 0.2905, + "step": 5460, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.9078341013824884, + "success_rate.epoch.env.math": 0.9604651162790697, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7764705882352941, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8806323946385192, + "success_rate.epoch.global": 0.8774574049803407, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958791208791209, + "tokens_p.mean_in_band": 0.77734375, + "tokens_rate.above_band": 0.978494623655914, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021505376344086023 + }, + { + "epoch": 1.1642522368981678, + "grad_norm": 146.2031288593941, + "learning_rate": 3.858322151859751e-07, + "loss": 0.2195, + "step": 5465, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.9036697247706422, + "success_rate.epoch.env.math": 0.960919540229885, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7775919732441472, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8804413298444334, + "success_rate.epoch.global": 0.8776041666666666, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972826086956522, + "tokens_p.mean_in_band": 0.7534722222222222, + "tokens_rate.above_band": 0.9745762711864406, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025423728813559324 + }, + { + "epoch": 1.1653174265019173, + "grad_norm": 327.0093868392864, + "learning_rate": 3.8580613390102334e-07, + "loss": 0.4452, + "step": 5470, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.9049773755656109, + "success_rate.epoch.env.math": 0.9612756264236902, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7783333333333333, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8807080242357223, + "success_rate.epoch.global": 0.8783958602846055, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976937269372693, + "tokens_p.mean_in_band": 0.72265625, + "tokens_rate.above_band": 0.9963235294117647, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003676470588235294 + }, + { + "epoch": 1.1663826161056667, + "grad_norm": 110.09016909146698, + "learning_rate": 3.857800338087467e-07, + "loss": 0.3896, + "step": 5475, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.9054054054054054, + "success_rate.epoch.env.math": 0.9614512471655329, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.7788778877887789, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808301899803083, + "success_rate.epoch.global": 0.87853470437018, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99658203125, + "tokens_p.mean_in_band": 0.6439732142857143, + "tokens_rate.above_band": 0.9481481481481482, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05185185185185185 + }, + { + "epoch": 1.1674478057094162, + "grad_norm": 60.61922062887736, + "learning_rate": 3.8575391492816667e-07, + "loss": 0.4347, + "step": 5480, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.905829596412556, + "success_rate.epoch.env.math": 0.9617977528089887, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.780327868852459, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8796546593041966, + "success_rate.epoch.global": 0.8786717752234994, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984809027777778, + "tokens_p.mean_in_band": 0.7020833333333333, + "tokens_rate.above_band": 0.9056603773584906, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09433962264150944 + }, + { + "epoch": 1.1685129953131657, + "grad_norm": 141.97409396308447, + "learning_rate": 3.8572777727831855e-07, + "loss": 0.2526, + "step": 5485, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9625, + "success_rate.epoch.env.logic": 0.9070796460176991, + "success_rate.epoch.env.math": 0.9617977528089887, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7804878048780488, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8798432891157364, + "success_rate.epoch.global": 0.8788071065989848, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988650121065376, + "tokens_p.mean_in_band": 0.601318359375, + "tokens_rate.above_band": 0.9904076738609112, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009592326139088728 + }, + { + "epoch": 1.1695781849169151, + "grad_norm": 82.10796465010836, + "learning_rate": 3.8570162087825116e-07, + "loss": 0.2449, + "step": 5490, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9625, + "success_rate.epoch.env.logic": 0.9074889867841409, + "success_rate.epoch.env.math": 0.9621380846325167, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.778675282714055, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.879763495211141, + "success_rate.epoch.global": 0.8783102143757882, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9946646341463414, + "tokens_p.mean_in_band": 0.587890625, + "tokens_rate.above_band": 0.9461538461538461, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05384615384615385 + }, + { + "epoch": 1.1706433745206646, + "grad_norm": 198.15483683805763, + "learning_rate": 3.856754457470272e-07, + "loss": 0.4647, + "step": 5495, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9082969432314411, + "success_rate.epoch.env.math": 0.9623059866962306, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7797427652733119, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8800077210487239, + "success_rate.epoch.global": 0.8790726817042607, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990712412587412, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9913344887348353, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008665511265164644 + }, + { + "epoch": 1.171708564124414, + "grad_norm": 72.65972204623101, + "learning_rate": 3.856492519037229e-07, + "loss": 0.3026, + "step": 5500, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.908695652173913, + "success_rate.epoch.env.math": 0.9623059866962306, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.78060413354531, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8802890769535933, + "success_rate.epoch.global": 0.8792029887920298, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986817617866005, + "tokens_p.mean_in_band": 0.65703125, + "tokens_rate.above_band": 0.9527186761229315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04728132387706856 + }, + { + "epoch": 1.1727737537281637, + "grad_norm": 106.83478750642901, + "learning_rate": 3.8562303936742817e-07, + "loss": 0.3229, + "step": 5505, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9626373626373627, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7823343848580442, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8805124300600354, + "success_rate.epoch.global": 0.879950495049505, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968251992031872, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9980119284294234, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019880715705765406 + }, + { + "epoch": 1.173838943331913, + "grad_norm": 333.1304944656572, + "learning_rate": 3.8559680815724645e-07, + "loss": 0.2103, + "step": 5510, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9102564102564102, + "success_rate.epoch.env.math": 0.962800875273523, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7824726134585289, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8806458157329579, + "success_rate.epoch.global": 0.8800738007380073, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949324324324325, + "tokens_p.mean_in_band": 0.6569010416666666, + "tokens_rate.above_band": 0.9487179487179487, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05128205128205128 + }, + { + "epoch": 1.1749041329356626, + "grad_norm": 45.074148468155265, + "learning_rate": 3.8557055829229486e-07, + "loss": 0.235, + "step": 5515, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9071729957805907, + "success_rate.epoch.env.math": 0.9631236442516269, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7831513260530422, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8804724980687112, + "success_rate.epoch.global": 0.8801955990220048, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9932228915662651, + "tokens_p.mean_in_band": 0.70166015625, + "tokens_rate.above_band": 0.9120879120879121, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08791208791208792 + }, + { + "epoch": 1.175969322539412, + "grad_norm": 0.0, + "learning_rate": 3.855442897917042e-07, + "loss": 0.3012, + "step": 5520, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9634146341463414, + "success_rate.epoch.env.logic": 0.9083333333333333, + "success_rate.epoch.env.math": 0.9632829373650108, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7841614906832298, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808413141308725, + "success_rate.epoch.global": 0.8809234507897934, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0018025078369905, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9993734335839599, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0006265664160401002 + }, + { + "epoch": 1.1770345121431616, + "grad_norm": 193.9761483691228, + "learning_rate": 3.8551800267461863e-07, + "loss": 0.2372, + "step": 5525, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9634146341463414, + "success_rate.epoch.env.logic": 0.9087136929460581, + "success_rate.epoch.env.math": 0.9636752136752137, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7848297213622291, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8810317541435267, + "success_rate.epoch.global": 0.8816425120772947, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974287974683544, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.9875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0125 + }, + { + "epoch": 1.178099701746911, + "grad_norm": 184.24616975516568, + "learning_rate": 3.8549169696019613e-07, + "loss": 0.2773, + "step": 5530, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.963855421686747, + "success_rate.epoch.env.logic": 0.9094650205761317, + "success_rate.epoch.env.math": 0.9637526652452025, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7830769230769231, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8810177135611699, + "success_rate.epoch.global": 0.8811524609843937, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975792253521126, + "tokens_p.mean_in_band": 0.4, + "tokens_rate.above_band": 0.9912739965095986, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008726003490401396 + }, + { + "epoch": 1.1791648913506605, + "grad_norm": 158.10076953042807, + "learning_rate": 3.854653726676081e-07, + "loss": 0.4389, + "step": 5535, + "success_rate.epoch.env.abd": 0.9875, + "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9098360655737705, + "success_rate.epoch.env.math": 0.9640591966173362, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7840735068912711, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8801411607181179, + "success_rate.epoch.global": 0.8812649164677804, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9885684742647058, + "tokens_p.mean_in_band": 0.656, + "tokens_rate.above_band": 0.8131539611360239, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.18684603886397608 + }, + { + "epoch": 1.18023008095441, + "grad_norm": 800.7934423077986, + "learning_rate": 3.8543902981603944e-07, + "loss": 0.2522, + "step": 5540, + "success_rate.epoch.env.abd": 0.9875, + "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, + "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9102040816326531, + "success_rate.epoch.env.math": 0.9641350210970464, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7833333333333333, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8801142213527228, + "success_rate.epoch.global": 0.8807829181494662, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9991680532445923, + "tokens_p.mean_in_band": 0.5614013671875, + "tokens_rate.above_band": 0.986863711001642, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013136288998357963 + }, + { + "epoch": 1.1812952705581594, + "grad_norm": 207.2933995708041, + "learning_rate": 3.8541266842468866e-07, + "loss": 0.3123, + "step": 5545, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9068825910931174, + "success_rate.epoch.env.math": 0.9642857142857143, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.783987915407855, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8782749843171479, + "success_rate.epoch.global": 0.8803066037735849, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980849582172702, + "tokens_p.mean_in_band": 0.4635416666666667, + "tokens_rate.above_band": 0.9917127071823204, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008287292817679558 + }, + { + "epoch": 1.1823604601619089, + "grad_norm": 65.42479756802668, + "learning_rate": 3.8538628851276777e-07, + "loss": 0.3117, + "step": 5550, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.907258064516129, + "success_rate.epoch.env.math": 0.964509394572025, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7852852852852853, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8784610825578641, + "success_rate.epoch.global": 0.8810082063305978, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976308664259927, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.992831541218638, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007168458781362007 + }, + { + "epoch": 1.1834256497656583, + "grad_norm": 48.38196783799604, + "learning_rate": 3.853598900995022e-07, + "loss": 0.2193, + "step": 5555, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.908, + "success_rate.epoch.env.math": 0.9628099173553719, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7859281437125748, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8784711268090036, + "success_rate.epoch.global": 0.8811188811188811, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966317365269461, + "tokens_p.mean_below_band": 4.3655745685100555e-09, + "tokens_p.mean_in_band": 0.751953125, + "tokens_rate.above_band": 0.9709302325581395, + "tokens_rate.below_band": 0.005813953488372093, + "tokens_rate.in_band": 0.023255813953488372 + }, + { + "epoch": 1.1844908393694078, + "grad_norm": 84.9673215473575, + "learning_rate": 3.85333473204131e-07, + "loss": 0.3241, + "step": 5560, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9083665338645418, + "success_rate.epoch.env.math": 0.9628099173553719, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7863501483679525, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8785932750785818, + "success_rate.epoch.global": 0.8812282734646582, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981203007518797, + "tokens_p.mean_in_band": 0.541015625, + "tokens_rate.above_band": 0.9950124688279302, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004987531172069825 + }, + { + "epoch": 1.1855560289731573, + "grad_norm": 181.6094812162179, + "learning_rate": 3.8530703784590655e-07, + "loss": 0.2354, + "step": 5565, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.92, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9051383399209486, + "success_rate.epoch.env.math": 0.9631147540983607, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7866666666666666, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772914630776424, + "success_rate.epoch.global": 0.8807603686635944, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961832061068703, + "tokens_p.mean_in_band": 0.5447048611111112, + "tokens_rate.above_band": 0.9622245540398741, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03777544596012592 + }, + { + "epoch": 1.1866212185769067, + "grad_norm": 124.83374529074972, + "learning_rate": 3.852805840440948e-07, + "loss": 0.3869, + "step": 5570, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.905511811023622, + "success_rate.epoch.env.math": 0.9633401221995926, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7879234167893961, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776513570040371, + "success_rate.epoch.global": 0.8814432989690721, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970201711491442, + "tokens_p.mean_in_band": 0.77265625, + "tokens_rate.above_band": 0.9761336515513126, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02386634844868735 + }, + { + "epoch": 1.1876864081806562, + "grad_norm": 144.63310154738687, + "learning_rate": 3.8525411181797513e-07, + "loss": 0.305, + "step": 5575, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9058823529411765, + "success_rate.epoch.env.math": 0.9634888438133874, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7865497076023392, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776211770389462, + "success_rate.epoch.global": 0.8809116809116809, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.7777777777777778, + "tokens_p.mean_above_band": 0.9964539007092199, + "tokens_p.mean_in_band": 0.59375, + "tokens_rate.above_band": 0.9215686274509803, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0784313725490196 + }, + { + "epoch": 1.1887515977844056, + "grad_norm": 101.585484694657, + "learning_rate": 3.8522762118684013e-07, + "loss": 0.5272, + "step": 5580, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9058823529411765, + "success_rate.epoch.env.math": 0.9617706237424547, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7855072463768116, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8773702060119952, + "success_rate.epoch.global": 0.8798866855524079, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7083333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9847898230088495, + "tokens_p.mean_in_band": 0.574187247983871, + "tokens_rate.above_band": 0.7847222222222222, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.2152777777777778 + }, + { + "epoch": 1.189816787388155, + "grad_norm": 164.83537110675357, + "learning_rate": 3.852011121699962e-07, + "loss": 0.2708, + "step": 5585, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9058823529411765, + "success_rate.epoch.env.math": 0.962, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7873563218390804, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8775721953380077, + "success_rate.epoch.global": 0.8805633802816901, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9904279279279279, + "tokens_p.mean_in_band": 0.626953125, + "tokens_rate.above_band": 0.9487179487179487, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05128205128205128 + }, + { + "epoch": 1.1908819769919046, + "grad_norm": 145.51518788109993, + "learning_rate": 3.8517458478676275e-07, + "loss": 0.325, + "step": 5590, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9066147859922179, + "success_rate.epoch.env.math": 0.9623762376237623, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7882689556509299, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777559502913396, + "success_rate.epoch.global": 0.8812324929971989, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9902912621359223, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9903846153846154, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009615384615384616 + }, + { + "epoch": 1.191947166595654, + "grad_norm": 53.3532888710968, + "learning_rate": 3.8514803905647286e-07, + "loss": 0.2571, + "step": 5595, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.9625984251968503, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7894736842105263, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779836264579557, + "success_rate.epoch.global": 0.8818941504178273, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987873134328358, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9970238095238095, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002976190476190476 + }, + { + "epoch": 1.1930123561994035, + "grad_norm": 43.17086447186869, + "learning_rate": 3.851214749984728e-07, + "loss": 0.3857, + "step": 5600, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.962671905697446, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7893258426966292, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779768663658373, + "success_rate.epoch.global": 0.8814404432132964, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7777777777777778, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9880952380952381, + "tokens_p.mean_in_band": 0.3967013888888889, + "tokens_rate.above_band": 0.9210526315789473, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07894736842105263 + }, + { + "epoch": 1.194077545803153, + "grad_norm": 135.6613944030098, + "learning_rate": 3.850948926321223e-07, + "loss": 0.2807, + "step": 5605, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.9627450980392157, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7910863509749304, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8782710444106478, + "success_rate.epoch.global": 0.8820936639118457, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981429303278688, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.1951427354069024, + "grad_norm": 52.47215054391258, + "learning_rate": 3.850682919767944e-07, + "loss": 0.2658, + "step": 5610, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9080459770114943, + "success_rate.epoch.env.math": 0.9631067961165048, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7908587257617729, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8783153846091313, + "success_rate.epoch.global": 0.8821917808219178, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9944444444444445, + "tokens_p.mean_in_band": 0.60546875, + "tokens_rate.above_band": 0.967741935483871, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03225806451612903 + }, + { + "epoch": 1.1962079250106519, + "grad_norm": 265.44880193654274, + "learning_rate": 3.850416730518754e-07, + "loss": 0.348, + "step": 5615, + "success_rate.epoch.env.abd": 0.9882352941176471, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9045801526717557, + "success_rate.epoch.env.math": 0.9631782945736435, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7895460797799174, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8780817282702493, + "success_rate.epoch.global": 0.8811989100817439, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.7666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.997671568627451, + "tokens_p.mean_in_band": 0.4725341796875, + "tokens_rate.above_band": 0.9695817490494296, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030418250950570342 + }, + { + "epoch": 1.1972731146144013, + "grad_norm": 231.76419326673198, + "learning_rate": 3.850150358767651e-07, + "loss": 0.2946, + "step": 5620, + "success_rate.epoch.env.abd": 0.9882352941176471, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9049429657794676, + "success_rate.epoch.env.math": 0.9634615384615385, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.7906976744186046, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8782794305921384, + "success_rate.epoch.global": 0.8818428184281842, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992131294964028, + "tokens_p.mean_in_band": 0.77734375, + "tokens_rate.above_band": 0.996415770609319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0035842293906810036 + }, + { + "epoch": 1.1983383042181508, + "grad_norm": 55.25376279417916, + "learning_rate": 3.849883804708764e-07, + "loss": 0.1619, + "step": 5625, + "success_rate.epoch.env.abd": 0.9883720930232558, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9053030303030303, + "success_rate.epoch.env.math": 0.9636711281070746, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.791268758526603, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8774400041630631, + "success_rate.epoch.global": 0.8819407008086253, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970103790613718, + "tokens_p.mean_in_band": 0.6615349264705882, + "tokens_rate.above_band": 0.9702276707530648, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0297723292469352 + }, + { + "epoch": 1.1994034938219003, + "grad_norm": 527.4931450433074, + "learning_rate": 3.849617068536356e-07, + "loss": 0.2507, + "step": 5630, + "success_rate.epoch.env.abd": 0.9883720930232558, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9053030303030303, + "success_rate.epoch.env.math": 0.9638783269961977, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7913279132791328, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8765621876497424, + "success_rate.epoch.global": 0.8815013404825738, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9851588628762542, + "tokens_p.mean_in_band": 0.734631990131579, + "tokens_rate.above_band": 0.887240356083086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11275964391691394 + }, + { + "epoch": 1.2004686834256497, + "grad_norm": 3507.9177473413506, + "learning_rate": 3.849350150444822e-07, + "loss": 0.2921, + "step": 5635, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9060150375939849, + "success_rate.epoch.env.math": 0.9640831758034026, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7908232118758435, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8766236808595586, + "success_rate.epoch.global": 0.8816, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979338842975206, + "tokens_p.mean_in_band": 0.5338541666666666, + "tokens_rate.above_band": 0.9758064516129032, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024193548387096774 + }, + { + "epoch": 1.2015338730293992, + "grad_norm": 83.35150067453999, + "learning_rate": 3.8490830506286897e-07, + "loss": 0.4006, + "step": 5640, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9063670411985019, + "success_rate.epoch.env.math": 0.9642857142857143, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7908847184986595, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8767114494704156, + "success_rate.epoch.global": 0.8816976127320955, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942396313364056, + "tokens_p.mean_in_band": 0.658984375, + "tokens_rate.above_band": 0.9559471365638766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04405286343612335 + }, + { + "epoch": 1.2025990626331486, + "grad_norm": 28.75466648149286, + "learning_rate": 3.8488157692826207e-07, + "loss": 0.279, + "step": 5645, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9468085106382979, + "success_rate.epoch.env.logic": 0.9067164179104478, + "success_rate.epoch.env.math": 0.9646182495344506, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7911646586345381, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8768814952823951, + "success_rate.epoch.global": 0.8823218997361477, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992774566473989, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9942528735632183, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005747126436781609 + }, + { + "epoch": 1.203664252236898, + "grad_norm": 285.68657119909125, + "learning_rate": 3.8485483066014075e-07, + "loss": 0.2338, + "step": 5650, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9468085106382979, + "success_rate.epoch.env.logic": 0.9067164179104478, + "success_rate.epoch.env.math": 0.9648798521256932, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7928286852589641, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8770565524838194, + "success_rate.epoch.global": 0.8829396325459318, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.994338768115942, + "tokens_p.mean_in_band": 0.734375, + "tokens_rate.above_band": 0.9787234042553191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02127659574468085 + }, + { + "epoch": 1.2047294418406476, + "grad_norm": 117.06626280681023, + "learning_rate": 3.848280662779974e-07, + "loss": 0.461, + "step": 5655, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9077490774907749, + "success_rate.epoch.env.math": 0.9650735294117647, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7923280423280423, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8771734256065294, + "success_rate.epoch.global": 0.8830287206266318, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972222222222222, + "tokens_p.mean_in_band": 0.484375, + "tokens_rate.above_band": 0.989010989010989, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01098901098901099 + }, + { + "epoch": 1.205794631444397, + "grad_norm": 291.76164658597423, + "learning_rate": 3.8480128380133774e-07, + "loss": 0.3348, + "step": 5660, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9479166666666666, + "success_rate.epoch.env.logic": 0.9080882352941176, + "success_rate.epoch.env.math": 0.9652014652014652, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7921052631578948, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772749924575846, + "success_rate.epoch.global": 0.8831168831168831, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998342175066313, + "tokens_p.mean_in_band": 0.6028645833333334, + "tokens_rate.above_band": 0.9973544973544973, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026455026455026454 + }, + { + "epoch": 1.2068598210481465, + "grad_norm": 310.4944857853484, + "learning_rate": 3.847744832496807e-07, + "loss": 0.3339, + "step": 5665, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9479166666666666, + "success_rate.epoch.env.logic": 0.9084249084249084, + "success_rate.epoch.env.math": 0.9653916211293261, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7934640522875817, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8774580196844751, + "success_rate.epoch.global": 0.8837209302325582, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9904761904761905, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9722222222222222, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027777777777777776 + }, + { + "epoch": 1.207925010651896, + "grad_norm": 37.95794628739215, + "learning_rate": 3.847476646425583e-07, + "loss": 0.3587, + "step": 5670, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9484536082474226, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9655172413793104, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7935064935064935, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.877582656385898, + "success_rate.epoch.global": 0.8838046272493574, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949024822695035, + "tokens_p.mean_in_band": 0.6636284722222222, + "tokens_rate.above_band": 0.94, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06 + }, + { + "epoch": 1.2089902002556454, + "grad_norm": 183.66622223791853, + "learning_rate": 3.8472082799951577e-07, + "loss": 0.2424, + "step": 5675, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9094202898550725, + "success_rate.epoch.env.math": 0.9657039711191335, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7930142302716688, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777759339871174, + "success_rate.epoch.global": 0.8838874680306905, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987142218246869, + "tokens_p.mean_below_band": 7.729977369308472e-08, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9964349376114082, + "tokens_rate.below_band": 0.0017825311942959, + "tokens_rate.in_band": 0.0017825311942959 + }, + { + "epoch": 1.210055389859395, + "grad_norm": 101.73876273024432, + "learning_rate": 3.846939733401114e-07, + "loss": 0.2749, + "step": 5680, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9494949494949495, + "success_rate.epoch.env.logic": 0.9097472924187726, + "success_rate.epoch.env.math": 0.9658273381294964, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.79204107830552, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777752590113107, + "success_rate.epoch.global": 0.8834605597964377, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9924150485436893, + "tokens_p.mean_in_band": 0.5642755681818182, + "tokens_rate.above_band": 0.9493087557603687, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05069124423963134 + }, + { + "epoch": 1.2111205794631443, + "grad_norm": 78.2027511815762, + "learning_rate": 3.846671006839167e-07, + "loss": 0.3059, + "step": 5685, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.9100719424460432, + "success_rate.epoch.env.math": 0.9659498207885304, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7931034482758621, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8771562611703956, + "success_rate.epoch.global": 0.8835443037974684, + "success_rate.window.env.ded": 0.6666666666666666, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9923911020104245, + "tokens_p.mean_in_band": 0.5719088040865384, + "tokens_rate.above_band": 0.865892972275951, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.134107027724049 + }, + { + "epoch": 1.212185769066894, + "grad_norm": 93.66850832785966, + "learning_rate": 3.846402100505164e-07, + "loss": 0.2398, + "step": 5690, + "success_rate.epoch.env.abd": 0.989010989010989, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.9113475177304965, + "success_rate.epoch.env.math": 0.966131907308378, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7933673469387755, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8773523469701533, + "success_rate.epoch.global": 0.8841309823677582, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987461419753086, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.998972250770812, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0010277492291880781 + }, + { + "epoch": 1.2132509586706433, + "grad_norm": 158.98297010179246, + "learning_rate": 3.8461330145950797e-07, + "loss": 0.2034, + "step": 5695, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.9113475177304965, + "success_rate.epoch.env.math": 0.9662522202486679, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7936708860759494, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.877412362869647, + "success_rate.epoch.global": 0.8842105263157894, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9951923076923077, + "tokens_p.mean_in_band": 0.29194078947368424, + "tokens_rate.above_band": 0.8602941176470589, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13970588235294118 + }, + { + "epoch": 1.214316148274393, + "grad_norm": 85.41108854358225, + "learning_rate": 3.845863749305024e-07, + "loss": 0.2455, + "step": 5700, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.941747572815534, + "success_rate.epoch.env.logic": 0.9119718309859155, + "success_rate.epoch.env.math": 0.9664310954063604, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7944514501891551, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777353154871697, + "success_rate.epoch.global": 0.884788029925187, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975094876660342, + "tokens_p.mean_in_band": 0.65, + "tokens_rate.above_band": 0.9906015037593985, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009398496240601503 + }, + { + "epoch": 1.2153813378781424, + "grad_norm": 64.15294846241883, + "learning_rate": 3.845594304831234e-07, + "loss": 0.4064, + "step": 5705, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9423076923076923, + "success_rate.epoch.env.logic": 0.9122807017543859, + "success_rate.epoch.env.math": 0.9667250437828371, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7952261306532663, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779114626781894, + "success_rate.epoch.global": 0.8853598014888338, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9932650862068966, + "tokens_p.mean_in_band": 0.83125, + "tokens_rate.above_band": 0.9789029535864979, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02109704641350211 + }, + { + "epoch": 1.2164465274818919, + "grad_norm": 67.64612313912163, + "learning_rate": 3.8453246813700797e-07, + "loss": 0.2585, + "step": 5710, + "success_rate.epoch.env.abd": 0.9893617021276596, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9125874125874126, + "success_rate.epoch.env.math": 0.9668411867364747, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7959949937421777, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8781566555528674, + "success_rate.epoch.global": 0.8859259259259259, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987987276664861, + "tokens_p.mean_in_band": 0.6490885416666666, + "tokens_rate.above_band": 0.9967620075553157, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0032379924446842958 + }, + { + "epoch": 1.2175117170856413, + "grad_norm": 183.80215668402516, + "learning_rate": 3.8450548791180607e-07, + "loss": 0.3292, + "step": 5715, + "success_rate.epoch.env.abd": 0.9893617021276596, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9097222222222222, + "success_rate.epoch.env.math": 0.9670138888888888, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7947761194029851, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8778010771390522, + "success_rate.epoch.global": 0.885012285012285, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.7000000000000001, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9961145648312612, + "tokens_p.mean_in_band": 0.72015625, + "tokens_rate.above_band": 0.9574829931972789, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04251700680272109 + }, + { + "epoch": 1.2185769066893908, + "grad_norm": 90.83328489423037, + "learning_rate": 3.8447848982718065e-07, + "loss": 0.2913, + "step": 5720, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9097222222222222, + "success_rate.epoch.env.math": 0.9671848013816926, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7938271604938272, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777405258360079, + "success_rate.epoch.global": 0.8845965770171149, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9925, + "tokens_p.mean_in_band": 0.611328125, + "tokens_rate.above_band": 0.946969696969697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05303030303030303 + }, + { + "epoch": 1.2196420962931402, + "grad_norm": 45.55175745928387, + "learning_rate": 3.8445147390280777e-07, + "loss": 0.2472, + "step": 5725, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9081632653061225, + "success_rate.epoch.env.math": 0.9672977624784854, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7943349753694581, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776552366593099, + "success_rate.epoch.global": 0.8846715328467153, + "success_rate.window.env.logic": 0.8333333333333334, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973540145985401, + "tokens_p.mean_in_band": 0.6535326086956522, + "tokens_rate.above_band": 0.9675141242937854, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03248587570621469 + }, + { + "epoch": 1.2207072858968897, + "grad_norm": 469.69598646410327, + "learning_rate": 3.8442444015837643e-07, + "loss": 0.4276, + "step": 5730, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9675213675213675, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7936117936117936, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.877816588333565, + "success_rate.epoch.global": 0.8847457627118644, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991391184573003, + "tokens_p.mean_in_band": 0.5263671875, + "tokens_rate.above_band": 0.9477806788511749, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05221932114882506 + }, + { + "epoch": 1.2217724755006392, + "grad_norm": 60.51911243000029, + "learning_rate": 3.843973886135886e-07, + "loss": 0.3197, + "step": 5735, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9093959731543624, + "success_rate.epoch.env.math": 0.966044142614601, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7936507936507936, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777135737149911, + "success_rate.epoch.global": 0.8843373493975903, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9926801801801802, + "tokens_p.mean_in_band": 0.5534855769230769, + "tokens_rate.above_band": 0.8951612903225806, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10483870967741936 + }, + { + "epoch": 1.2228376651043886, + "grad_norm": 47.84149722393532, + "learning_rate": 3.8437031928815927e-07, + "loss": 0.2018, + "step": 5740, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9093959731543624, + "success_rate.epoch.env.math": 0.9661590524534687, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7941888619854721, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777995014369474, + "success_rate.epoch.global": 0.8844124700239808, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954268292682927, + "tokens_p.mean_in_band": 0.6428571428571429, + "tokens_rate.above_band": 0.9590643274853801, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04093567251461988 + }, + { + "epoch": 1.223902854708138, + "grad_norm": 34.12597138938547, + "learning_rate": 3.843432322018164e-07, + "loss": 0.2656, + "step": 5745, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9108910891089109, + "success_rate.epoch.env.math": 0.9663299663299664, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7946859903381642, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779961521809235, + "success_rate.epoch.global": 0.8849642004773269, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987113402061856, + "tokens_p.mean_in_band": 0.75, + "tokens_rate.above_band": 0.9797979797979798, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020202020202020204 + }, + { + "epoch": 1.2249680443118875, + "grad_norm": 83.03138468541954, + "learning_rate": 3.843161273743008e-07, + "loss": 0.1445, + "step": 5750, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9108910891089109, + "success_rate.epoch.env.math": 0.966499162479062, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7952095808383234, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8780591327854013, + "success_rate.epoch.global": 0.8850356294536817, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915865384615384, + "tokens_p.mean_in_band": 0.6643880208333334, + "tokens_rate.above_band": 0.896551724137931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10344827586206896 + }, + { + "epoch": 1.226033233915637, + "grad_norm": 94.84688360452714, + "learning_rate": 3.8428900482536637e-07, + "loss": 0.2387, + "step": 5755, + "success_rate.epoch.env.abd": 0.9896907216494846, + "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9111842105263158, + "success_rate.epoch.env.math": 0.9665551839464883, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7966706302021404, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8782434263933655, + "success_rate.epoch.global": 0.885579196217494, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957482993197279, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9865771812080537, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013422818791946308 + }, + { + "epoch": 1.2270984235193865, + "grad_norm": 220.00251198916067, + "learning_rate": 3.8426186457477974e-07, + "loss": 0.3906, + "step": 5760, + "success_rate.epoch.env.abd": 0.9896907216494846, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9114754098360656, + "success_rate.epoch.env.math": 0.9666666666666667, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7966903073286052, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8768246013777033, + "success_rate.epoch.global": 0.8851764705882353, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973456964892412, + "tokens_p.mean_in_band": 0.4713541666666667, + "tokens_rate.above_band": 0.9932508436445444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006749156355455568 + }, + { + "epoch": 1.228163613123136, + "grad_norm": 95.04780368705549, + "learning_rate": 3.842347066423205e-07, + "loss": 0.2704, + "step": 5765, + "success_rate.epoch.env.abd": 0.9897959183673469, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9117647058823529, + "success_rate.epoch.env.math": 0.9667221297836939, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7974087161366313, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8770286469557543, + "success_rate.epoch.global": 0.8857142857142857, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991099683544303, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.2292288027268854, + "grad_norm": 90.63497899350807, + "learning_rate": 3.842075310477813e-07, + "loss": 0.1603, + "step": 5770, + "success_rate.epoch.env.abd": 0.9897959183673469, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9123376623376623, + "success_rate.epoch.env.math": 0.9667774086378738, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7978971962616822, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8771301665407123, + "success_rate.epoch.global": 0.8857808857808858, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9903706395348837, + "tokens_p.mean_below_band": 1.8189894035458565e-09, + "tokens_rate.above_band": 0.9942196531791907, + "tokens_rate.below_band": 0.005780346820809248, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.2302939923306349, + "grad_norm": 59.86166946203114, + "learning_rate": 3.841803378109674e-07, + "loss": 0.1948, + "step": 5775, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.912621359223301, + "success_rate.epoch.env.math": 0.966996699669967, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7986030267753201, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772494292646226, + "success_rate.epoch.global": 0.8863109048723898, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980603448275862, + "tokens_p.mean_in_band": 0.8763020833333334, + "tokens_rate.above_band": 0.9948542024013722, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005145797598627788 + }, + { + "epoch": 1.2313591819343843, + "grad_norm": 43.65369572163305, + "learning_rate": 3.8415312695169707e-07, + "loss": 0.2164, + "step": 5780, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9131832797427653, + "success_rate.epoch.env.math": 0.966996699669967, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7976878612716763, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.877264517066023, + "success_rate.epoch.global": 0.8859122401847576, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972972972972973, + "tokens_p.mean_in_band": 0.48758370535714285, + "tokens_rate.above_band": 0.9814323607427056, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01856763925729443 + }, + { + "epoch": 1.2324243715381338, + "grad_norm": 53.00415811024405, + "learning_rate": 3.8412589848980134e-07, + "loss": 0.2911, + "step": 5785, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9454545454545454, + "success_rate.epoch.env.logic": 0.9140127388535032, + "success_rate.epoch.env.math": 0.9670510708401977, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7983870967741935, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8775228063993507, + "success_rate.epoch.global": 0.8864367816091954, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994791666666667, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.996309963099631, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0036900369003690036 + }, + { + "epoch": 1.2334895611418832, + "grad_norm": 87.58181142258837, + "learning_rate": 3.840986524451242e-07, + "loss": 0.1401, + "step": 5790, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9140127388535032, + "success_rate.epoch.env.math": 0.967266775777414, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7993119266055045, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776930172971064, + "success_rate.epoch.global": 0.8869565217391304, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.993439226519337, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9917808219178083, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00821917808219178 + }, + { + "epoch": 1.2345547507456327, + "grad_norm": 124.23957177387686, + "learning_rate": 3.8407138883752233e-07, + "loss": 0.2464, + "step": 5795, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9150943396226415, + "success_rate.epoch.env.math": 0.9674267100977199, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7988571428571428, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777645401462958, + "success_rate.epoch.global": 0.8870159453302962, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99453125, + "tokens_p.mean_in_band": 0.4817708333333333, + "tokens_rate.above_band": 0.963855421686747, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03614457831325301 + }, + { + "epoch": 1.2356199403493822, + "grad_norm": 356.60832033058944, + "learning_rate": 3.840441076868653e-07, + "loss": 0.2465, + "step": 5800, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9122807017543859, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.915625, + "success_rate.epoch.env.math": 0.9676375404530745, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7995444191343963, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8780368295144836, + "success_rate.epoch.global": 0.8875283446712018, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981387147335423, + "tokens_p.mean_in_band": 0.71435546875, + "tokens_rate.above_band": 0.9755351681957186, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024464831804281346 + }, + { + "epoch": 1.2366851299531316, + "grad_norm": 113.86725879165407, + "learning_rate": 3.8401680901303535e-07, + "loss": 0.3848, + "step": 5805, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9122807017543859, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.9164086687306502, + "success_rate.epoch.env.math": 0.967741935483871, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7981859410430839, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8780379396192799, + "success_rate.epoch.global": 0.8871331828442438, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9963192840646651, + "tokens_p.mean_in_band": 0.607421875, + "tokens_rate.above_band": 0.9643652561247216, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035634743875278395 + }, + { + "epoch": 1.237750319556881, + "grad_norm": 95.98116374327142, + "learning_rate": 3.8398949283592755e-07, + "loss": 0.3242, + "step": 5810, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9385964912280702, + "success_rate.epoch.env.logic": 0.9164086687306502, + "success_rate.epoch.env.math": 0.9678456591639871, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7993235625704622, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8775762729557237, + "success_rate.epoch.global": 0.887191011235955, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967349531005733, + "tokens_p.mean_in_band": 0.5277162532216495, + "tokens_rate.above_band": 0.9081874112636062, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09181258873639375 + }, + { + "epoch": 1.2388155091606305, + "grad_norm": 150.36430497209574, + "learning_rate": 3.839621591754498e-07, + "loss": 0.2732, + "step": 5815, + "success_rate.epoch.env.abd": 0.9900990099009901, + "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.916923076923077, + "success_rate.epoch.env.math": 0.967948717948718, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7986501687289089, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776591035852913, + "success_rate.epoch.global": 0.887248322147651, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988517060367454, + "tokens_p.mean_in_band": 0.6783854166666666, + "tokens_rate.above_band": 0.9844961240310077, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015503875968992248 + }, + { + "epoch": 1.23988069876438, + "grad_norm": 58.441257313308306, + "learning_rate": 3.8393480805152263e-07, + "loss": 0.2066, + "step": 5820, + "success_rate.epoch.env.abd": 0.9901960784313726, + "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9174311926605505, + "success_rate.epoch.env.math": 0.968, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.799552071668533, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8769349726521053, + "success_rate.epoch.global": 0.887305122494432, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9919871794871795, + "tokens_p.mean_in_band": 0.6583059210526315, + "tokens_rate.above_band": 0.8914285714285715, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10857142857142857 + }, + { + "epoch": 1.2409458883681295, + "grad_norm": 74.37114795816692, + "learning_rate": 3.8390743948407936e-07, + "loss": 0.3807, + "step": 5825, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9179331306990881, + "success_rate.epoch.env.math": 0.9682539682539683, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7997762863534675, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8771655579722736, + "success_rate.epoch.global": 0.8878048780487805, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968011811023622, + "tokens_p.mean_in_band": 0.75, + "tokens_rate.above_band": 0.9806949806949807, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019305019305019305 + }, + { + "epoch": 1.242011077971879, + "grad_norm": 61.00403373274862, + "learning_rate": 3.83880053493066e-07, + "loss": 0.3101, + "step": 5830, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9179331306990881, + "success_rate.epoch.env.math": 0.9684542586750788, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7986651835372637, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8725373022999922, + "success_rate.epoch.global": 0.8869757174392936, + "success_rate.window.env.agentgym:textcraft": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.5333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9961986571879937, + "tokens_p.mean_in_band": 0.6516335227272727, + "tokens_rate.above_band": 0.9504504504504504, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04954954954954955 + }, + { + "epoch": 1.2430762675756284, + "grad_norm": 306.54720973075433, + "learning_rate": 3.8385265009844123e-07, + "loss": 0.287, + "step": 5835, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9181818181818182, + "success_rate.epoch.env.math": 0.9686520376175548, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.7984496124031008, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8726866954517718, + "success_rate.epoch.global": 0.8870329670329671, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950527704485488, + "tokens_p.mean_in_band": 0.60703125, + "tokens_rate.above_band": 0.974293059125964, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02570694087403599 + }, + { + "epoch": 1.2441414571793779, + "grad_norm": 123.22668811418404, + "learning_rate": 3.838252293201765e-07, + "loss": 0.2486, + "step": 5840, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9180327868852459, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9396551724137931, + "success_rate.epoch.env.logic": 0.918429003021148, + "success_rate.epoch.env.math": 0.9688473520249221, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7977900552486188, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720812833252851, + "success_rate.epoch.global": 0.886652078774617, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972741433021807, + "tokens_p.mean_in_band": 0.6458333333333334, + "tokens_rate.above_band": 0.9756838905775076, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0243161094224924 + }, + { + "epoch": 1.2452066467831273, + "grad_norm": 197.33373961995605, + "learning_rate": 3.837977911782558e-07, + "loss": 0.4533, + "step": 5845, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9193548387096774, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9396551724137931, + "success_rate.epoch.env.logic": 0.9159159159159159, + "success_rate.epoch.env.math": 0.9689922480620154, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7964796479647965, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718670518227825, + "success_rate.epoch.global": 0.885838779956427, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9971203071672355, + "tokens_p.mean_in_band": 0.5521599264705882, + "tokens_rate.above_band": 0.8960244648318043, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10397553516819572 + }, + { + "epoch": 1.2462718363868768, + "grad_norm": 216.9435162596948, + "learning_rate": 3.8377033569267596e-07, + "loss": 0.287, + "step": 5850, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9401709401709402, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9675925925925926, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7969264544456641, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720119383177447, + "success_rate.epoch.global": 0.8859002169197397, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975511073253833, + "tokens_p.mean_in_band": 0.029296875, + "tokens_rate.above_band": 0.9982993197278912, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017006802721088435 + }, + { + "epoch": 1.2473370259906262, + "grad_norm": 137.2533340946094, + "learning_rate": 3.837428628834463e-07, + "loss": 0.298, + "step": 5855, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.940677966101695, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9677914110429447, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7978142076502732, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714883616276761, + "success_rate.epoch.global": 0.8859611231101512, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998807251908397, + "tokens_p.mean_in_band": 0.6638569078947368, + "tokens_rate.above_band": 0.9323843416370107, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06761565836298933 + }, + { + "epoch": 1.2484022155943757, + "grad_norm": 59.92096970708178, + "learning_rate": 3.837153727705888e-07, + "loss": 0.2479, + "step": 5860, + "success_rate.epoch.env.abd": 0.9903846153846154, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.940677966101695, + "success_rate.epoch.env.logic": 0.9144542772861357, + "success_rate.epoch.env.math": 0.9679878048780488, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7982551799345693, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713536643538181, + "success_rate.epoch.global": 0.886021505376344, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989390432098766, + "tokens_p.mean_in_band": 0.5291666666666667, + "tokens_rate.above_band": 0.9773755656108597, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02262443438914027 + }, + { + "epoch": 1.2494674051981254, + "grad_norm": 136.98942295448293, + "learning_rate": 3.83687865374138e-07, + "loss": 0.485, + "step": 5865, + "success_rate.epoch.env.abd": 0.9903846153846154, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9125364431486881, + "success_rate.epoch.env.math": 0.9681335356600911, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7976060935799782, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704149328476368, + "success_rate.epoch.global": 0.8852248394004283, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.5625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.985437925170068, + "tokens_p.mean_below_band": 3.948807716369629e-07, + "tokens_p.mean_in_band": 0.4875812861271676, + "tokens_rate.above_band": 0.6288770053475936, + "tokens_rate.below_band": 0.0010695187165775401, + "tokens_rate.in_band": 0.3700534759358289 + }, + { + "epoch": 1.2505325948018746, + "grad_norm": 222.50650366190857, + "learning_rate": 3.8366034071414115e-07, + "loss": 0.3273, + "step": 5870, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9130434782608695, + "success_rate.epoch.env.math": 0.9683734939759037, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7980456026058632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705311217154055, + "success_rate.epoch.global": 0.8857142857142857, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949383802816901, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.993006993006993, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006993006993006993 + }, + { + "epoch": 1.2515977844056243, + "grad_norm": 154.859493260385, + "learning_rate": 3.83632798810658e-07, + "loss": 0.3507, + "step": 5875, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9135446685878963, + "success_rate.epoch.env.math": 0.968421052631579, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7982740021574973, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706017715821633, + "success_rate.epoch.global": 0.8857264231096007, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9883241758241759, + "tokens_p.mean_in_band": 0.5513392857142857, + "tokens_rate.above_band": 0.9285714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07142857142857142 + }, + { + "epoch": 1.2526629740093735, + "grad_norm": 108.6232329111237, + "learning_rate": 3.8360523968376096e-07, + "loss": 0.5375, + "step": 5880, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9140401146131805, + "success_rate.epoch.env.math": 0.9670658682634731, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7982832618025751, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705244553369137, + "success_rate.epoch.global": 0.8853637901861252, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8222222222222223, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9898648648648649, + "tokens_p.mean_in_band": 0.5659877232142857, + "tokens_rate.above_band": 0.8409090909090909, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1590909090909091 + }, + { + "epoch": 1.2537281636131232, + "grad_norm": 61.52741589906454, + "learning_rate": 3.8357766335353487e-07, + "loss": 0.0921, + "step": 5885, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.921875, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9150141643059491, + "success_rate.epoch.env.math": 0.9671641791044776, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7984994640943194, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708134292950857, + "success_rate.epoch.global": 0.8858466722830666, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998061560150376, + "tokens_p.mean_in_band": 0.884765625, + "tokens_rate.above_band": 0.9962546816479401, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003745318352059925 + }, + { + "epoch": 1.2547933532168725, + "grad_norm": 88.37184818480591, + "learning_rate": 3.835500698400771e-07, + "loss": 0.3708, + "step": 5890, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, + "success_rate.epoch.env.agentgym:sciworld": 0.9850746268656716, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9124293785310734, + "success_rate.epoch.env.math": 0.9673105497771174, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7993596584845251, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707997787843657, + "success_rate.epoch.global": 0.8859060402684564, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978837471783296, + "tokens_p.mean_in_band": 0.630859375, + "tokens_rate.above_band": 0.9866369710467706, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013363028953229399 + }, + { + "epoch": 1.2558585428206221, + "grad_norm": 31.21583025921499, + "learning_rate": 3.8352245916349775e-07, + "loss": 0.2606, + "step": 5895, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9242424242424242, + "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9101123595505618, + "success_rate.epoch.env.math": 0.9674074074074074, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708014424972652, + "success_rate.epoch.global": 0.8859649122807017, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994292237442922, + "tokens_p.mean_in_band": 0.6861049107142857, + "tokens_rate.above_band": 0.9690265486725663, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030973451327433628 + }, + { + "epoch": 1.2569237324243716, + "grad_norm": 215.8912277725402, + "learning_rate": 3.834948313439191e-07, + "loss": 0.139, + "step": 5900, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9253731343283582, + "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9103641456582633, + "success_rate.epoch.env.math": 0.9676945668135095, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8004246284501062, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870991831956342, + "success_rate.epoch.global": 0.8864392678868552, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982531055900621, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9969040247678018, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0030959752321981426 + }, + { + "epoch": 1.257988922028121, + "grad_norm": 31.410651242501206, + "learning_rate": 3.834671864014763e-07, + "loss": 0.2177, + "step": 5905, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9108635097493036, + "success_rate.epoch.env.math": 0.9678832116788321, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.801058201058201, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871211744271923, + "success_rate.epoch.global": 0.8869096934548467, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986482188295165, + "tokens_p.mean_in_band": 0.677734375, + "tokens_rate.above_band": 0.9703703703703703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02962962962962963 + }, + { + "epoch": 1.2590541116318705, + "grad_norm": 134.01227005014402, + "learning_rate": 3.834395243563166e-07, + "loss": 0.3778, + "step": 5910, + "success_rate.epoch.env.abd": 0.9906542056074766, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9113573407202216, + "success_rate.epoch.env.math": 0.9680232558139535, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8016877637130801, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700547379704964, + "success_rate.epoch.global": 0.886963696369637, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994119623655914, + "tokens_p.mean_in_band": 0.7034505208333334, + "tokens_rate.above_band": 0.96875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03125 + }, + { + "epoch": 1.26011930123562, + "grad_norm": 129.79729021954998, + "learning_rate": 3.8341184522860004e-07, + "loss": 0.2074, + "step": 5915, + "success_rate.epoch.env.abd": 0.9907407407407407, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9113573407202216, + "success_rate.epoch.env.math": 0.968299711815562, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8021052631578948, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701622747947336, + "success_rate.epoch.global": 0.8874281018898932, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966755319148937, + "tokens_p.mean_in_band": 0.8271484375, + "tokens_rate.above_band": 0.9724137931034482, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027586206896551724 + }, + { + "epoch": 1.2611844908393695, + "grad_norm": 308.50683494089435, + "learning_rate": 3.833841490384989e-07, + "loss": 0.3909, + "step": 5920, + "success_rate.epoch.env.abd": 0.990909090909091, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9116022099447514, + "success_rate.epoch.env.math": 0.9684361549497847, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8006295907660022, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700780920797526, + "success_rate.epoch.global": 0.8870703764320785, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666668, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982798165137615, + "tokens_p.mean_in_band": 0.3977272727272727, + "tokens_rate.above_band": 0.9519650655021834, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.048034934497816595 + }, + { + "epoch": 1.262249680443119, + "grad_norm": 274.2756468289808, + "learning_rate": 3.83356435806198e-07, + "loss": 0.2329, + "step": 5925, + "success_rate.epoch.env.abd": 0.9910714285714286, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9338842975206612, + "success_rate.epoch.env.logic": 0.9118457300275482, + "success_rate.epoch.env.math": 0.9685264663805436, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.801255230125523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702657292642499, + "success_rate.epoch.global": 0.8875305623471883, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970211330935251, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.2633148700468684, + "grad_norm": 76.7892158429907, + "learning_rate": 3.833287055518946e-07, + "loss": 0.1849, + "step": 5930, + "success_rate.epoch.env.abd": 0.9910714285714286, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9354838709677419, + "success_rate.epoch.env.logic": 0.9120879120879121, + "success_rate.epoch.env.math": 0.9686162624821684, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8010416666666667, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704219100051787, + "success_rate.epoch.global": 0.8875811688311688, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979016786570744, + "tokens_p.mean_in_band": 0.5078125, + "tokens_rate.above_band": 0.9940405244338498, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0059594755661501785 + }, + { + "epoch": 1.2643800596506178, + "grad_norm": 52.75959508958796, + "learning_rate": 3.8330095829579807e-07, + "loss": 0.2147, + "step": 5935, + "success_rate.epoch.env.abd": 0.9912280701754386, + "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9365079365079365, + "success_rate.epoch.env.logic": 0.9123287671232877, + "success_rate.epoch.env.math": 0.968705547652916, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8006230529595015, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705212039727388, + "success_rate.epoch.global": 0.8876313662085691, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973536450662739, + "tokens_p.mean_in_band": 0.650390625, + "tokens_rate.above_band": 0.9956011730205279, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004398826979472141 + }, + { + "epoch": 1.2654452492543673, + "grad_norm": 83.68690561754137, + "learning_rate": 3.832731940581307e-07, + "loss": 0.4615, + "step": 5940, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9365079365079365, + "success_rate.epoch.env.logic": 0.9128065395095368, + "success_rate.epoch.env.math": 0.9688385269121813, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8012422360248447, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707368272463996, + "success_rate.epoch.global": 0.8880837359098228, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975453172205438, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.993993993993994, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006006006006006006 + }, + { + "epoch": 1.2665104388581168, + "grad_norm": 93.4659904314759, + "learning_rate": 3.8324541285912675e-07, + "loss": 0.2463, + "step": 5945, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9132791327913279, + "success_rate.epoch.env.math": 0.9689703808180536, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8018575851393189, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8709277591840134, + "success_rate.epoch.global": 0.888532477947073, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995, + "tokens_p.mean_in_band": 0.8606770833333334, + "tokens_rate.above_band": 0.9786476868327402, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021352313167259787 + }, + { + "epoch": 1.2675756284618662, + "grad_norm": 59.59307126097623, + "learning_rate": 3.832176147190329e-07, + "loss": 0.35, + "step": 5950, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9112903225806451, + "success_rate.epoch.env.math": 0.9691011235955056, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8012358393408857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870768739218163, + "success_rate.epoch.global": 0.8881789137380192, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967900302114804, + "tokens_p.mean_in_band": 0.7013888888888888, + "tokens_rate.above_band": 0.9484240687679083, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05157593123209169 + }, + { + "epoch": 1.2686408180656157, + "grad_norm": 132.65850479112723, + "learning_rate": 3.8318979965810833e-07, + "loss": 0.411, + "step": 5955, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9093333333333333, + "success_rate.epoch.env.math": 0.969187675070028, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7991803278688525, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704118347413609, + "success_rate.epoch.global": 0.8870326173428799, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.6888888888888888, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9926136363636363, + "tokens_p.mean_in_band": 0.5417668269230769, + "tokens_rate.above_band": 0.8638743455497382, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13612565445026178 + }, + { + "epoch": 1.2697060076693651, + "grad_norm": 58.71334465707662, + "learning_rate": 3.831619676966244e-07, + "loss": 0.1775, + "step": 5960, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9095744680851063, + "success_rate.epoch.env.math": 0.9693165969316597, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.799184505606524, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704777539726387, + "success_rate.epoch.global": 0.8870839936608558, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977889150943396, + "tokens_p.mean_in_band": 0.6994791666666667, + "tokens_rate.above_band": 0.933920704845815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06607929515418502 + }, + { + "epoch": 1.2707711972731146, + "grad_norm": 60.509237642902214, + "learning_rate": 3.8313411885486485e-07, + "loss": 0.3625, + "step": 5965, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.91005291005291, + "success_rate.epoch.env.math": 0.9693165969316597, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7995951417004049, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705653940254049, + "success_rate.epoch.global": 0.8871349644830308, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9929435483870968, + "tokens_p.mean_in_band": 0.6925381747159091, + "tokens_rate.above_band": 0.9441624365482234, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05583756345177665 + }, + { + "epoch": 1.271836386876864, + "grad_norm": 143.60654046455994, + "learning_rate": 3.831062531531257e-07, + "loss": 0.2114, + "step": 5970, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.91005291005291, + "success_rate.epoch.env.math": 0.9694868238557559, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8002018163471241, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695766011678788, + "success_rate.epoch.global": 0.887185534591195, + "success_rate.window.env.agentgym:sciworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998780137772675, + "tokens_p.mean_in_band": 0.744140625, + "tokens_rate.above_band": 0.9954285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004571428571428572 + }, + { + "epoch": 1.2729015764806135, + "grad_norm": 342.7154281216755, + "learning_rate": 3.8307837061171537e-07, + "loss": 0.3818, + "step": 5975, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9102902374670184, + "success_rate.epoch.env.math": 0.9697386519944979, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8004032258064516, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696834249075341, + "success_rate.epoch.global": 0.8876272513703993, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998524678111588, + "tokens_p.mean_in_band": 0.69921875, + "tokens_rate.above_band": 0.9831223628691983, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016877637130801686 + }, + { + "epoch": 1.273966766084363, + "grad_norm": 144.83687020026025, + "learning_rate": 3.8305047125095436e-07, + "loss": 0.2534, + "step": 5980, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9105263157894737, + "success_rate.epoch.env.math": 0.9698216735253772, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7995991983967936, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684347480752916, + "success_rate.epoch.global": 0.8868954758190327, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9969642857142857, + "tokens_p.mean_in_band": 0.49267578125, + "tokens_rate.above_band": 0.9162303664921466, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08376963350785341 + }, + { + "epoch": 1.2750319556881125, + "grad_norm": 268.3398171037985, + "learning_rate": 3.8302255509117553e-07, + "loss": 0.3505, + "step": 5985, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9105263157894737, + "success_rate.epoch.env.math": 0.9699453551912568, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.799800796812749, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8685085785297525, + "success_rate.epoch.global": 0.8869463869463869, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952083333333334, + "tokens_p.mean_in_band": 0.7217881944444444, + "tokens_rate.above_band": 0.9433962264150944, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05660377358490566 + }, + { + "epoch": 1.276097145291862, + "grad_norm": 156.89110435172327, + "learning_rate": 3.8299462215272396e-07, + "loss": 0.2568, + "step": 5990, + "success_rate.epoch.env.abd": 0.9915254237288136, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9105263157894737, + "success_rate.epoch.env.math": 0.9700680272108844, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.798810703666997, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684430050986491, + "success_rate.epoch.global": 0.8866099071207431, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9893092105263158, + "tokens_p.mean_in_band": 0.6774553571428571, + "tokens_rate.above_band": 0.890625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.109375 + }, + { + "epoch": 1.2771623348956114, + "grad_norm": 125.48521639557713, + "learning_rate": 3.829666724559571e-07, + "loss": 0.2436, + "step": 5995, + "success_rate.epoch.env.abd": 0.9915966386554622, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9114583333333334, + "success_rate.epoch.env.math": 0.9701897018970189, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.798219584569733, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684915312858653, + "success_rate.epoch.global": 0.8866615265998458, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959375, + "tokens_p.mean_in_band": 0.458984375, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 1.2782275244993608, + "grad_norm": 77.57346437988627, + "learning_rate": 3.829387060212443e-07, + "loss": 0.2269, + "step": 6000, + "success_rate.epoch.env.abd": 0.9916666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9119170984455959, + "success_rate.epoch.env.math": 0.9702702702702702, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7970443349753694, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684832399628342, + "success_rate.epoch.global": 0.8863287250384024, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9945733532934131, + "tokens_p.mean_in_band": 0.458984375, + "tokens_rate.above_band": 0.943502824858757, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05649717514124294 + }, + { + "epoch": 1.2792927141031103, + "grad_norm": 468.7125080289853, + "learning_rate": 3.8291072286896745e-07, + "loss": 0.4377, + "step": 6005, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9095607235142119, + "success_rate.epoch.env.math": 0.9703903095558546, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7970588235294118, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675362000193563, + "success_rate.epoch.global": 0.8856159143075746, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.45, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9833776595744681, + "tokens_p.mean_in_band": 0.73687744140625, + "tokens_rate.above_band": 0.7859531772575251, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.2140468227424749 + }, + { + "epoch": 1.2803579037068598, + "grad_norm": 56.85665004058044, + "learning_rate": 3.828827230195204e-07, + "loss": 0.2405, + "step": 6010, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.910025706940874, + "success_rate.epoch.env.math": 0.9704301075268817, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7976539589442815, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86790832679462, + "success_rate.epoch.global": 0.8860518292682927, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984789823008849, + "tokens_p.mean_in_band": 0.7877604166666666, + "tokens_rate.above_band": 0.9966923925027563, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0033076074972436605 + }, + { + "epoch": 1.2814230933106092, + "grad_norm": 37.589770371380546, + "learning_rate": 3.828547064933092e-07, + "loss": 0.3531, + "step": 6015, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.9705488621151271, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7974683544303798, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677022386898112, + "success_rate.epoch.global": 0.8857251328777525, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979104754829123, + "tokens_p.mean_in_band": 0.46337890625, + "tokens_rate.above_band": 0.9600570613409415, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039942938659058486 + }, + { + "epoch": 1.2824882829143587, + "grad_norm": 109.51942541623957, + "learning_rate": 3.8282667331075224e-07, + "loss": 0.4141, + "step": 6020, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.9053708439897699, + "success_rate.epoch.env.math": 0.9705882352941176, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7978723404255319, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675315019145025, + "success_rate.epoch.global": 0.8854009077155824, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.7142857142857143, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9964717741935484, + "tokens_p.mean_in_band": 0.6153067129629629, + "tokens_rate.above_band": 0.9323308270676691, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06766917293233082 + }, + { + "epoch": 1.2835534725181081, + "grad_norm": 218.2487337213087, + "learning_rate": 3.8279862349227977e-07, + "loss": 0.2619, + "step": 6025, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.9030612244897959, + "success_rate.epoch.env.math": 0.9707446808510638, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7965284474445516, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86729835046072, + "success_rate.epoch.global": 0.8847023360964582, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.5833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9984182098765432, + "tokens_p.mean_in_band": 0.4713792067307692, + "tokens_rate.above_band": 0.9540636042402827, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045936395759717315 + }, + { + "epoch": 1.2846186621218578, + "grad_norm": 17.019269052050042, + "learning_rate": 3.8277055705833435e-07, + "loss": 0.2387, + "step": 6030, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.9033078880407125, + "success_rate.epoch.env.math": 0.9709762532981531, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7971153846153847, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673951843851601, + "success_rate.epoch.global": 0.8851351351351351, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.993421052631579, + "tokens_p.mean_in_band": 0.7991071428571429, + "tokens_rate.above_band": 0.9313725490196079, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06862745098039216 + }, + { + "epoch": 1.285683851725607, + "grad_norm": 83.17044845449945, + "learning_rate": 3.827424740293705e-07, + "loss": 0.2619, + "step": 6035, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9037974683544304, + "success_rate.epoch.env.math": 0.9710144927536232, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7978927203065134, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665787913477563, + "success_rate.epoch.global": 0.8851907255048617, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989583333333333, + "tokens_p.mean_below_band": 2.066371962428093e-09, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.9911054637865311, + "tokens_rate.below_band": 0.0012706480304955528, + "tokens_rate.in_band": 0.007623888182973317 + }, + { + "epoch": 1.2867490413293567, + "grad_norm": 69.87515097013174, + "learning_rate": 3.827143744258551e-07, + "loss": 0.2037, + "step": 6040, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9045226130653267, + "success_rate.epoch.env.math": 0.9710906701708278, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7984718242597899, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669326557846556, + "success_rate.epoch.global": 0.8856184798807749, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0008765778401123, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.287814230933106, + "grad_norm": 85.22130404435951, + "learning_rate": 3.8268625826826685e-07, + "loss": 0.2027, + "step": 6045, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9025, + "success_rate.epoch.env.math": 0.9712041884816754, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7994296577946768, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8668461774919652, + "success_rate.epoch.global": 0.8856718634001485, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9946415706051873, + "tokens_p.mean_in_band": 0.45108695652173914, + "tokens_rate.above_band": 0.9679218967921897, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03207810320781032 + }, + { + "epoch": 1.2888794205368557, + "grad_norm": 32.42391816339672, + "learning_rate": 3.8265812557709656e-07, + "loss": 0.2222, + "step": 6050, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9029850746268657, + "success_rate.epoch.env.math": 0.9713541666666666, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7992424242424242, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8668868883337475, + "success_rate.epoch.global": 0.8857248520710059, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938271604938271, + "tokens_p.mean_in_band": 0.5559895833333334, + "tokens_rate.above_band": 0.9642857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03571428571428571 + }, + { + "epoch": 1.289944610140605, + "grad_norm": 41.161602536444484, + "learning_rate": 3.8262997637284717e-07, + "loss": 0.2037, + "step": 6055, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9034653465346535, + "success_rate.epoch.env.math": 0.9714656290531777, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.7992459943449576, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669410069152778, + "success_rate.epoch.global": 0.8857774502579219, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952830188679245, + "tokens_p.mean_in_band": 0.4703125, + "tokens_rate.above_band": 0.8412698412698413, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15873015873015872 + }, + { + "epoch": 1.2910097997443546, + "grad_norm": 57.31473458498728, + "learning_rate": 3.826018106760336e-07, + "loss": 0.433, + "step": 6060, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9034653465346535, + "success_rate.epoch.env.math": 0.9715394566623544, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8007483629560337, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670842974807552, + "success_rate.epoch.global": 0.8861967694566814, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9917763157894737, + "tokens_p.mean_in_band": 0.7916666666666666, + "tokens_rate.above_band": 0.9568345323741008, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04316546762589928 + }, + { + "epoch": 1.2920749893481038, + "grad_norm": 533.8718493512891, + "learning_rate": 3.825736285071829e-07, + "loss": 0.2421, + "step": 6065, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9037037037037037, + "success_rate.epoch.env.math": 0.9716129032258064, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8011152416356877, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671459958818605, + "success_rate.epoch.global": 0.8862472567666423, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9889705882352942, + "tokens_p.mean_below_band": 6.845220923423767e-08, + "tokens_p.mean_in_band": 0.5380859375, + "tokens_rate.above_band": 0.9714285714285714, + "tokens_rate.below_band": 0.009523809523809525, + "tokens_rate.in_band": 0.01904761904761905 + }, + { + "epoch": 1.2931401789518535, + "grad_norm": 68.82067571028459, + "learning_rate": 3.8254542988683395e-07, + "loss": 0.1398, + "step": 6070, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9041769041769042, + "success_rate.epoch.env.math": 0.9717223650385605, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8018518518518518, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867320677767917, + "success_rate.epoch.global": 0.8866618075801749, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955645161290323, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.96875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03125 + }, + { + "epoch": 1.294205368555603, + "grad_norm": 364.4614280474005, + "learning_rate": 3.8251721483553767e-07, + "loss": 0.4229, + "step": 6075, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9046454767726161, + "success_rate.epoch.env.math": 0.971830985915493, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8025830258302583, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674513504687457, + "success_rate.epoch.global": 0.8870733478576616, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957386363636364, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9734513274336283, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02654867256637168 + }, + { + "epoch": 1.2952705581593524, + "grad_norm": 64.39942196648772, + "learning_rate": 3.8248898337385705e-07, + "loss": 0.2027, + "step": 6080, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9048780487804878, + "success_rate.epoch.env.math": 0.9720101781170484, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8033088235294118, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675547651877076, + "success_rate.epoch.global": 0.8874819102749638, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9920343137254902, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.9807692307692307, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019230769230769232 + }, + { + "epoch": 1.296335747763102, + "grad_norm": 398.46200063237325, + "learning_rate": 3.82460735522367e-07, + "loss": 0.2841, + "step": 6085, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9402985074626866, + "success_rate.epoch.env.logic": 0.9051094890510949, + "success_rate.epoch.env.math": 0.9720812182741116, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8032936870997255, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676751237441678, + "success_rate.epoch.global": 0.8875270367700072, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0005232558139534, + "tokens_p.mean_in_band": 0.591796875, + "tokens_rate.above_band": 0.9962928637627433, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0037071362372567192 + }, + { + "epoch": 1.2974009373668514, + "grad_norm": 134.76241440601828, + "learning_rate": 3.824324713016543e-07, + "loss": 0.4547, + "step": 6090, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9054054054054054, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.9051094890510949, + "success_rate.epoch.env.math": 0.9720812182741116, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8036363636363636, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666585624613958, + "success_rate.epoch.global": 0.8872126436781609, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.6190476190476191, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9928872497365648, + "tokens_p.mean_in_band": 0.7869001116071429, + "tokens_rate.above_band": 0.9713408393039918, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028659160696008188 + }, + { + "epoch": 1.2984661269706008, + "grad_norm": 58.32248069672788, + "learning_rate": 3.824041907323177e-07, + "loss": 0.2071, + "step": 6095, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9054054054054054, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9051094890510949, + "success_rate.epoch.env.math": 0.9709962168978562, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.802536231884058, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8664989475249651, + "success_rate.epoch.global": 0.8865425912670007, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9972141472868217, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9681050656660413, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03189493433395872 + }, + { + "epoch": 1.2995313165743503, + "grad_norm": 210.08980179889673, + "learning_rate": 3.8237589383496785e-07, + "loss": 0.3818, + "step": 6100, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.9054054054054054, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9055690072639225, + "success_rate.epoch.env.math": 0.9711055276381909, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8025247971145176, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665496197234762, + "success_rate.epoch.global": 0.8865905848787446, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9907407407407407, + "tokens_p.mean_in_band": 0.6702008928571429, + "tokens_rate.above_band": 0.9204545454545454, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07954545454545454 + }, + { + "epoch": 1.3005965061780997, + "grad_norm": 213.66737535411383, + "learning_rate": 3.823475806302274e-07, + "loss": 0.1966, + "step": 6105, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9055690072639225, + "success_rate.epoch.env.math": 0.9712140175219024, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8030575539568345, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669308919306377, + "success_rate.epoch.global": 0.8869936034115139, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988727454909819, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3016616957818492, + "grad_norm": 67.36991174330433, + "learning_rate": 3.823192511387308e-07, + "loss": 0.2368, + "step": 6110, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9420289855072463, + "success_rate.epoch.env.logic": 0.9060240963855422, + "success_rate.epoch.env.math": 0.9712140175219024, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8032200357781754, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670368647861924, + "success_rate.epoch.global": 0.8870396600566572, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9953286082474226, + "tokens_p.mean_in_band": 0.6263020833333334, + "tokens_rate.above_band": 0.97, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03 + }, + { + "epoch": 1.3027268853855987, + "grad_norm": 139.07522932763922, + "learning_rate": 3.8229090538112435e-07, + "loss": 0.2915, + "step": 6115, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9420289855072463, + "success_rate.epoch.env.logic": 0.9064748201438849, + "success_rate.epoch.env.math": 0.9713574097135741, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8035714285714286, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8672259062678461, + "success_rate.epoch.global": 0.8874382498235709, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970067049808429, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9961832061068703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003816793893129771 + }, + { + "epoch": 1.3037920749893481, + "grad_norm": 10.219117398705716, + "learning_rate": 3.822625433780662e-07, + "loss": 0.1552, + "step": 6120, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.9069212410501193, + "success_rate.epoch.env.math": 0.9714640198511166, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8040961709706145, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674115450609026, + "success_rate.epoch.global": 0.8878340365682138, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985687022900763, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3048572645930976, + "grad_norm": 376.2719700624019, + "learning_rate": 3.822341651502265e-07, + "loss": 0.2478, + "step": 6125, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.9073634204275535, + "success_rate.epoch.env.math": 0.971604938271605, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8046181172291297, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867523188292948, + "success_rate.epoch.global": 0.8882270497547302, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970238095238095, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9932432432432432, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006756756756756757 + }, + { + "epoch": 1.305922454196847, + "grad_norm": 166.41085091691906, + "learning_rate": 3.8220577071828694e-07, + "loss": 0.2998, + "step": 6130, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.9075829383886256, + "success_rate.epoch.env.math": 0.9716748768472906, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8054818744473917, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676876386746528, + "success_rate.epoch.global": 0.8886173184357542, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967905405405405, + "tokens_p.mean_in_band": 0.74658203125, + "tokens_rate.above_band": 0.9585492227979274, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04145077720207254 + }, + { + "epoch": 1.3069876438005965, + "grad_norm": 56.01492079179995, + "learning_rate": 3.821773601029413e-07, + "loss": 0.3176, + "step": 6135, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9428571428571428, + "success_rate.epoch.env.logic": 0.9080188679245284, + "success_rate.epoch.env.math": 0.9717791411042945, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8059964726631393, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8678317434278079, + "success_rate.epoch.global": 0.8890048712595685, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971751412429378, + "tokens_p.mean_in_band": 0.8035714285714286, + "tokens_rate.above_band": 0.9619565217391305, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03804347826086957 + }, + { + "epoch": 1.308052833404346, + "grad_norm": 121.03762223482265, + "learning_rate": 3.82148933324895e-07, + "loss": 0.3884, + "step": 6140, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9436619718309859, + "success_rate.epoch.env.logic": 0.908235294117647, + "success_rate.epoch.env.math": 0.9706601466992665, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8056288478452067, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8678001139510895, + "success_rate.epoch.global": 0.8886962552011096, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957749695493301, + "tokens_p.mean_in_band": 0.5101776123046875, + "tokens_rate.above_band": 0.9903498190591074, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009650180940892641 + }, + { + "epoch": 1.3091180230080954, + "grad_norm": 52.06903795901144, + "learning_rate": 3.8212049040486525e-07, + "loss": 0.1936, + "step": 6145, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9088785046728972, + "success_rate.epoch.env.math": 0.9708029197080292, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8059701492537313, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8679384100599528, + "success_rate.epoch.global": 0.8890808569454043, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974926189649184, + "tokens_p.mean_in_band": 0.79296875, + "tokens_rate.above_band": 0.9996527777777777, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00034722222222222224 + }, + { + "epoch": 1.310183212611845, + "grad_norm": 83.59712480675947, + "learning_rate": 3.8209203136358107e-07, + "loss": 0.2384, + "step": 6150, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9093023255813953, + "success_rate.epoch.env.math": 0.9709090909090909, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8057742782152231, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8679687847027757, + "success_rate.epoch.global": 0.8891184573002755, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9940476190476191, + "tokens_p.mean_in_band": 0.5809151785714286, + "tokens_rate.above_band": 0.9473684210526315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05263157894736842 + }, + { + "epoch": 1.3112484022155944, + "grad_norm": 153.94520943657272, + "learning_rate": 3.8206355622178314e-07, + "loss": 0.2335, + "step": 6155, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9078947368421053, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9076212471131639, + "success_rate.epoch.env.math": 0.9710144927536232, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.806282722513089, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867983406325467, + "success_rate.epoch.global": 0.8891557995881949, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965625, + "tokens_p.mean_in_band": 0.7165178571428571, + "tokens_rate.above_band": 0.9861932938856016, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013806706114398421 + }, + { + "epoch": 1.3123135918193438, + "grad_norm": 380.4709944951116, + "learning_rate": 3.8203506500022403e-07, + "loss": 0.3757, + "step": 6160, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9078947368421053, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9076212471131639, + "success_rate.epoch.env.math": 0.9711191335740073, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8064236111111112, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8680532240943865, + "success_rate.epoch.global": 0.8891928864569083, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995697463768116, + "tokens_p.mean_in_band": 0.6383928571428571, + "tokens_rate.above_band": 0.9517241379310345, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04827586206896552 + }, + { + "epoch": 1.3133787814230933, + "grad_norm": 312.2317876408597, + "learning_rate": 3.8200655771966785e-07, + "loss": 0.3792, + "step": 6165, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.908256880733945, + "success_rate.epoch.env.math": 0.9700239808153477, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8058925476603119, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8681778687148981, + "success_rate.epoch.global": 0.8888888888888888, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9995321856287425, + "tokens_p.mean_in_band": 0.6371527777777778, + "tokens_rate.above_band": 0.9867060561299852, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013293943870014771 + }, + { + "epoch": 1.3144439710268427, + "grad_norm": 42.039312474474656, + "learning_rate": 3.8197803440089063e-07, + "loss": 0.3141, + "step": 6170, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, + "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.908675799086758, + "success_rate.epoch.env.math": 0.9702026221692491, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8053633217993079, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8682305104490379, + "success_rate.epoch.global": 0.8889266304347826, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993573264781491, + "tokens_p.mean_in_band": 0.4270833333333333, + "tokens_rate.above_band": 0.9848101265822785, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015189873417721518 + }, + { + "epoch": 1.3155091606305922, + "grad_norm": 85.99984471279807, + "learning_rate": 3.8194949506467983e-07, + "loss": 0.2177, + "step": 6175, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, + "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.909297052154195, + "success_rate.epoch.env.math": 0.9703087885985748, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8048359240069085, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676545171755783, + "success_rate.epoch.global": 0.8886255924170616, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979707792207793, + "tokens_p.mean_in_band": 0.6045386904761905, + "tokens_rate.above_band": 0.88, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12 + }, + { + "epoch": 1.3165743502343417, + "grad_norm": 115.68674587278218, + "learning_rate": 3.819209397318347e-07, + "loss": 0.2165, + "step": 6180, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.909706546275395, + "success_rate.epoch.env.math": 0.9703791469194313, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8046471600688468, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677368921487848, + "success_rate.epoch.global": 0.888663967611336, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973341232227488, + "tokens_p.mean_in_band": 0.509765625, + "tokens_rate.above_band": 0.9547511312217195, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04524886877828054 + }, + { + "epoch": 1.3176395398380911, + "grad_norm": 200.31168383725677, + "learning_rate": 3.818923684231663e-07, + "loss": 0.1614, + "step": 6185, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9101123595505618, + "success_rate.epoch.env.math": 0.9705535924617197, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8041237113402062, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677773749194497, + "success_rate.epoch.global": 0.8887020847343645, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996331828442438, + "tokens_p.mean_in_band": 0.6244419642857143, + "tokens_rate.above_band": 0.9693654266958425, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030634573304157548 + }, + { + "epoch": 1.3187047294418406, + "grad_norm": 448.88518888445986, + "learning_rate": 3.818637811594971e-07, + "loss": 0.4243, + "step": 6190, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9101123595505618, + "success_rate.epoch.env.math": 0.9706227967097533, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8042735042735043, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667942080674962, + "success_rate.epoch.global": 0.8884048257372654, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978932584269663, + "tokens_p.mean_in_band": 0.5501327514648438, + "tokens_rate.above_band": 0.9910913140311804, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008908685968819599 + }, + { + "epoch": 1.31976991904559, + "grad_norm": 135.21994319526834, + "learning_rate": 3.818351779616613e-07, + "loss": 0.1889, + "step": 6195, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9105145413870246, + "success_rate.epoch.env.math": 0.9707259953161592, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8040885860306644, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8668581720640854, + "success_rate.epoch.global": 0.8884435537742151, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973695286195287, + "tokens_p.mean_in_band": 0.4270833333333333, + "tokens_rate.above_band": 0.9801980198019802, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019801980198019802 + }, + { + "epoch": 1.3208351086493395, + "grad_norm": 114.61613603186188, + "learning_rate": 3.818065588505047e-07, + "loss": 0.2364, + "step": 6200, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9107142857142857, + "success_rate.epoch.env.math": 0.970828471411902, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8045879354290569, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665144847358884, + "success_rate.epoch.global": 0.8884820239680427, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992481203007518, + "tokens_p.mean_in_band": 0.6166666666666667, + "tokens_rate.above_band": 0.9779411764705882, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022058823529411766 + }, + { + "epoch": 1.321900298253089, + "grad_norm": 174.49326375883442, + "learning_rate": 3.817779238468847e-07, + "loss": 0.2573, + "step": 6205, + "success_rate.epoch.env.abd": 0.9849624060150376, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9107142857142857, + "success_rate.epoch.env.math": 0.9709302325581395, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8037225042301185, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8664988514813732, + "success_rate.epoch.global": 0.8881884538818845, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9970154494382022, + "tokens_p.mean_in_band": 0.5562855113636364, + "tokens_rate.above_band": 0.9798165137614679, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02018348623853211 + }, + { + "epoch": 1.3229654878568384, + "grad_norm": 95.52428811038516, + "learning_rate": 3.817492729716704e-07, + "loss": 0.3759, + "step": 6210, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9086859688195991, + "success_rate.epoch.env.math": 0.9710312862108922, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8042194092827004, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8664631180305989, + "success_rate.epoch.global": 0.8882275132275133, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995213347921226, + "tokens_p.mean_in_band": 0.51484375, + "tokens_rate.above_band": 0.9682203389830508, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03177966101694915 + }, + { + "epoch": 1.3240306774605881, + "grad_norm": 138.64735888129488, + "learning_rate": 3.817206062457422e-07, + "loss": 0.2409, + "step": 6215, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9710982658959537, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8043660789252729, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665193534486849, + "success_rate.epoch.global": 0.8882663150955834, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949048913043478, + "tokens_p.mean_in_band": 0.19791666666666666, + "tokens_rate.above_band": 0.9387755102040817, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.061224489795918366 + }, + { + "epoch": 1.3250958670643374, + "grad_norm": 113.2946221991507, + "learning_rate": 3.816919236899922e-07, + "loss": 0.3553, + "step": 6220, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9092920353982301, + "success_rate.epoch.env.math": 0.9710982658959537, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8046744574290484, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666164435816008, + "success_rate.epoch.global": 0.8883048620236531, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9955110497237569, + "tokens_p.mean_in_band": 0.5765625, + "tokens_rate.above_band": 0.9731182795698925, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026881720430107527 + }, + { + "epoch": 1.326161056668087, + "grad_norm": 36.61378744028113, + "learning_rate": 3.8166322532532417e-07, + "loss": 0.3132, + "step": 6225, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9100877192982456, + "success_rate.epoch.env.math": 0.9711981566820277, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8051623646960866, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667422146682487, + "success_rate.epoch.global": 0.8886705959397512, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9958609271523179, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9741935483870968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025806451612903226 + }, + { + "epoch": 1.3272262462718363, + "grad_norm": 307.7916649063872, + "learning_rate": 3.816345111726532e-07, + "loss": 0.229, + "step": 6230, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9452054794520548, + "success_rate.epoch.env.logic": 0.9104803493449781, + "success_rate.epoch.env.math": 0.9712313003452244, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8061309030654515, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866903324243199, + "success_rate.epoch.global": 0.8890339425587467, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966603053435115, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9980952380952381, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019047619047619048 + }, + { + "epoch": 1.328291435875586, + "grad_norm": 100.5949741906352, + "learning_rate": 3.8160578125290586e-07, + "loss": 0.2162, + "step": 6235, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9455782312925171, + "success_rate.epoch.env.logic": 0.9084967320261438, + "success_rate.epoch.env.math": 0.9712973593570609, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8067712634186622, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670406365125523, + "success_rate.epoch.global": 0.8890696161353285, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998059866962306, + "tokens_p.mean_in_band": 0.59765625, + "tokens_rate.above_band": 0.9825708061002179, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017429193899782137 + }, + { + "epoch": 1.3293566254793352, + "grad_norm": 48.48533929342405, + "learning_rate": 3.8157703558702046e-07, + "loss": 0.189, + "step": 6240, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9455782312925171, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9713631156930126, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8074074074074075, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671975120265581, + "success_rate.epoch.global": 0.8894293125810635, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996415770609319, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.998211091234347, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017889087656529517 + }, + { + "epoch": 1.3304218150830849, + "grad_norm": 95.20059290147496, + "learning_rate": 3.8154827419594663e-07, + "loss": 0.1553, + "step": 6245, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9096774193548387, + "success_rate.epoch.env.math": 0.9714285714285714, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8080393765381461, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673476620068906, + "success_rate.epoch.global": 0.8897866839043309, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964574898785425, + "tokens_p.mean_in_band": 0.716796875, + "tokens_rate.above_band": 0.9973082099596231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026917900403768506 + }, + { + "epoch": 1.3314870046868341, + "grad_norm": 160.47813336048813, + "learning_rate": 3.815194971006454e-07, + "loss": 0.2896, + "step": 6250, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9463087248322147, + "success_rate.epoch.env.logic": 0.9098712446351931, + "success_rate.epoch.env.math": 0.9714611872146118, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8086672117743254, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675162180731746, + "success_rate.epoch.global": 0.8901417525773195, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995567375886525, + "tokens_p.mean_in_band": 0.7443181818181818, + "tokens_rate.above_band": 0.9871645274212368, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012835472578763127 + }, + { + "epoch": 1.3325521942905838, + "grad_norm": 108.29357907157748, + "learning_rate": 3.814907043220893e-07, + "loss": 0.3148, + "step": 6255, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9100642398286938, + "success_rate.epoch.env.math": 0.9704209328782708, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8083197389885808, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669991579038747, + "success_rate.epoch.global": 0.8895311496467566, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7222222222222222, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9960339168490153, + "tokens_p.mean_in_band": 0.5755208333333334, + "tokens_rate.above_band": 0.9326530612244898, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0673469387755102 + }, + { + "epoch": 1.3336173838943333, + "grad_norm": 164.84188773736184, + "learning_rate": 3.814618958812623e-07, + "loss": 0.2622, + "step": 6260, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9102564102564102, + "success_rate.epoch.env.math": 0.970554926387316, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8087876322213181, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671080758651055, + "success_rate.epoch.global": 0.8898847631241997, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985576923076923, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3346825734980827, + "grad_norm": 76.10561458682768, + "learning_rate": 3.8143307179915983e-07, + "loss": 0.2313, + "step": 6265, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9106382978723404, + "success_rate.epoch.env.math": 0.9707207207207207, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8084415584415584, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671264038805215, + "success_rate.epoch.global": 0.8899170389278876, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979967948717948, + "tokens_p.mean_in_band": 0.53515625, + "tokens_rate.above_band": 0.9811320754716981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018867924528301886 + }, + { + "epoch": 1.3357477631018322, + "grad_norm": 79.28571324802336, + "learning_rate": 3.8140423209678867e-07, + "loss": 0.312, + "step": 6270, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9110169491525424, + "success_rate.epoch.env.math": 0.9707536557930259, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8090614886731392, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673780321179833, + "success_rate.epoch.global": 0.8902671755725191, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975369458128078, + "tokens_p.mean_in_band": 0.388671875, + "tokens_rate.above_band": 0.9983606557377049, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001639344262295082 + }, + { + "epoch": 1.3368129527055816, + "grad_norm": 55.050706905604095, + "learning_rate": 3.81375376795167e-07, + "loss": 0.2543, + "step": 6275, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9092827004219409, + "success_rate.epoch.env.math": 0.9708520179372198, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8096774193548387, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8672947874370602, + "success_rate.epoch.global": 0.8902980342422321, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0011363636363637, + "tokens_p.mean_in_band": 0.49075520833333336, + "tokens_rate.above_band": 0.952755905511811, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.047244094488188976 + }, + { + "epoch": 1.337878142309331, + "grad_norm": 144.94517013224367, + "learning_rate": 3.813465059153243e-07, + "loss": 0.2111, + "step": 6280, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9094736842105263, + "success_rate.epoch.env.math": 0.9709497206703911, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8102893890675241, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674181176225477, + "success_rate.epoch.global": 0.8906447534766119, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977106227106227, + "tokens_p.mean_in_band": 0.8203125, + "tokens_rate.above_band": 0.9927272727272727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007272727272727273 + }, + { + "epoch": 1.3389433319130806, + "grad_norm": 91.05019917995585, + "learning_rate": 3.813176194783015e-07, + "loss": 0.5581, + "step": 6285, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9100418410041841, + "success_rate.epoch.env.math": 0.9710144927536232, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8100961538461539, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626734007778343, + "success_rate.epoch.global": 0.8903591682419659, + "success_rate.window.env.babyai": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9917763157894737, + "tokens_p.mean_below_band": 1.8189894035458565e-09, + "tokens_p.mean_in_band": 0.8000710227272727, + "tokens_rate.above_band": 0.9661016949152542, + "tokens_rate.below_band": 0.002824858757062147, + "tokens_rate.in_band": 0.031073446327683617 + }, + { + "epoch": 1.34000852151683, + "grad_norm": 202.13569091379333, + "learning_rate": 3.8128871750515076e-07, + "loss": 0.2848, + "step": 6290, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9104166666666667, + "success_rate.epoch.env.math": 0.9711751662971175, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8105515587529976, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627634829699088, + "success_rate.epoch.global": 0.8907035175879398, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944852941176471, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3410737111205795, + "grad_norm": 92.92925693652778, + "learning_rate": 3.8125980001693577e-07, + "loss": 0.3242, + "step": 6295, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.891566265060241, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9107883817427386, + "success_rate.epoch.env.math": 0.9712070874861573, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8105095541401274, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619174219527497, + "success_rate.epoch.global": 0.8904195366311835, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983054226475279, + "tokens_p.mean_in_band": 0.6090494791666666, + "tokens_rate.above_band": 0.990521327014218, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009478672985781991 + }, + { + "epoch": 1.342138900724329, + "grad_norm": 196.34498670796538, + "learning_rate": 3.8123086703473126e-07, + "loss": 0.1675, + "step": 6300, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9109730848861284, + "success_rate.epoch.env.math": 0.9712707182320443, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8111111111111111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8621529346148148, + "success_rate.epoch.global": 0.8907615480649188, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980673862310385, + "tokens_p.mean_in_band": 0.76953125, + "tokens_rate.above_band": 0.9953542392566783, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004645760743321719 + }, + { + "epoch": 1.3432040903280784, + "grad_norm": 99.48808804546846, + "learning_rate": 3.8120191857962345e-07, + "loss": 0.3759, + "step": 6305, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9111570247933884, + "success_rate.epoch.env.math": 0.9713340683572216, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8115597783056215, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624097855683808, + "success_rate.epoch.global": 0.8911014312383323, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976635514018691, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9981343283582089, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0018656716417910447 + }, + { + "epoch": 1.3442692799318279, + "grad_norm": 164.3265206634844, + "learning_rate": 3.811729546727097e-07, + "loss": 0.1862, + "step": 6310, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9477124183006536, + "success_rate.epoch.env.logic": 0.911522633744856, + "success_rate.epoch.env.math": 0.9713656387665198, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8113654301499605, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624772807144254, + "success_rate.epoch.global": 0.8911290322580645, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968944099378882, + "tokens_p.mean_in_band": 0.5966796875, + "tokens_rate.above_band": 0.9877300613496932, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012269938650306749 + }, + { + "epoch": 1.3453344695355773, + "grad_norm": 60.41245540209164, + "learning_rate": 3.811439753350988e-07, + "loss": 0.1835, + "step": 6315, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9117043121149897, + "success_rate.epoch.env.math": 0.9714912280701754, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8110236220472441, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625354750821175, + "success_rate.epoch.global": 0.891156462585034, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99822695035461, + "tokens_p.mean_in_band": 0.4673549107142857, + "tokens_rate.above_band": 0.9917936694021102, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008206330597889801 + }, + { + "epoch": 1.3463996591393268, + "grad_norm": 172.30674427863735, + "learning_rate": 3.8111498058791055e-07, + "loss": 0.3184, + "step": 6320, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8850574712643678, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9117043121149897, + "success_rate.epoch.env.math": 0.9715536105032823, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8116169544740973, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618354572643022, + "success_rate.epoch.global": 0.8911837237977805, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993402111324377, + "tokens_p.mean_in_band": 0.7966974431818182, + "tokens_rate.above_band": 0.9895536562203229, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010446343779677113 + }, + { + "epoch": 1.3474648487430763, + "grad_norm": 209.80856495220658, + "learning_rate": 3.810859704522762e-07, + "loss": 0.2412, + "step": 6325, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8850574712643678, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9122448979591836, + "success_rate.epoch.env.math": 0.9716466739367503, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8120595144870791, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619665945058426, + "success_rate.epoch.global": 0.8915181315304241, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9945833333333334, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9868421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013157894736842105 + }, + { + "epoch": 1.3485300383468257, + "grad_norm": 367.3057731166485, + "learning_rate": 3.810569449493381e-07, + "loss": 0.4214, + "step": 6330, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8850574712643678, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9127789046653144, + "success_rate.epoch.env.math": 0.9717391304347827, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8115715402658327, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620381971306211, + "success_rate.epoch.global": 0.8915441176470589, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971217105263158, + "tokens_p.mean_in_band": 0.615625, + "tokens_rate.above_band": 0.9681528662420382, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03184713375796178 + }, + { + "epoch": 1.3495952279505752, + "grad_norm": 155.3766632946781, + "learning_rate": 3.810279041002499e-07, + "loss": 0.2507, + "step": 6335, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9131313131313131, + "success_rate.epoch.env.math": 0.971830985915493, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8113795791114575, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611468181786179, + "success_rate.epoch.global": 0.8912645082467929, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978632478632479, + "tokens_p.mean_in_band": 0.5912388392857143, + "tokens_rate.above_band": 0.970954356846473, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029045643153526972 + }, + { + "epoch": 1.3506604175543246, + "grad_norm": 69.71023145249913, + "learning_rate": 3.8099884792617625e-07, + "loss": 0.2066, + "step": 6340, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9131313131313131, + "success_rate.epoch.env.math": 0.9718614718614719, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8116731517509728, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86132048769996, + "success_rate.epoch.global": 0.8915956151035322, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981966590736523, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.351725607158074, + "grad_norm": 140.59910021523186, + "learning_rate": 3.809697764482932e-07, + "loss": 0.2556, + "step": 6345, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9134808853118712, + "success_rate.epoch.env.math": 0.9719827586206896, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8122575640031032, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614164214446788, + "success_rate.epoch.global": 0.8919247115968427, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997564935064935, + "tokens_p.mean_in_band": 0.6625, + "tokens_rate.above_band": 0.9390243902439024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06097560975609756 + }, + { + "epoch": 1.3527907967618236, + "grad_norm": 37.347102505725914, + "learning_rate": 3.8094068968778766e-07, + "loss": 0.2749, + "step": 6350, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.912, + "success_rate.epoch.env.math": 0.9720730397422127, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8111455108359134, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614281415892088, + "success_rate.epoch.global": 0.8913438256658596, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7499999999999999, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9968527843601895, + "tokens_p.mean_in_band": 0.6088005514705882, + "tokens_rate.above_band": 0.9254385964912281, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07456140350877193 + }, + { + "epoch": 1.353855986365573, + "grad_norm": 112.842414155859, + "learning_rate": 3.8091158766585803e-07, + "loss": 0.2128, + "step": 6355, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9125248508946322, + "success_rate.epoch.env.math": 0.9721329046087889, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8117283950617284, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611013866096741, + "success_rate.epoch.global": 0.8913699456849729, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972527472527473, + "tokens_p.mean_in_band": 0.6123046875, + "tokens_rate.above_band": 0.883495145631068, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11650485436893204 + }, + { + "epoch": 1.3549211759693225, + "grad_norm": 86.09852786617513, + "learning_rate": 3.808824704037136e-07, + "loss": 0.2684, + "step": 6360, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9128712871287129, + "success_rate.epoch.env.math": 0.9722222222222222, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8109146810146042, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610670265915278, + "success_rate.epoch.global": 0.8910950661853189, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9921875, + "tokens_p.mean_in_band": 0.57177734375, + "tokens_rate.above_band": 0.9090909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09090909090909091 + }, + { + "epoch": 1.355986365573072, + "grad_norm": 157.42789500147703, + "learning_rate": 3.808533379225748e-07, + "loss": 0.3104, + "step": 6365, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.9133858267716536, + "success_rate.epoch.env.math": 0.9722814498933902, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8113496932515337, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612141598591276, + "success_rate.epoch.global": 0.8914217156568687, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959016393442623, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.993485342019544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006514657980456026 + }, + { + "epoch": 1.3570515551768214, + "grad_norm": 29.18823663664378, + "learning_rate": 3.808241902436731e-07, + "loss": 0.2402, + "step": 6370, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.8764044943820225, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.913894324853229, + "success_rate.epoch.env.math": 0.9723404255319149, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.811017597551645, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861371596319636, + "success_rate.epoch.global": 0.8914473684210527, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987258583690987, + "tokens_p.mean_in_band": 0.3734375, + "tokens_rate.above_band": 0.9893842887473461, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010615711252653927 + }, + { + "epoch": 1.3581167447805709, + "grad_norm": 44.11932529402207, + "learning_rate": 3.807950273882513e-07, + "loss": 0.2602, + "step": 6375, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.9140625, + "success_rate.epoch.env.math": 0.9724284199363733, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8114503816793893, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615993784362548, + "success_rate.epoch.global": 0.8917710196779964, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994635193133047, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3591819343843203, + "grad_norm": 132.48806301729334, + "learning_rate": 3.807658493775629e-07, + "loss": 0.2491, + "step": 6380, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9142300194931774, + "success_rate.epoch.env.math": 0.9714889123548046, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8118811881188119, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616037030367996, + "success_rate.epoch.global": 0.89179548156956, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967749110320284, + "tokens_p.mean_in_band": 0.75, + "tokens_rate.above_band": 0.9773913043478261, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022608695652173914 + }, + { + "epoch": 1.3602471239880698, + "grad_norm": 38.08547869801414, + "learning_rate": 3.8073665623287276e-07, + "loss": 0.2254, + "step": 6385, + "success_rate.epoch.env.abd": 0.9867549668874173, + "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9142300194931774, + "success_rate.epoch.env.math": 0.9715789473684211, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.812452543659833, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609189864255654, + "success_rate.epoch.global": 0.8915234143449912, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957264957264957, + "tokens_p.mean_in_band": 0.5834821428571428, + "tokens_rate.above_band": 0.8698884758364313, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13011152416356878 + }, + { + "epoch": 1.3613123135918195, + "grad_norm": 186.08686477217364, + "learning_rate": 3.807074479754565e-07, + "loss": 0.2976, + "step": 6390, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.914396887159533, + "success_rate.epoch.env.math": 0.9716981132075472, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8130204390613172, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610045379967205, + "success_rate.epoch.global": 0.8918439716312057, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988738738738738, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9910714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008928571428571428 + }, + { + "epoch": 1.3623775031955687, + "grad_norm": 185.12091954140467, + "learning_rate": 3.80678224626601e-07, + "loss": 0.2925, + "step": 6395, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9127906976744186, + "success_rate.epoch.env.math": 0.9717868338557993, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8135849056603773, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609493687508345, + "success_rate.epoch.global": 0.8918680023571007, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993602362204724, + "tokens_p.mean_in_band": 0.6879595588235294, + "tokens_rate.above_band": 0.9739263803680982, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02607361963190184 + }, + { + "epoch": 1.3634426927993184, + "grad_norm": 34.22538408383357, + "learning_rate": 3.80648986207604e-07, + "loss": 0.371, + "step": 6400, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9127906976744186, + "success_rate.epoch.env.math": 0.9719334719334719, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8132530120481928, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610546274607064, + "success_rate.epoch.global": 0.8918918918918919, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980440967283073, + "tokens_p.mean_in_band": 0.578125, + "tokens_rate.above_band": 0.9943422913719944, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005657708628005658 + }, + { + "epoch": 1.3645078824030676, + "grad_norm": 438.43910397734777, + "learning_rate": 3.8061973273977423e-07, + "loss": 0.5133, + "step": 6405, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9131274131274131, + "success_rate.epoch.env.math": 0.9720785935884177, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8135338345864662, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611548500949635, + "success_rate.epoch.global": 0.8922085530169889, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976756198347108, + "tokens_p.mean_in_band": 0.55859375, + "tokens_rate.above_band": 0.9918032786885246, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00819672131147541 + }, + { + "epoch": 1.3655730720068173, + "grad_norm": 188.1195347317062, + "learning_rate": 3.8059046424443146e-07, + "loss": 0.2558, + "step": 6410, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9134615384615384, + "success_rate.epoch.env.math": 0.9722222222222222, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8129226145755072, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861366985154048, + "success_rate.epoch.global": 0.892231308411215, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988789237668162, + "tokens_p.mean_in_band": 0.5125, + "tokens_rate.above_band": 0.9780701754385965, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021929824561403508 + }, + { + "epoch": 1.3666382616105666, + "grad_norm": 104.15366686163136, + "learning_rate": 3.805611807429063e-07, + "loss": 0.3664, + "step": 6415, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9136276391554703, + "success_rate.epoch.env.math": 0.9723926380368099, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8125937031484258, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613676765250878, + "success_rate.epoch.global": 0.8922539312754805, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9904336734693877, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9333333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06666666666666667 + }, + { + "epoch": 1.3677034512143162, + "grad_norm": 434.8637491584342, + "learning_rate": 3.805318822565403e-07, + "loss": 0.3238, + "step": 6420, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9141221374045801, + "success_rate.epoch.env.math": 0.9724208375893769, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8125466766243465, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615303651401255, + "success_rate.epoch.global": 0.8922764227642277, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998014502762431, + "tokens_p.mean_in_band": 0.6802455357142857, + "tokens_rate.above_band": 0.981029810298103, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018970189701897018 + }, + { + "epoch": 1.3687686408180655, + "grad_norm": 58.69290932814623, + "learning_rate": 3.8050256880668617e-07, + "loss": 0.1743, + "step": 6425, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, + "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9142857142857143, + "success_rate.epoch.env.math": 0.9726443768996961, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8126865671641791, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615782748429696, + "success_rate.epoch.global": 0.8925883034163289, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978448275862069, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9886363636363636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011363636363636364 + }, + { + "epoch": 1.3698338304218152, + "grad_norm": 32.77634286680063, + "learning_rate": 3.8047324041470714e-07, + "loss": 0.2403, + "step": 6430, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9147727272727273, + "success_rate.epoch.env.math": 0.9726720647773279, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8125, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861668215331229, + "success_rate.epoch.global": 0.8926096997690531, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978165938864629, + "tokens_p.mean_in_band": 0.6396484375, + "tokens_rate.above_band": 0.9772403982930299, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02275960170697013 + }, + { + "epoch": 1.3708990200255646, + "grad_norm": 54.888350228417536, + "learning_rate": 3.8044389710197767e-07, + "loss": 0.1723, + "step": 6435, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9515151515151515, + "success_rate.epoch.env.logic": 0.9147727272727273, + "success_rate.epoch.env.math": 0.9727547931382442, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8133333333333334, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617783699688907, + "success_rate.epoch.global": 0.8929188255613126, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980031948881789, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.990506329113924, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00949367088607595 + }, + { + "epoch": 1.371964209629314, + "grad_norm": 53.63212212620064, + "learning_rate": 3.8041453888988286e-07, + "loss": 0.1363, + "step": 6440, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9149338374291115, + "success_rate.epoch.env.math": 0.9728643216080402, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8138847858197932, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618796580155408, + "success_rate.epoch.global": 0.8932261768082663, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996345029239766, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9985401459854014, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00145985401459854 + }, + { + "epoch": 1.3730293992330636, + "grad_norm": 61.93265022939555, + "learning_rate": 3.8038516579981887e-07, + "loss": 0.3874, + "step": 6445, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8817204301075269, + "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9150943396226415, + "success_rate.epoch.env.math": 0.973, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8142962417096536, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620608656495573, + "success_rate.epoch.global": 0.8935317687464225, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984497389033943, + "tokens_p.mean_in_band": 0.7203125, + "tokens_rate.above_band": 0.9871134020618557, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01288659793814433 + }, + { + "epoch": 1.374094588836813, + "grad_norm": 181.94511050278481, + "learning_rate": 3.8035577785319244e-07, + "loss": 0.3039, + "step": 6450, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8817204301075269, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9152542372881356, + "success_rate.epoch.env.math": 0.9730807577268196, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8138337012509198, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611012822006194, + "success_rate.epoch.global": 0.8929794520547946, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.6428571428571429, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9976003490401396, + "tokens_p.mean_in_band": 0.5589488636363636, + "tokens_rate.above_band": 0.9630252100840336, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03697478991596639 + }, + { + "epoch": 1.3751597784405625, + "grad_norm": 68.81424314700064, + "learning_rate": 3.803263750714215e-07, + "loss": 0.1792, + "step": 6455, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9152542372881356, + "success_rate.epoch.env.math": 0.9731610337972167, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8145161290322581, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604400534979398, + "success_rate.epoch.global": 0.8929994308480365, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968549250535332, + "tokens_p.mean_in_band": 0.6298828125, + "tokens_rate.above_band": 0.9915074309978769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008492569002123142 + }, + { + "epoch": 1.376224968044312, + "grad_norm": 60.39449018273993, + "learning_rate": 3.802969574759344e-07, + "loss": 0.198, + "step": 6460, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9157303370786517, + "success_rate.epoch.env.math": 0.9732408325074331, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8135964912280702, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604069862885347, + "success_rate.epoch.global": 0.8927355278093076, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9965018656716418, + "tokens_p.mean_in_band": 0.6957465277777778, + "tokens_rate.above_band": 0.9370629370629371, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06293706293706294 + }, + { + "epoch": 1.3772901576480614, + "grad_norm": 32.850261336378935, + "learning_rate": 3.802675250881706e-07, + "loss": 0.2016, + "step": 6465, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9158878504672897, + "success_rate.epoch.env.math": 0.973293768545994, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8126822157434402, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8605061517548123, + "success_rate.epoch.global": 0.8924731182795699, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992744610281924, + "tokens_p.mean_in_band": 0.5710227272727273, + "tokens_rate.above_band": 0.9820846905537459, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017915309446254073 + }, + { + "epoch": 1.3783553472518109, + "grad_norm": 248.38658953855327, + "learning_rate": 3.8023807792958015e-07, + "loss": 0.4306, + "step": 6470, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9162011173184358, + "success_rate.epoch.env.math": 0.9733727810650887, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8123636363636364, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8608611398295039, + "success_rate.epoch.global": 0.8924943566591422, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982002617801047, + "tokens_p.mean_in_band": 0.25, + "tokens_rate.above_band": 0.9937565036420395, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006243496357960458 + }, + { + "epoch": 1.3794205368555603, + "grad_norm": 123.42322396420471, + "learning_rate": 3.8020861602162395e-07, + "loss": 0.4671, + "step": 6475, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9520958083832335, + "success_rate.epoch.env.logic": 0.9165120593692022, + "success_rate.epoch.env.math": 0.9733990147783251, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8129079042784626, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602469487125529, + "success_rate.epoch.global": 0.8925154755205402, + "success_rate.window.env.agentgym:sciworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991666666666666, + "tokens_p.mean_in_band": 0.5836588541666666, + "tokens_rate.above_band": 0.9900990099009901, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009900990099009901 + }, + { + "epoch": 1.3804857264593098, + "grad_norm": 61.413473100282815, + "learning_rate": 3.801791393857736e-07, + "loss": 0.4028, + "step": 6480, + "success_rate.epoch.env.abd": 0.9871794871794872, + "success_rate.epoch.env.agentgym:alfworld": 0.8762886597938144, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9165120593692022, + "success_rate.epoch.env.math": 0.9734251968503937, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8129963898916968, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604079654818961, + "success_rate.epoch.global": 0.8925364758698092, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998581847649919, + "tokens_p.mean_in_band": 0.7317708333333334, + "tokens_rate.above_band": 0.9903691813804173, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009630818619582664 + }, + { + "epoch": 1.3815509160630592, + "grad_norm": 145.8586619870465, + "learning_rate": 3.801496480435114e-07, + "loss": 0.2628, + "step": 6485, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8762886597938144, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9172794117647058, + "success_rate.epoch.env.math": 0.9734774066797642, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8132660418168709, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860514408524385, + "success_rate.epoch.global": 0.8928371572467824, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964028776978417, + "tokens_p.mean_in_band": 0.7965494791666666, + "tokens_rate.above_band": 0.9205298013245033, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07947019867549669 + }, + { + "epoch": 1.3826161056668087, + "grad_norm": 77.10137556507472, + "learning_rate": 3.8012014201633067e-07, + "loss": 0.2933, + "step": 6490, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9157509157509157, + "success_rate.epoch.env.math": 0.9735294117647059, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8129496402877698, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8607184801675282, + "success_rate.epoch.global": 0.892578125, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999738219895288, + "tokens_p.mean_in_band": 0.4994140625, + "tokens_rate.above_band": 0.9794871794871794, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020512820512820513 + }, + { + "epoch": 1.3836812952705582, + "grad_norm": 50.64381257559459, + "learning_rate": 3.8009062132573487e-07, + "loss": 0.7428, + "step": 6495, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9157509157509157, + "success_rate.epoch.env.math": 0.9735812133072407, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8140200286123033, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8608204974281708, + "success_rate.epoch.global": 0.8928770172509738, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9917929292929293, + "tokens_p.mean_in_band": 0.853515625, + "tokens_rate.above_band": 0.9801980198019802, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019801980198019802 + }, + { + "epoch": 1.3847464848743076, + "grad_norm": 23.621564802448294, + "learning_rate": 3.800610859932387e-07, + "loss": 0.4678, + "step": 6500, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8613861386138614, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9159049360146252, + "success_rate.epoch.env.math": 0.9736842105263158, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.814418272662384, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8592980902789751, + "success_rate.epoch.global": 0.8926193118756937, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981269716088328, + "tokens_p.mean_in_band": 0.4677734375, + "tokens_rate.above_band": 0.9937304075235109, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006269592476489028 + }, + { + "epoch": 1.385811674478057, + "grad_norm": 121.00984645607008, + "learning_rate": 3.800315360403672e-07, + "loss": 0.6042, + "step": 6505, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9162112932604736, + "success_rate.epoch.env.math": 0.9737609329446064, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8139700641482538, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.858858522965361, + "success_rate.epoch.global": 0.8923630326508024, + "success_rate.window.env.abd": 0.5, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9943633952254642, + "tokens_p.mean_below_band": 4.3213367462158203e-07, + "tokens_p.mean_in_band": 0.14775089073634204, + "tokens_rate.above_band": 0.47125, + "tokens_rate.below_band": 0.0025, + "tokens_rate.in_band": 0.52625 + }, + { + "epoch": 1.3868768640818065, + "grad_norm": 91.12276951054956, + "learning_rate": 3.800019714886562e-07, + "loss": 0.1535, + "step": 6510, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9162112932604736, + "success_rate.epoch.env.math": 0.9738372093023255, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.813655761024182, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8592472290668148, + "success_rate.epoch.global": 0.8923841059602649, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978885135135135, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9955156950672646, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004484304932735426 + }, + { + "epoch": 1.387942053685556, + "grad_norm": 253.19707789653654, + "learning_rate": 3.799723923596521e-07, + "loss": 0.3439, + "step": 6515, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9165154264972777, + "success_rate.epoch.env.math": 0.9739382239382239, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8134751773049645, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859267643989859, + "success_rate.epoch.global": 0.8924050632911392, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9910714285714286, + "tokens_p.mean_in_band": 0.5850694444444444, + "tokens_rate.above_band": 0.9032258064516129, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0967741935483871 + }, + { + "epoch": 1.3890072432893055, + "grad_norm": 528.5307166821377, + "learning_rate": 3.79942798674912e-07, + "loss": 0.2511, + "step": 6520, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9168173598553345, + "success_rate.epoch.env.math": 0.973963355834137, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8141342756183746, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8594994607500381, + "success_rate.epoch.global": 0.8927003293084522, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975609756097561, + "tokens_p.mean_in_band": 0.7109375, + "tokens_rate.above_band": 0.9808612440191388, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019138755980861243 + }, + { + "epoch": 1.390072432893055, + "grad_norm": 66.50404543134246, + "learning_rate": 3.799131904560035e-07, + "loss": 0.2614, + "step": 6525, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9171171171171171, + "success_rate.epoch.env.math": 0.9740384615384615, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8147887323943662, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.859593035272047, + "success_rate.epoch.global": 0.8929939792008758, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9931640625, + "tokens_p.mean_in_band": 0.8487723214285714, + "tokens_rate.above_band": 0.9580838323353293, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041916167664670656 + }, + { + "epoch": 1.3911376224968044, + "grad_norm": 21.33630328405529, + "learning_rate": 3.79883567724505e-07, + "loss": 0.3121, + "step": 6530, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9174147217235189, + "success_rate.epoch.env.math": 0.974088291746641, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8151791988756149, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8597506389375315, + "success_rate.epoch.global": 0.8932860262008734, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997120596205962, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3922028121005539, + "grad_norm": 84.79092647423097, + "learning_rate": 3.7985393050200505e-07, + "loss": 0.2504, + "step": 6535, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9178571428571428, + "success_rate.epoch.env.math": 0.9741379310344828, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.814866760168303, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8598171971100214, + "success_rate.epoch.global": 0.8933043004899293, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994969512195122, + "tokens_p.mean_in_band": 0.80625, + "tokens_rate.above_band": 0.9761904761904762, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023809523809523808 + }, + { + "epoch": 1.3932680017043033, + "grad_norm": 96.84051352345725, + "learning_rate": 3.7982427881010335e-07, + "loss": 0.3038, + "step": 6540, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9178571428571428, + "success_rate.epoch.env.math": 0.9741873804971319, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8152022315202232, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8598521899113458, + "success_rate.epoch.global": 0.8933224755700325, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9892146017699115, + "tokens_p.mean_in_band": 0.6199776785714286, + "tokens_rate.above_band": 0.9416666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.058333333333333334 + }, + { + "epoch": 1.3943331913080528, + "grad_norm": 171.81048454330602, + "learning_rate": 3.7979461267040973e-07, + "loss": 0.3358, + "step": 6545, + "success_rate.epoch.env.abd": 0.9813664596273292, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9185840707964602, + "success_rate.epoch.env.math": 0.9742366412213741, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8152022315202232, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86007241788863, + "success_rate.epoch.global": 0.8936112615051435, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9998263888888889, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.3953983809118022, + "grad_norm": 47.210620468723015, + "learning_rate": 3.7976493210454457e-07, + "loss": 0.2238, + "step": 6550, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9185840707964602, + "success_rate.epoch.env.math": 0.9743101807802094, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8150208623087621, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8601501133275978, + "success_rate.epoch.global": 0.8936285097192225, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969152360515021, + "tokens_p.mean_in_band": 0.7074652777777778, + "tokens_rate.above_band": 0.9628099173553719, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0371900826446281 + }, + { + "epoch": 1.3964635705155517, + "grad_norm": 292.0422152890326, + "learning_rate": 3.7973523713413896e-07, + "loss": 0.2168, + "step": 6555, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9185840707964602, + "success_rate.epoch.env.math": 0.9744075829383886, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8154059680777238, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603681982922274, + "success_rate.epoch.global": 0.8939149165320409, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968112244897959, + "tokens_p.mean_in_band": 0.8109375, + "tokens_rate.above_band": 0.9915682967959528, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008431703204047217 + }, + { + "epoch": 1.3975287601193012, + "grad_norm": 76.40497848136185, + "learning_rate": 3.7970552778083433e-07, + "loss": 0.2701, + "step": 6560, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9191564147627417, + "success_rate.epoch.env.math": 0.9744560075685903, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.815916955017301, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860471085159142, + "success_rate.epoch.global": 0.8941997851772288, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984158986175116, + "tokens_p.mean_in_band": 0.861328125, + "tokens_rate.above_band": 0.9954128440366973, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0045871559633027525 + }, + { + "epoch": 1.3985939497230506, + "grad_norm": 120.90800533906611, + "learning_rate": 3.796758040662827e-07, + "loss": 0.1943, + "step": 6565, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9195804195804196, + "success_rate.epoch.env.math": 0.9744560075685903, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8162983425414365, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602859631293591, + "success_rate.epoch.global": 0.8942153186930906, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9944607023411371, + "tokens_p.mean_in_band": 0.5891544117647058, + "tokens_rate.above_band": 0.9462025316455697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05379746835443038 + }, + { + "epoch": 1.3996591393268, + "grad_norm": 39.58255868842192, + "learning_rate": 3.7964606601214646e-07, + "loss": 0.3347, + "step": 6570, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, + "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9198606271777003, + "success_rate.epoch.env.math": 0.9745042492917847, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8169304886441845, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604102398942569, + "success_rate.epoch.global": 0.8944978632478633, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953853626943006, + "tokens_p.mean_in_band": 0.759765625, + "tokens_rate.above_band": 0.9846938775510204, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015306122448979591 + }, + { + "epoch": 1.4007243289305498, + "grad_norm": 115.1670534981098, + "learning_rate": 3.7961631364009843e-07, + "loss": 0.3397, + "step": 6575, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9745283018867924, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.817433081674674, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8606647686594546, + "success_rate.epoch.global": 0.8947789025039957, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998991935483871, + "tokens_p.mean_in_band": 0.74609375, + "tokens_rate.above_band": 0.9962917181705809, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003708281829419036 + }, + { + "epoch": 1.401789518534299, + "grad_norm": 45.89066912212665, + "learning_rate": 3.7958654697182207e-07, + "loss": 0.236, + "step": 6580, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9745762711864406, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8176229508196722, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860866052571239, + "success_rate.epoch.global": 0.8947927736450585, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9902777777777778, + "tokens_p.mean_in_band": 0.4388020833333333, + "tokens_rate.above_band": 0.9090909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09090909090909091 + }, + { + "epoch": 1.4028547081380487, + "grad_norm": 213.56654429197718, + "learning_rate": 3.7955676602901094e-07, + "loss": 0.2489, + "step": 6585, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9204152249134948, + "success_rate.epoch.env.math": 0.974671669793621, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8171896316507503, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8608964261797212, + "success_rate.epoch.global": 0.8948065712771595, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969565217391304, + "tokens_p.mean_below_band": 1.126900315284729e-07, + "tokens_p.mean_in_band": 0.6805555555555556, + "tokens_rate.above_band": 0.9829059829059829, + "tokens_rate.below_band": 0.0017094017094017094, + "tokens_rate.in_band": 0.015384615384615385 + }, + { + "epoch": 1.403919897741798, + "grad_norm": 318.4895880693358, + "learning_rate": 3.7952697083336933e-07, + "loss": 0.3475, + "step": 6590, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9516129032258065, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9205526770293609, + "success_rate.epoch.env.math": 0.9747899159663865, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8175629680054459, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609893739682678, + "success_rate.epoch.global": 0.8950845665961945, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996929190751445, + "tokens_p.mean_in_band": 0.810546875, + "tokens_rate.above_band": 0.9774011299435028, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022598870056497175 + }, + { + "epoch": 1.4049850873455476, + "grad_norm": 194.14735251004694, + "learning_rate": 3.7949716140661166e-07, + "loss": 0.3152, + "step": 6595, + "success_rate.epoch.env.abd": 0.9819277108433735, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.952, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9205526770293609, + "success_rate.epoch.env.math": 0.9748837209302326, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8172554347826086, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8610251705396682, + "success_rate.epoch.global": 0.8950975224037955, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952400662251656, + "tokens_p.mean_in_band": 0.67431640625, + "tokens_rate.above_band": 0.949685534591195, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.050314465408805034 + }, + { + "epoch": 1.4060502769492969, + "grad_norm": 77.83894711552718, + "learning_rate": 3.7946733777046294e-07, + "loss": 0.2131, + "step": 6600, + "success_rate.epoch.env.abd": 0.9820359281437125, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.952, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9205526770293609, + "success_rate.epoch.env.math": 0.9749536178107606, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8166441136671178, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8609857880910661, + "success_rate.epoch.global": 0.8948475289169295, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954637096774194, + "tokens_p.mean_below_band": 4.4517219066619873e-07, + "tokens_p.mean_in_band": 0.6354166666666666, + "tokens_rate.above_band": 0.9465648854961832, + "tokens_rate.below_band": 0.007633587786259542, + "tokens_rate.in_band": 0.04580152671755725 + }, + { + "epoch": 1.4071154665530465, + "grad_norm": 63.397528101143806, + "learning_rate": 3.7943749994665826e-07, + "loss": 0.113, + "step": 6605, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8715596330275229, + "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9205526770293609, + "success_rate.epoch.env.math": 0.9750462107208873, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8170155300472653, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611804383935682, + "success_rate.epoch.global": 0.8951232302045097, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993734335839599, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4081806561567958, + "grad_norm": 223.88233976494806, + "learning_rate": 3.7940764795694333e-07, + "loss": 0.3996, + "step": 6610, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8715596330275229, + "success_rate.epoch.env.agentgym:sciworld": 0.952755905511811, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9206896551724137, + "success_rate.epoch.env.math": 0.9751381215469613, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8168350168350168, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612189228379079, + "success_rate.epoch.global": 0.8951359832635983, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990828092243187, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9774590163934426, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022540983606557378 + }, + { + "epoch": 1.4092458457605455, + "grad_norm": 313.68536393246194, + "learning_rate": 3.793777818230741e-07, + "loss": 0.2412, + "step": 6615, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.8715596330275229, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9206896551724137, + "success_rate.epoch.env.math": 0.9742883379247016, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8173270651443922, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612625950956834, + "success_rate.epoch.global": 0.8951486697965572, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0004171914357682, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9987421383647799, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012578616352201257 + }, + { + "epoch": 1.410311035364295, + "grad_norm": 94.8747274718839, + "learning_rate": 3.7934790156681664e-07, + "loss": 0.1694, + "step": 6620, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.8727272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9553072625698324, + "success_rate.epoch.env.logic": 0.9209621993127147, + "success_rate.epoch.env.math": 0.9743354720439963, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8175720992622401, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861586241103208, + "success_rate.epoch.global": 0.8954214360041624, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991086862575627, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4113762249680444, + "grad_norm": 72.39484835246404, + "learning_rate": 3.793180072099476e-07, + "loss": 0.1445, + "step": 6625, + "success_rate.epoch.env.abd": 0.9823529411764705, + "success_rate.epoch.env.agentgym:alfworld": 0.8727272727272727, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9213675213675213, + "success_rate.epoch.env.math": 0.9743589743589743, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8176943699731903, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617128055596872, + "success_rate.epoch.global": 0.8956927867151012, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993564073226545, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9994282447112636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0005717552887364208 + }, + { + "epoch": 1.4124414145717938, + "grad_norm": 54.82186480674202, + "learning_rate": 3.792880987742537e-07, + "loss": 0.3586, + "step": 6630, + "success_rate.epoch.env.abd": 0.9823529411764705, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9215017064846417, + "success_rate.epoch.env.math": 0.9744058500914077, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8180602006688963, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617842551570477, + "success_rate.epoch.global": 0.895703933747412, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988001745200699, + "tokens_p.mean_in_band": 0.5962611607142857, + "tokens_rate.above_band": 0.9761499148211243, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02385008517887564 + }, + { + "epoch": 1.4135066041755433, + "grad_norm": 109.14430459485727, + "learning_rate": 3.79258176281532e-07, + "loss": 0.2295, + "step": 6635, + "success_rate.epoch.env.abd": 0.9825581395348837, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9200680272108843, + "success_rate.epoch.env.math": 0.9745222929936306, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8181818181818182, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616942169297569, + "success_rate.epoch.global": 0.8957150232318017, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0013521634615385, + "tokens_p.mean_in_band": 0.49383223684210525, + "tokens_rate.above_band": 0.9704510108864697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029548989113530325 + }, + { + "epoch": 1.4145717937792928, + "grad_norm": 236.2002781402803, + "learning_rate": 3.7922823975358987e-07, + "loss": 0.4063, + "step": 6640, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9204737732656514, + "success_rate.epoch.env.math": 0.9745685740236149, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8178785857238159, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617259692735314, + "success_rate.epoch.global": 0.8957260556127703, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956210191082803, + "tokens_p.mean_in_band": 0.5041852678571429, + "tokens_rate.above_band": 0.9573170731707317, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.042682926829268296 + }, + { + "epoch": 1.4156369833830422, + "grad_norm": 37.2934130103539, + "learning_rate": 3.791982892122448e-07, + "loss": 0.2961, + "step": 6645, + "success_rate.epoch.env.abd": 0.9828571428571429, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9204737732656514, + "success_rate.epoch.env.math": 0.9746376811594203, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8170326014637392, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617077354671402, + "success_rate.epoch.global": 0.8954802259887006, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9987359550561797, + "tokens_p.mean_in_band": 0.50439453125, + "tokens_rate.above_band": 0.96529284164859, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03470715835140998 + }, + { + "epoch": 1.4167021729867917, + "grad_norm": 260.61858205434055, + "learning_rate": 3.7916832467932453e-07, + "loss": 0.1875, + "step": 6650, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9204737732656514, + "success_rate.epoch.env.math": 0.9746835443037974, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8163129973474801, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616854610357696, + "success_rate.epoch.global": 0.8952356557377049, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954637096774194, + "tokens_p.mean_in_band": 0.5845170454545454, + "tokens_rate.above_band": 0.9712793733681462, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028720626631853787 + }, + { + "epoch": 1.4177673625905411, + "grad_norm": 10.648756138245203, + "learning_rate": 3.791383461766669e-07, + "loss": 0.2567, + "step": 6655, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9191919191919192, + "success_rate.epoch.env.math": 0.974706413730804, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8166776968894772, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617385665319229, + "success_rate.epoch.global": 0.8952478283086357, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988435660218671, + "tokens_p.mean_in_band": 0.5840567129629629, + "tokens_rate.above_band": 0.977796052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022203947368421052 + }, + { + "epoch": 1.4188325521942906, + "grad_norm": 312.8146916147841, + "learning_rate": 3.791083537261202e-07, + "loss": 0.2469, + "step": 6660, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9195979899497487, + "success_rate.epoch.env.math": 0.9738503155996393, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8172823218997362, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617526208625523, + "success_rate.epoch.global": 0.8952599388379205, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9929315476190477, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9824561403508771, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017543859649122806 + }, + { + "epoch": 1.41989774179804, + "grad_norm": 230.53629946637415, + "learning_rate": 3.790783473495425e-07, + "loss": 0.1912, + "step": 6665, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.919732441471572, + "success_rate.epoch.env.math": 0.9739208633093526, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8177631578947369, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613708696101697, + "success_rate.epoch.global": 0.8952719877986782, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956005204053684, + "tokens_p.mean_below_band": 1.525040715932846e-08, + "tokens_p.mean_in_band": 0.5325520833333334, + "tokens_rate.above_band": 0.8959509202453988, + "tokens_rate.below_band": 0.000245398773006135, + "tokens_rate.in_band": 0.10380368098159509 + }, + { + "epoch": 1.4209629314017895, + "grad_norm": 216.84921308729815, + "learning_rate": 3.7904832706880244e-07, + "loss": 0.3145, + "step": 6670, + "success_rate.epoch.env.abd": 0.9831460674157303, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9740143369175627, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8181221273801708, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614449806629131, + "success_rate.epoch.global": 0.8955375253549696, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9894021739130435, + "tokens_p.mean_in_band": 0.685546875, + "tokens_rate.above_band": 0.9829059829059829, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017094017094017096 + }, + { + "epoch": 1.422028121005539, + "grad_norm": 159.3070869283212, + "learning_rate": 3.790182929057785e-07, + "loss": 0.1988, + "step": 6675, + "success_rate.epoch.env.abd": 0.9831460674157303, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9741532976827094, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8171690694626474, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613942447236476, + "success_rate.epoch.global": 0.8952959028831563, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978316326530612, + "tokens_p.mean_in_band": 0.6150173611111112, + "tokens_rate.above_band": 0.9645669291338582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03543307086614173 + }, + { + "epoch": 1.4230933106092885, + "grad_norm": 148.83961974942127, + "learning_rate": 3.789882448823593e-07, + "loss": 0.304, + "step": 6680, + "success_rate.epoch.env.abd": 0.9832402234636871, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9742222222222222, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8171129980404964, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614269994483568, + "success_rate.epoch.global": 0.8953077699293642, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9944968553459119, + "tokens_p.mean_in_band": 0.5299479166666666, + "tokens_rate.above_band": 0.9137931034482759, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08620689655172414 + }, + { + "epoch": 1.424158500213038, + "grad_norm": 106.54259448152446, + "learning_rate": 3.7895818302044375e-07, + "loss": 0.2162, + "step": 6685, + "success_rate.epoch.env.abd": 0.9832402234636871, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9188741721854304, + "success_rate.epoch.env.math": 0.974267968056788, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.817351598173516, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8614048290831243, + "success_rate.epoch.global": 0.895319577252139, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976531620553359, + "tokens_p.mean_in_band": 0.7421875, + "tokens_rate.above_band": 0.9902152641878669, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009784735812133072 + }, + { + "epoch": 1.4252236898167874, + "grad_norm": 25.894717203224385, + "learning_rate": 3.789281073419406e-07, + "loss": 0.2225, + "step": 6690, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9188741721854304, + "success_rate.epoch.env.math": 0.974267968056788, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8184176394293126, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615102064581648, + "success_rate.epoch.global": 0.8955823293172691, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9951746323529411, + "tokens_p.mean_in_band": 0.740234375, + "tokens_rate.above_band": 0.9444444444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05555555555555555 + }, + { + "epoch": 1.4262888794205368, + "grad_norm": 94.23598100955417, + "learning_rate": 3.788980178687689e-07, + "loss": 0.346, + "step": 6695, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9191419141914191, + "success_rate.epoch.env.math": 0.9743589743589743, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8182406209573092, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615267273523624, + "success_rate.epoch.global": 0.8955933900851277, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915540540540541, + "tokens_p.mean_in_band": 0.294921875, + "tokens_rate.above_band": 0.9866666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013333333333333334 + }, + { + "epoch": 1.4273540690242863, + "grad_norm": 73.87194091231333, + "learning_rate": 3.7886791462285753e-07, + "loss": 0.2362, + "step": 6700, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.9194078947368421, + "success_rate.epoch.env.math": 0.9744493392070485, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8178294573643411, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616562465431431, + "success_rate.epoch.global": 0.8956043956043956, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975671140939597, + "tokens_p.mean_in_band": 0.6646205357142857, + "tokens_rate.above_band": 0.9906914893617021, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009308510638297872 + }, + { + "epoch": 1.4284192586280358, + "grad_norm": 152.62958825739872, + "learning_rate": 3.788377976261456e-07, + "loss": 0.2612, + "step": 6705, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9198036006546645, + "success_rate.epoch.env.math": 0.9744942832014072, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8170103092783505, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616441484226303, + "success_rate.epoch.global": 0.8953662182361734, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974343185550082, + "tokens_p.mean_in_band": 0.677734375, + "tokens_rate.above_band": 0.9744, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0256 + }, + { + "epoch": 1.4294844482317852, + "grad_norm": 176.73531538762126, + "learning_rate": 3.788076669005821e-07, + "loss": 0.2004, + "step": 6710, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9199346405228758, + "success_rate.epoch.env.math": 0.9745836985100789, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.817363344051447, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617968470262888, + "success_rate.epoch.global": 0.8956262425447317, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978595890410958, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9984802431610942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001519756838905775 + }, + { + "epoch": 1.4305496378355347, + "grad_norm": 110.1182691765544, + "learning_rate": 3.787775224681261e-07, + "loss": 0.2539, + "step": 6715, + "success_rate.epoch.env.abd": 0.9834254143646409, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9533678756476683, + "success_rate.epoch.env.logic": 0.9201954397394136, + "success_rate.epoch.env.math": 0.9746724890829694, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8175979447655748, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618804058610645, + "success_rate.epoch.global": 0.895884977689638, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981715425531915, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4316148274392841, + "grad_norm": 86.2750031817364, + "learning_rate": 3.7874736435074675e-07, + "loss": 0.2888, + "step": 6720, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9533678756476683, + "success_rate.epoch.env.logic": 0.9201954397394136, + "success_rate.epoch.env.math": 0.9747386759581882, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8175416133162612, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618977693554566, + "success_rate.epoch.global": 0.895895153313551, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945211038961039, + "tokens_p.mean_in_band": 0.7151988636363636, + "tokens_rate.above_band": 0.9333333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06666666666666667 + }, + { + "epoch": 1.4326800170430336, + "grad_norm": 518.0878196517226, + "learning_rate": 3.7871719257042293e-07, + "loss": 0.3413, + "step": 6725, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9204545454545454, + "success_rate.epoch.env.math": 0.9748045178105995, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8171355498721228, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619552821853471, + "success_rate.epoch.global": 0.8959052787370498, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978187106156083, + "tokens_p.mean_in_band": 0.732421875, + "tokens_rate.above_band": 0.9980648282535075, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019351717464925011 + }, + { + "epoch": 1.433745206646783, + "grad_norm": 198.5140189718881, + "learning_rate": 3.7868700714914366e-07, + "loss": 0.2179, + "step": 6730, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9207119741100324, + "success_rate.epoch.env.math": 0.9748263888888888, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8179503500954806, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862054745817814, + "success_rate.epoch.global": 0.8961614173228346, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99836867862969, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4348103962505325, + "grad_norm": 88.38338475919998, + "learning_rate": 3.786568081089079e-07, + "loss": 0.1391, + "step": 6735, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9209677419354839, + "success_rate.epoch.env.math": 0.9749134948096886, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.818297331639136, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8621485293441381, + "success_rate.epoch.global": 0.8964162984781542, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964171974522293, + "tokens_p.mean_in_band": 0.730078125, + "tokens_rate.above_band": 0.9401197604790419, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.059880239520958084 + }, + { + "epoch": 1.435875585854282, + "grad_norm": 179.15100171992324, + "learning_rate": 3.7862659547172443e-07, + "loss": 0.3668, + "step": 6740, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9209677419354839, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8189873417721519, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622191216462588, + "success_rate.epoch.global": 0.8966699314397649, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9923664122137404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007633587786259542 + }, + { + "epoch": 1.4369407754580314, + "grad_norm": 99.12112667556465, + "learning_rate": 3.78596369259612e-07, + "loss": 0.2861, + "step": 6745, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9212218649517685, + "success_rate.epoch.env.math": 0.9750215331610681, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.819672131147541, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623276247109736, + "success_rate.epoch.global": 0.8969223253541768, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970128676470589, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9927007299270073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0072992700729927005 + }, + { + "epoch": 1.4380059650617811, + "grad_norm": 255.09485764190708, + "learning_rate": 3.785661294945994e-07, + "loss": 0.2561, + "step": 6750, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9750859106529209, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8200125865324103, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622839547543241, + "success_rate.epoch.global": 0.8969298245614035, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9946120689655172, + "tokens_p.mean_in_band": 0.6243489583333334, + "tokens_rate.above_band": 0.9354838709677419, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06451612903225806 + }, + { + "epoch": 1.4390711546655304, + "grad_norm": 156.33161177317115, + "learning_rate": 3.7853587619872496e-07, + "loss": 0.5939, + "step": 6755, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9202551834130781, + "success_rate.epoch.env.math": 0.9751499571550986, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8193224592220828, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622502368093177, + "success_rate.epoch.global": 0.8966942148760331, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9929123711340206, + "tokens_p.mean_in_band": 0.50225830078125, + "tokens_rate.above_band": 0.9065420560747663, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09345794392523364 + }, + { + "epoch": 1.44013634426928, + "grad_norm": 97.21188495845038, + "learning_rate": 3.7850560939403716e-07, + "loss": 0.1818, + "step": 6760, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9202551834130781, + "success_rate.epoch.env.math": 0.9743150684931506, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8197747183979975, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616583835259214, + "success_rate.epoch.global": 0.8964597478176528, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994347545219638, + "tokens_p.mean_in_band": 0.58984375, + "tokens_rate.above_band": 0.9948586118251928, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005141388174807198 + }, + { + "epoch": 1.4412015338730293, + "grad_norm": 201.73881886585724, + "learning_rate": 3.7847532910259425e-07, + "loss": 0.2903, + "step": 6765, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.919047619047619, + "success_rate.epoch.env.math": 0.9743808710503843, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8202247191011236, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615954961527307, + "success_rate.epoch.global": 0.8964683115626512, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0003355061349692, + "tokens_p.mean_in_band": 0.52125, + "tokens_rate.above_band": 0.9630723781388478, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03692762186115214 + }, + { + "epoch": 1.442266723476779, + "grad_norm": 41.393958439182995, + "learning_rate": 3.7844503534646426e-07, + "loss": 0.2617, + "step": 6770, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9177215189873418, + "success_rate.epoch.env.math": 0.9744245524296675, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.820784069695084, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615505276722074, + "success_rate.epoch.global": 0.896476833976834, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997803514376997, + "tokens_p.mean_in_band": 0.47709517045454547, + "tokens_rate.above_band": 0.9861373660995589, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013862633900441084 + }, + { + "epoch": 1.4433319130805282, + "grad_norm": 68.68731784915566, + "learning_rate": 3.78414728147725e-07, + "loss": 0.1593, + "step": 6775, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9177215189873418, + "success_rate.epoch.env.math": 0.9744680851063829, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8215613382899628, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616330717840025, + "success_rate.epoch.global": 0.8967260471834376, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9921875, + "tokens_p.mean_in_band": 0.8681640625, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 1.444397102684278, + "grad_norm": 68.1099058041425, + "learning_rate": 3.7838440752846426e-07, + "loss": 0.2981, + "step": 6780, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8620689655172413, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9162717219589257, + "success_rate.epoch.env.math": 0.9745114698385726, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8218923933209648, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861085753547292, + "success_rate.epoch.global": 0.8964937560038425, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9958477135461605, + "tokens_p.mean_in_band": 0.7134765625, + "tokens_rate.above_band": 0.9914456800684346, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00855431993156544 + }, + { + "epoch": 1.4454622922880271, + "grad_norm": 32.46617417449706, + "learning_rate": 3.783540735107794e-07, + "loss": 0.3274, + "step": 6785, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.916403785488959, + "success_rate.epoch.env.math": 0.9745977984758679, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8223318938926588, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612527344990778, + "success_rate.epoch.global": 0.896741734547197, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944852941176471, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9870967741935484, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012903225806451613 + }, + { + "epoch": 1.4465274818917768, + "grad_norm": 69.98733159466664, + "learning_rate": 3.7832372611677766e-07, + "loss": 0.2678, + "step": 6790, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, + "success_rate.epoch.env.agentgym:sciworld": 0.9558823529411765, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.916403785488959, + "success_rate.epoch.env.math": 0.9738396624472574, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8220443349753694, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.861228290289078, + "success_rate.epoch.global": 0.8965105162523901, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9956232492997199, + "tokens_p.mean_in_band": 0.5857319078947368, + "tokens_rate.above_band": 0.949468085106383, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05053191489361702 + }, + { + "epoch": 1.4475926714955263, + "grad_norm": 67.61420977236945, + "learning_rate": 3.78293365368576e-07, + "loss": 0.2082, + "step": 6795, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9165354330708662, + "success_rate.epoch.env.math": 0.9738617200674536, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8223724646588814, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615381999030745, + "success_rate.epoch.global": 0.896757272293753, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987348178137652, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4486578610992757, + "grad_norm": 541.1037833542614, + "learning_rate": 3.7826299128830116e-07, + "loss": 0.5228, + "step": 6800, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9169278996865203, + "success_rate.epoch.env.math": 0.9739276703111859, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8226993865030675, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616174353769814, + "success_rate.epoch.global": 0.8970028544243578, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_p.mean_in_band": 0.8623046875, + "tokens_rate.above_band": 0.9545454545454546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045454545454545456 + }, + { + "epoch": 1.4497230507030252, + "grad_norm": 83.02353934640807, + "learning_rate": 3.782326038980895e-07, + "loss": 0.3023, + "step": 6805, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.917057902973396, + "success_rate.epoch.env.math": 0.9740150880134115, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8219094247246022, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616570284430509, + "success_rate.epoch.global": 0.8967726625533935, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9952380952380953, + "tokens_p.mean_in_band": 0.41268382352941174, + "tokens_rate.above_band": 0.9251101321585903, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07488986784140969 + }, + { + "epoch": 1.4507882403067747, + "grad_norm": 239.44035564636363, + "learning_rate": 3.782022032200871e-07, + "loss": 0.3865, + "step": 6810, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9173166926677067, + "success_rate.epoch.env.math": 0.9740802675585284, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8215158924205379, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618038437770271, + "success_rate.epoch.global": 0.896780303030303, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989669421487604, + "tokens_p.mean_in_band": 0.3152043269230769, + "tokens_rate.above_band": 0.9789644012944984, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021035598705501618 + }, + { + "epoch": 1.4518534299105241, + "grad_norm": 114.94718312539281, + "learning_rate": 3.7817178927644983e-07, + "loss": 0.1674, + "step": 6815, + "success_rate.epoch.env.abd": 0.9842105263157894, + "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9174454828660437, + "success_rate.epoch.env.math": 0.9741019214703425, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8220597196831201, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618899872713167, + "success_rate.epoch.global": 0.8970240906943788, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984903381642513, + "tokens_p.mean_in_band": 0.7921875, + "tokens_rate.above_band": 0.9764150943396226, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02358490566037736 + }, + { + "epoch": 1.4529186195142736, + "grad_norm": 164.55254171959837, + "learning_rate": 3.7814136208934306e-07, + "loss": 0.2339, + "step": 6820, + "success_rate.epoch.env.abd": 0.9842931937172775, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9174454828660437, + "success_rate.epoch.env.math": 0.974188176519567, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8218844984802431, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612290580649234, + "success_rate.epoch.global": 0.8967954759660698, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977064220183486, + "tokens_p.mean_in_band": 0.6595052083333334, + "tokens_rate.above_band": 0.9732142857142857, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026785714285714284 + }, + { + "epoch": 1.453983809118023, + "grad_norm": 78.15388922492372, + "learning_rate": 3.78110921680942e-07, + "loss": 0.3585, + "step": 6825, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9178294573643411, + "success_rate.epoch.env.math": 0.9742096505823628, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8218181818181818, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612673251723557, + "success_rate.epoch.global": 0.8968030089327692, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956521739130435, + "tokens_p.mean_in_band": 0.46205357142857145, + "tokens_rate.above_band": 0.9426229508196722, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05737704918032787 + }, + { + "epoch": 1.4550489987217725, + "grad_norm": 123.26141996969243, + "learning_rate": 3.780804680734314e-07, + "loss": 0.3277, + "step": 6830, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9180834621329211, + "success_rate.epoch.env.math": 0.9734660033167496, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.822141560798548, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613604354154293, + "success_rate.epoch.global": 0.8968105065666041, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985283933518005, + "tokens_p.mean_in_band": 0.6070963541666666, + "tokens_rate.above_band": 0.967828418230563, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032171581769437 + }, + { + "epoch": 1.456114188325522, + "grad_norm": 100.71200498254407, + "learning_rate": 3.780500012890056e-07, + "loss": 0.2128, + "step": 6835, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9556650246305419, + "success_rate.epoch.env.logic": 0.9185867895545314, + "success_rate.epoch.env.math": 0.9735099337748344, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8223564954682779, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615357665356549, + "success_rate.epoch.global": 0.8970519419747309, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999379652605459, + "tokens_p.mean_in_band": 0.833984375, + "tokens_rate.above_band": 0.9983484723369116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016515276630883566 + }, + { + "epoch": 1.4571793779292714, + "grad_norm": 210.8350261682488, + "learning_rate": 3.7801952134986855e-07, + "loss": 0.3259, + "step": 6840, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9187116564417178, + "success_rate.epoch.env.math": 0.9735099337748344, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8228915662650602, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612403932716223, + "success_rate.epoch.global": 0.8970588235294118, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.6666666666666666, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9948628048780488, + "tokens_p.mean_in_band": 0.6137806792237442, + "tokens_rate.above_band": 0.9034817100044072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09651828999559277 + }, + { + "epoch": 1.458244567533021, + "grad_norm": 87.25010672042312, + "learning_rate": 3.779890282782339e-07, + "loss": 0.3844, + "step": 6845, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9188361408882083, + "success_rate.epoch.env.math": 0.9735318444995864, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8225419664268585, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612219201200805, + "success_rate.epoch.global": 0.8968327899394504, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.990234375, + "tokens_p.mean_in_band": 0.7053125, + "tokens_rate.above_band": 0.8175182481751825, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.18248175182481752 + }, + { + "epoch": 1.4593097571367704, + "grad_norm": 171.02015220767652, + "learning_rate": 3.7795852209632455e-07, + "loss": 0.2679, + "step": 6850, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9190839694656489, + "success_rate.epoch.env.math": 0.9736191261335532, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8223684210526315, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8612366078325512, + "success_rate.epoch.global": 0.8968401486988847, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9922816265060241, + "tokens_p.mean_in_band": 0.6792279411764706, + "tokens_rate.above_band": 0.907103825136612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09289617486338798 + }, + { + "epoch": 1.4603749467405198, + "grad_norm": 52.691519408635706, + "learning_rate": 3.7792800282637344e-07, + "loss": 0.238, + "step": 6855, + "success_rate.epoch.env.abd": 0.9844559585492227, + "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9194528875379939, + "success_rate.epoch.env.math": 0.9736625514403292, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8227923627684964, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8613199936183885, + "success_rate.epoch.global": 0.8970792767732962, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9936835106382979, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9894736842105263, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010526315789473684 + }, + { + "epoch": 1.4614401363442693, + "grad_norm": 94.91942663446012, + "learning_rate": 3.7789747049062276e-07, + "loss": 0.0882, + "step": 6860, + "success_rate.epoch.env.abd": 0.9845360824742269, + "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9194528875379939, + "success_rate.epoch.env.math": 0.9737274220032841, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8231089934484812, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8615730872128912, + "success_rate.epoch.global": 0.8973172987974098, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998282967032967, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4625053259480187, + "grad_norm": 347.3620080155369, + "learning_rate": 3.7786692511132416e-07, + "loss": 0.445, + "step": 6865, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, + "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9194528875379939, + "success_rate.epoch.env.math": 0.9737704918032787, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8237388724035608, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8616627928013145, + "success_rate.epoch.global": 0.8975542224273189, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998291015625, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4635705155517682, + "grad_norm": 33.484943461277815, + "learning_rate": 3.7783636671073894e-07, + "loss": 0.2315, + "step": 6870, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, + "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9195751138088012, + "success_rate.epoch.env.math": 0.9738562091503268, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8240521327014217, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617635253809339, + "success_rate.epoch.global": 0.8977900552486188, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983231707317073, + "tokens_p.mean_in_band": 0.66796875, + "tokens_rate.above_band": 0.9808612440191388, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019138755980861243 + }, + { + "epoch": 1.4646357051555177, + "grad_norm": 16.238490908343042, + "learning_rate": 3.778057953111378e-07, + "loss": 0.2136, + "step": 6875, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9198184568835098, + "success_rate.epoch.env.math": 0.9738988580750407, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.824468085106383, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8618605975320118, + "success_rate.epoch.global": 0.8980248047772164, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973569651741293, + "tokens_p.mean_in_band": 0.8098958333333334, + "tokens_rate.above_band": 0.9852941176470589, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014705882352941176 + }, + { + "epoch": 1.4657008947592671, + "grad_norm": 62.57408491444139, + "learning_rate": 3.77775210934801e-07, + "loss": 0.2458, + "step": 6880, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8617886178861789, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.9186746987951807, + "success_rate.epoch.env.math": 0.9739625711960944, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8245717660956882, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619217058778478, + "success_rate.epoch.global": 0.8980293308890925, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967919340054996, + "tokens_p.mean_in_band": 0.5326286764705882, + "tokens_rate.above_band": 0.9846570397111913, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015342960288808664 + }, + { + "epoch": 1.4667660843630166, + "grad_norm": 117.12791554500112, + "learning_rate": 3.7774461360401824e-07, + "loss": 0.3792, + "step": 6885, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.9186746987951807, + "success_rate.epoch.env.math": 0.974025974025974, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8246027074749853, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8620316106168889, + "success_rate.epoch.global": 0.8980338363054412, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963321596244131, + "tokens_p.mean_in_band": 0.5731026785714286, + "tokens_rate.above_band": 0.9681818181818181, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031818181818181815 + }, + { + "epoch": 1.467831273966766, + "grad_norm": 53.039999490607684, + "learning_rate": 3.7771400334108855e-07, + "loss": 0.1108, + "step": 6890, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9187969924812031, + "success_rate.epoch.env.math": 0.9740680713128039, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.825014679976512, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8621326956155518, + "success_rate.epoch.global": 0.8982664233576643, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995575221238938, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9991158267020336, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0008841732979664014 + }, + { + "epoch": 1.4688964635705155, + "grad_norm": 121.38925392283164, + "learning_rate": 3.776833801683206e-07, + "loss": 0.2769, + "step": 6895, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.95260663507109, + "success_rate.epoch.env.logic": 0.918918918918919, + "success_rate.epoch.env.math": 0.9733441033925686, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8254247217340364, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8621357577032921, + "success_rate.epoch.global": 0.8982703686845699, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9944282945736435, + "tokens_p.mean_in_band": 0.5833333333333334, + "tokens_rate.above_band": 0.9772727272727273, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022727272727272728 + }, + { + "epoch": 1.469961653174265, + "grad_norm": 355.2643071772762, + "learning_rate": 3.776527441080322e-07, + "loss": 0.2891, + "step": 6900, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9194029850746268, + "success_rate.epoch.env.math": 0.9733656174334141, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.82583284628872, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622391448268732, + "success_rate.epoch.global": 0.8985013623978202, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975454980842912, + "tokens_p.mean_in_band": 0.5755208333333334, + "tokens_rate.above_band": 0.9886363636363636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011363636363636364 + }, + { + "epoch": 1.4710268427780144, + "grad_norm": 1.1010876385696688, + "learning_rate": 3.776220951825508e-07, + "loss": 0.2222, + "step": 6905, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9196428571428571, + "success_rate.epoch.env.math": 0.9734299516908212, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8250728862973761, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622047046067568, + "success_rate.epoch.global": 0.8982782057091074, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9946120689655172, + "tokens_p.mean_in_band": 0.470703125, + "tokens_rate.above_band": 0.9157894736842105, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08421052631578947 + }, + { + "epoch": 1.472092032381764, + "grad_norm": 52.237889166183265, + "learning_rate": 3.77591433414213e-07, + "loss": 0.3713, + "step": 6910, + "success_rate.epoch.env.abd": 0.9849246231155779, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9197622585438335, + "success_rate.epoch.env.math": 0.9734726688102894, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8244186046511628, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619078839060588, + "success_rate.epoch.global": 0.8978300180831826, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9959514170040485, + "tokens_p.mean_in_band": 0.5463005514705882, + "tokens_rate.above_band": 0.8790035587188612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12099644128113879 + }, + { + "epoch": 1.4731572219855134, + "grad_norm": 95.04107628637088, + "learning_rate": 3.77560758825365e-07, + "loss": 0.3626, + "step": 6915, + "success_rate.epoch.env.abd": 0.985, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9198813056379822, + "success_rate.epoch.env.math": 0.9734939759036144, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8244495944380069, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8619504453035116, + "success_rate.epoch.global": 0.8978349120433018, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976108562691132, + "tokens_p.mean_in_band": 0.7584635416666666, + "tokens_rate.above_band": 0.9819819819819819, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018018018018018018 + }, + { + "epoch": 1.4742224115892628, + "grad_norm": 27.531466838226915, + "learning_rate": 3.775300714383621e-07, + "loss": 0.1848, + "step": 6920, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.856, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9186390532544378, + "success_rate.epoch.env.math": 0.9735788630904724, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8240740740740741, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8611903103488389, + "success_rate.epoch.global": 0.8973897389738974, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.994968220338983, + "tokens_p.mean_in_band": 0.6028645833333334, + "tokens_rate.above_band": 0.9291338582677166, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07086614173228346 + }, + { + "epoch": 1.4752876011930123, + "grad_norm": 231.5171896718186, + "learning_rate": 3.774993712755692e-07, + "loss": 0.5015, + "step": 6925, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9175257731958762, + "success_rate.epoch.env.math": 0.9736, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8240046162723601, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604671056448027, + "success_rate.epoch.global": 0.8969465648854962, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.6166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0006236141906875, + "tokens_p.mean_in_band": 0.443115234375, + "tokens_rate.above_band": 0.9575371549893843, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04246284501061571 + }, + { + "epoch": 1.4763527907967617, + "grad_norm": 182.226877016691, + "learning_rate": 3.774686583593602e-07, + "loss": 0.2062, + "step": 6930, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9175257731958762, + "success_rate.epoch.env.math": 0.9736842105263158, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8239355581127733, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602279828921872, + "success_rate.epoch.global": 0.8967293906810035, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954379562043796, + "tokens_p.mean_in_band": 0.7049005681818182, + "tokens_rate.above_band": 0.8616352201257862, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13836477987421383 + }, + { + "epoch": 1.4774179804005114, + "grad_norm": 152.21272684933314, + "learning_rate": 3.7743793271211853e-07, + "loss": 0.2624, + "step": 6935, + "success_rate.epoch.env.abd": 0.9851485148514851, + "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9177679882525698, + "success_rate.epoch.env.math": 0.9737470167064439, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.824036802760207, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603995183736429, + "success_rate.epoch.global": 0.8969602145730889, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0014982876712328, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.4784831700042607, + "grad_norm": 26.477416517540636, + "learning_rate": 3.7740719435623683e-07, + "loss": 0.4108, + "step": 6940, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9178885630498533, + "success_rate.epoch.env.math": 0.9738095238095238, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8239678899082569, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8604165482894406, + "success_rate.epoch.global": 0.8969669937555754, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992739898989899, + "tokens_p.mean_in_band": 0.37890625, + "tokens_rate.above_band": 0.8918918918918919, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10810810810810811 + }, + { + "epoch": 1.4795483596080103, + "grad_norm": 36.71226746501737, + "learning_rate": 3.77376443314117e-07, + "loss": 0.3865, + "step": 6945, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.91800878477306, + "success_rate.epoch.env.math": 0.9738924050632911, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8232265446224256, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8603873757209666, + "success_rate.epoch.global": 0.8967512238540276, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977571770334929, + "tokens_p.mean_in_band": 0.5830965909090909, + "tokens_rate.above_band": 0.95, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05 + }, + { + "epoch": 1.4806135492117596, + "grad_norm": 41.19193732455248, + "learning_rate": 3.7734567960817005e-07, + "loss": 0.1933, + "step": 6950, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9183673469387755, + "success_rate.epoch.env.math": 0.9739336492890995, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8234285714285714, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8601683771825678, + "success_rate.epoch.global": 0.8967584369449378, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994188596491228, + "tokens_p.mean_in_band": 0.599476439790576, + "tokens_rate.above_band": 0.8818069306930693, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11819306930693069 + }, + { + "epoch": 1.4816787388155093, + "grad_norm": 67.65891632659265, + "learning_rate": 3.7731490326081656e-07, + "loss": 0.3133, + "step": 6955, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9171511627906976, + "success_rate.epoch.env.math": 0.9739542225730071, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8238312428734321, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8601467781326686, + "success_rate.epoch.global": 0.8967656180770935, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980279126213593, + "tokens_p.mean_in_band": 0.701891447368421, + "tokens_rate.above_band": 0.9559164733178654, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04408352668213457 + }, + { + "epoch": 1.4827439284192585, + "grad_norm": 41.2008785320527, + "learning_rate": 3.7728411429448593e-07, + "loss": 0.268, + "step": 6960, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9495412844036697, + "success_rate.epoch.env.logic": 0.9175108538350217, + "success_rate.epoch.env.math": 0.9739952718676123, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8234624145785877, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602518423403533, + "success_rate.epoch.global": 0.8967499447269511, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9966755319148937, + "tokens_p.mean_in_band": 0.696875, + "tokens_rate.above_band": 0.9868766404199475, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013123359580052493 + }, + { + "epoch": 1.4838091180230082, + "grad_norm": 85.90622121485156, + "learning_rate": 3.7725331273161705e-07, + "loss": 0.2284, + "step": 6965, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9495412844036697, + "success_rate.epoch.env.logic": 0.9177489177489178, + "success_rate.epoch.env.math": 0.9732914375490966, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8231949971574758, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.860209923825714, + "success_rate.epoch.global": 0.8965365100375028, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9952116935483871, + "tokens_p.mean_in_band": 0.57734375, + "tokens_rate.above_band": 0.9253731343283582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07462686567164178 + }, + { + "epoch": 1.4848743076267574, + "grad_norm": 62.21930551436523, + "learning_rate": 3.7722249859465783e-07, + "loss": 0.3091, + "step": 6970, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9497716894977168, + "success_rate.epoch.env.logic": 0.9178674351585014, + "success_rate.epoch.env.math": 0.9733750978856696, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8229284903518729, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8602316073534244, + "success_rate.epoch.global": 0.8965441338322694, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963357300884956, + "tokens_p.mean_in_band": 0.6690848214285714, + "tokens_rate.above_band": 0.9847494553376906, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015250544662309368 + }, + { + "epoch": 1.4859394972305071, + "grad_norm": 112.12691753323395, + "learning_rate": 3.771916719060654e-07, + "loss": 0.3132, + "step": 6975, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9497716894977168, + "success_rate.epoch.env.logic": 0.9169054441260746, + "success_rate.epoch.env.math": 0.9734167318217357, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.822562358276644, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630320574691871, + "success_rate.epoch.global": 0.8963320887327038, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975353422619048, + "tokens_p.mean_in_band": 0.43689903846153844, + "tokens_rate.above_band": 0.9451476793248945, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05485232067510549 + }, + { + "epoch": 1.4870046868342566, + "grad_norm": 133.8394276351542, + "learning_rate": 3.77160832688306e-07, + "loss": 0.2747, + "step": 6980, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9502262443438914, + "success_rate.epoch.env.logic": 0.9172610556348074, + "success_rate.epoch.env.math": 0.9734789391575663, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8221970554926388, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630781548243445, + "success_rate.epoch.global": 0.8963401271093578, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997477578475337, + "tokens_p.mean_in_band": 0.51953125, + "tokens_rate.above_band": 0.9964253798033958, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0035746201966041107 + }, + { + "epoch": 1.488069876438006, + "grad_norm": 58.66485414134898, + "learning_rate": 3.771299809638551e-07, + "loss": 0.1686, + "step": 6985, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9504504504504504, + "success_rate.epoch.env.logic": 0.9176136363636364, + "success_rate.epoch.env.math": 0.9735408560311284, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8219332956472584, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631122406301235, + "success_rate.epoch.global": 0.896348130330199, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969117647058824, + "tokens_p.mean_in_band": 0.706640625, + "tokens_rate.above_band": 0.9883720930232558, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011627906976744186 + }, + { + "epoch": 1.4891350660417555, + "grad_norm": 26.88216210048663, + "learning_rate": 3.770991167551972e-07, + "loss": 0.2377, + "step": 6990, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9178470254957507, + "success_rate.epoch.env.math": 0.9736024844720497, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8221343873517787, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632174015335977, + "success_rate.epoch.global": 0.8965742963124591, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0007468929254302, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9995222169135213, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00047778308647873863 + }, + { + "epoch": 1.490200255645505, + "grad_norm": 1026.8160156422239, + "learning_rate": 3.770682400848258e-07, + "loss": 0.4027, + "step": 6995, + "success_rate.epoch.env.abd": 0.9854368932038835, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9178470254957507, + "success_rate.epoch.env.math": 0.9736434108527132, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8227349465391108, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633065816689779, + "success_rate.epoch.global": 0.8967994774657086, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9926353503184714, + "tokens_p.mean_in_band": 0.59375, + "tokens_rate.above_band": 0.9936708860759493, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006329113924050633 + }, + { + "epoch": 1.4912654452492544, + "grad_norm": 319.6950635031251, + "learning_rate": 3.7703735097524373e-07, + "loss": 0.4081, + "step": 7000, + "success_rate.epoch.env.abd": 0.9854368932038835, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9180790960451978, + "success_rate.epoch.env.math": 0.9736638264910922, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8210880538418396, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632844245585349, + "success_rate.epoch.global": 0.896154681729307, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9978197674418605, + "tokens_p.mean_in_band": 0.50927734375, + "tokens_rate.above_band": 0.9416058394160584, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.058394160583941604 + }, + { + "epoch": 1.4923306348530039, + "grad_norm": 69.18474094111437, + "learning_rate": 3.770064494489627e-07, + "loss": 0.2266, + "step": 7005, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.918194640338505, + "success_rate.epoch.env.math": 0.9737451737451738, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8213885778275476, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634124340894875, + "success_rate.epoch.global": 0.8963797962280512, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9950770547945206, + "tokens_p.mean_in_band": 0.845703125, + "tokens_rate.above_band": 0.9733333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02666666666666667 + }, + { + "epoch": 1.4933958244567533, + "grad_norm": 202.44290441336236, + "learning_rate": 3.769755355285035e-07, + "loss": 0.338, + "step": 7010, + "success_rate.epoch.env.abd": 0.9855769230769231, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9186535764375876, + "success_rate.epoch.env.math": 0.9737654320987654, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8212290502793296, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634478289626301, + "success_rate.epoch.global": 0.8963876270819814, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933510638297872, + "tokens_p.mean_in_band": 0.40792410714285715, + "tokens_rate.above_band": 0.9306930693069307, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06930693069306931 + }, + { + "epoch": 1.4944610140605028, + "grad_norm": 58.94943868995095, + "learning_rate": 3.76944609236396e-07, + "loss": 0.2973, + "step": 7015, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9187675070028011, + "success_rate.epoch.env.math": 0.9738058551617874, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8216276477146043, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863534666632005, + "success_rate.epoch.global": 0.8966112669976257, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994169776119403, + "tokens_p.mean_in_band": 0.818359375, + "tokens_rate.above_band": 0.9962825278810409, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0037174721189591076 + }, + { + "epoch": 1.4955262036642523, + "grad_norm": 188.42181743688602, + "learning_rate": 3.7691367059517906e-07, + "loss": 0.2677, + "step": 7020, + "success_rate.epoch.env.abd": 0.9858490566037735, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9513274336283186, + "success_rate.epoch.env.logic": 0.9187675070028011, + "success_rate.epoch.env.math": 0.9738461538461538, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8219254312743461, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636932340476121, + "success_rate.epoch.global": 0.896833943570967, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997184684684685, + "tokens_p.mean_in_band": 0.6103515625, + "tokens_rate.above_band": 0.9955156950672646, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004484304932735426 + }, + { + "epoch": 1.4965913932680017, + "grad_norm": 100.91654830386487, + "learning_rate": 3.7688271962740057e-07, + "loss": 0.2314, + "step": 7025, + "success_rate.epoch.env.abd": 0.9858490566037735, + "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9192200557103064, + "success_rate.epoch.env.math": 0.9738662567255957, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8221234018899388, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631773891979537, + "success_rate.epoch.global": 0.8968407479045777, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984802431610942, + "tokens_p.mean_in_band": 0.7484375, + "tokens_rate.above_band": 0.9949596774193549, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005040322580645161 + }, + { + "epoch": 1.4976565828717512, + "grad_norm": 103.72381943962169, + "learning_rate": 3.768517563556173e-07, + "loss": 0.2753, + "step": 7030, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9192200557103064, + "success_rate.epoch.env.math": 0.973159509202454, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8227146814404432, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863172931778075, + "success_rate.epoch.global": 0.896847523053828, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9947139303482587, + "tokens_p.mean_in_band": 0.6895833333333333, + "tokens_rate.above_band": 0.9305555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06944444444444445 + }, + { + "epoch": 1.4987217724755006, + "grad_norm": 201.2032751309317, + "learning_rate": 3.768207808023951e-07, + "loss": 0.2265, + "step": 7035, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9196675900277008, + "success_rate.epoch.env.math": 0.9731800766283525, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8230088495575221, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633549757266935, + "success_rate.epoch.global": 0.897068264498181, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998114224137931, + "tokens_p.mean_in_band": 0.8177083333333334, + "tokens_rate.above_band": 0.9747899159663865, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025210084033613446 + }, + { + "epoch": 1.49978696207925, + "grad_norm": 36.486216099178016, + "learning_rate": 3.767897929903088e-07, + "loss": 0.1893, + "step": 7040, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9186206896551724, + "success_rate.epoch.env.math": 0.9732415902140673, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8233995584988962, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863300914104381, + "success_rate.epoch.global": 0.8970745248772155, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987582781456954, + "tokens_p.mean_in_band": 0.6867897727272727, + "tokens_rate.above_band": 0.9320987654320988, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06790123456790123 + }, + { + "epoch": 1.5008521516829996, + "grad_norm": 71.7137765131587, + "learning_rate": 3.7675879294194194e-07, + "loss": 0.3056, + "step": 7045, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9188445667125172, + "success_rate.epoch.env.math": 0.973302822273074, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8238855255916345, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633710119415707, + "success_rate.epoch.global": 0.8972938418921799, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9933139534883721, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9817351598173516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0182648401826484 + }, + { + "epoch": 1.501917341286749, + "grad_norm": 162.23594326269188, + "learning_rate": 3.7672778067988725e-07, + "loss": 0.2246, + "step": 7050, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9191780821917809, + "success_rate.epoch.env.math": 0.9725609756097561, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8238199780461032, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633513864148827, + "success_rate.epoch.global": 0.897086965766532, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9936131386861314, + "tokens_p.mean_in_band": 0.5534446022727273, + "tokens_rate.above_band": 0.9256756756756757, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07432432432432433 + }, + { + "epoch": 1.5029825308904985, + "grad_norm": 74.87500521829533, + "learning_rate": 3.766967562267462e-07, + "loss": 0.2424, + "step": 7055, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9193989071038251, + "success_rate.epoch.env.math": 0.9726235741444867, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.823658269441402, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633856011562959, + "success_rate.epoch.global": 0.8970931466157437, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962797619047619, + "tokens_p.mean_in_band": 0.70859375, + "tokens_rate.above_band": 0.9618320610687023, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03816793893129771 + }, + { + "epoch": 1.504047720494248, + "grad_norm": 125.536291463342, + "learning_rate": 3.7666571960512916e-07, + "loss": 0.198, + "step": 7060, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8484848484848485, + "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9182561307901907, + "success_rate.epoch.env.math": 0.9726858877086495, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8238512035010941, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661497370229512, + "success_rate.epoch.global": 0.897099301291552, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975715746421268, + "tokens_p.mean_in_band": 0.7847222222222222, + "tokens_rate.above_band": 0.9819277108433735, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018072289156626505 + }, + { + "epoch": 1.5051129100979974, + "grad_norm": 81.33511731632726, + "learning_rate": 3.766346708376555e-07, + "loss": 0.2534, + "step": 7065, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9182561307901907, + "success_rate.epoch.env.math": 0.972809667673716, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8240437158469945, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8663046136533429, + "success_rate.epoch.global": 0.8973167124445384, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986033519553073, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.5061780997017469, + "grad_norm": 67.92568931057995, + "learning_rate": 3.766036099469533e-07, + "loss": 0.373, + "step": 7070, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9519650655021834, + "success_rate.epoch.env.logic": 0.9182561307901907, + "success_rate.epoch.env.math": 0.9729323308270676, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.824235807860262, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8663717018759375, + "success_rate.epoch.global": 0.8975332068311196, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0002806886227544, + "tokens_p.mean_in_band": 0.78515625, + "tokens_rate.above_band": 0.99800796812749, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00199203187250996 + }, + { + "epoch": 1.5072432893054963, + "grad_norm": 74.2854230790745, + "learning_rate": 3.765725369556594e-07, + "loss": 0.1376, + "step": 7075, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9521739130434783, + "success_rate.epoch.env.logic": 0.9184782608695652, + "success_rate.epoch.env.math": 0.972972972972973, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8247142079477409, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866458067317215, + "success_rate.epoch.global": 0.8977487902377446, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987529342723005, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9976580796252927, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00234192037470726 + }, + { + "epoch": 1.5083084789092458, + "grad_norm": 177.46371989335142, + "learning_rate": 3.7654145188641964e-07, + "loss": 0.2073, + "step": 7080, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9185888738127544, + "success_rate.epoch.env.math": 0.9730134932533733, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8245518739815317, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665040691138995, + "success_rate.epoch.global": 0.897753516691161, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987012987012988, + "tokens_p.mean_in_band": 0.537109375, + "tokens_rate.above_band": 0.9897172236503856, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010282776349614395 + }, + { + "epoch": 1.5093736685129953, + "grad_norm": 92.38783690469769, + "learning_rate": 3.765103547618887e-07, + "loss": 0.25, + "step": 7085, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9185888738127544, + "success_rate.epoch.env.math": 0.9730740463724757, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8240389821331889, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8664849362262643, + "success_rate.epoch.global": 0.8975487115021998, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9964622641509434, + "tokens_p.mean_in_band": 0.4986979166666667, + "tokens_rate.above_band": 0.8983050847457628, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1016949152542373 + }, + { + "epoch": 1.510438858116745, + "grad_norm": 35.49707992488118, + "learning_rate": 3.764792456047298e-07, + "loss": 0.1749, + "step": 7090, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.8507462686567164, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9185888738127544, + "success_rate.epoch.env.math": 0.9731743666169895, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8237837837837838, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665915348550868, + "success_rate.epoch.global": 0.897553836504286, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000164907651715, + "tokens_p.mean_in_band": 0.4184027777777778, + "tokens_rate.above_band": 0.9921465968586387, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007853403141361256 + }, + { + "epoch": 1.5115040477204942, + "grad_norm": 198.1590266783774, + "learning_rate": 3.7644812443761516e-07, + "loss": 0.393, + "step": 7095, + "success_rate.epoch.env.abd": 0.9861751152073732, + "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9186991869918699, + "success_rate.epoch.env.math": 0.9732540861812778, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8228941684665226, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666401352369726, + "success_rate.epoch.global": 0.8973503025245149, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986945169712794, + "tokens_p.mean_in_band": 0.6633522727272727, + "tokens_rate.above_band": 0.9720812182741116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027918781725888325 + }, + { + "epoch": 1.5125692373242439, + "grad_norm": 103.3237623062668, + "learning_rate": 3.764169912832256e-07, + "loss": 0.2709, + "step": 7100, + "success_rate.epoch.env.abd": 0.9861751152073732, + "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.918809201623816, + "success_rate.epoch.env.math": 0.9733135656041513, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.823371028540657, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667206069293957, + "success_rate.epoch.global": 0.8975640224859462, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971098265895953, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.513634426927993, + "grad_norm": 313.5706435826607, + "learning_rate": 3.763858461642508e-07, + "loss": 0.351, + "step": 7105, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9527896995708155, + "success_rate.epoch.env.logic": 0.918809201623816, + "success_rate.epoch.env.math": 0.9733727810650887, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8238453276047261, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8667933727568492, + "success_rate.epoch.global": 0.8977768543527945, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956801470588236, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9883720930232558, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011627906976744186 + }, + { + "epoch": 1.5146996165317428, + "grad_norm": 252.9462269290008, + "learning_rate": 3.763546891033891e-07, + "loss": 0.353, + "step": 7110, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9529914529914529, + "success_rate.epoch.env.logic": 0.918918918918919, + "success_rate.epoch.env.math": 0.9734317343173432, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8242229367631297, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8669604052791342, + "success_rate.epoch.global": 0.897988803649181, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983689205219455, + "tokens_p.mean_in_band": 0.8424479166666666, + "tokens_rate.above_band": 0.9929328621908127, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007067137809187279 + }, + { + "epoch": 1.515764806135492, + "grad_norm": 98.49626600731914, + "learning_rate": 3.7632352012334746e-07, + "loss": 0.3528, + "step": 7115, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.918918918918919, + "success_rate.epoch.env.math": 0.9734317343173432, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8242521367521367, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86661131930367, + "success_rate.epoch.global": 0.8977860542106352, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957843777197564, + "tokens_p.mean_in_band": 0.672945205479452, + "tokens_rate.above_band": 0.9402618657937807, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05973813420621931 + }, + { + "epoch": 1.5168299957392417, + "grad_norm": 372.763557444237, + "learning_rate": 3.7629233924684166e-07, + "loss": 0.552, + "step": 7120, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9190283400809717, + "success_rate.epoch.env.math": 0.9734904270986745, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8242811501597445, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666292399719417, + "success_rate.epoch.global": 0.8977906256452612, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936733128834356, + "tokens_p.mean_in_band": 0.7236328125, + "tokens_rate.above_band": 0.9314285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06857142857142857 + }, + { + "epoch": 1.517895185342991, + "grad_norm": 27.659399049715127, + "learning_rate": 3.762611464965961e-07, + "loss": 0.1832, + "step": 7125, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9191374663072777, + "success_rate.epoch.env.math": 0.973568281938326, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8241232731137088, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666530602696308, + "success_rate.epoch.global": 0.8977951782402638, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978780864197531, + "tokens_p.mean_in_band": 0.4583333333333333, + "tokens_rate.above_band": 0.9642857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03571428571428571 + }, + { + "epoch": 1.5189603749467406, + "grad_norm": 112.36836328028735, + "learning_rate": 3.762299418953438e-07, + "loss": 0.4154, + "step": 7130, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.8540145985401459, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9192462987886945, + "success_rate.epoch.env.math": 0.9736263736263736, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.823966065747614, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866759074894518, + "success_rate.epoch.global": 0.8977997121118652, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984975961538461, + "tokens_p.mean_in_band": 0.3404017857142857, + "tokens_rate.above_band": 0.9674418604651163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03255813953488372 + }, + { + "epoch": 1.5200255645504899, + "grad_norm": 195.96966493282724, + "learning_rate": 3.7619872546582654e-07, + "loss": 0.1902, + "step": 7135, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8561151079136691, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9194630872483222, + "success_rate.epoch.env.math": 0.9736263736263736, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8243386243386244, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8670289653014713, + "success_rate.epoch.global": 0.8980094397701621, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994887107329843, + "tokens_p.mean_in_band": 0.69921875, + "tokens_rate.above_band": 0.9986928104575163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00130718954248366 + }, + { + "epoch": 1.5210907541542396, + "grad_norm": 119.41359684404878, + "learning_rate": 3.7616749723079455e-07, + "loss": 0.2443, + "step": 7140, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9194630872483222, + "success_rate.epoch.env.math": 0.9737609329446064, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8245243128964059, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671515104928534, + "success_rate.epoch.global": 0.8982183084169568, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977678571428571, + "tokens_p.mean_in_band": 0.77734375, + "tokens_rate.above_band": 0.9966101694915255, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003389830508474576 + }, + { + "epoch": 1.5221559437579888, + "grad_norm": 110.2857509718093, + "learning_rate": 3.761362572130067e-07, + "loss": 0.3237, + "step": 7145, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9198931909212283, + "success_rate.epoch.env.math": 0.9737991266375546, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8243670886075949, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671797898634938, + "success_rate.epoch.global": 0.8982219497240956, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942129629629629, + "tokens_p.mean_in_band": 0.42041015625, + "tokens_rate.above_band": 0.9759036144578314, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024096385542168676 + }, + { + "epoch": 1.5232211333617385, + "grad_norm": 134.41527561927634, + "learning_rate": 3.761050054352306e-07, + "loss": 0.3545, + "step": 7150, + "success_rate.epoch.env.abd": 0.9819819819819819, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9537815126050421, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9738562091503268, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8247368421052632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8672534434359714, + "success_rate.epoch.global": 0.898429532939017, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993562734082397, + "tokens_p.mean_in_band": 0.82421875, + "tokens_rate.above_band": 0.9907235621521335, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00927643784786642 + }, + { + "epoch": 1.5242863229654877, + "grad_norm": 36.09520468714888, + "learning_rate": 3.7607374192024224e-07, + "loss": 0.2294, + "step": 7155, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9201065246338216, + "success_rate.epoch.env.math": 0.9739130434782609, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8250131371518655, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674104440099548, + "success_rate.epoch.global": 0.8986362711174435, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969812164579607, + "tokens_p.mean_in_band": 0.76171875, + "tokens_rate.above_band": 0.9893805309734514, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010619469026548672 + }, + { + "epoch": 1.5253515125692374, + "grad_norm": 58.54681697123234, + "learning_rate": 3.7604246669082633e-07, + "loss": 0.2206, + "step": 7160, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9201065246338216, + "success_rate.epoch.env.math": 0.9725631768953069, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8253801782905087, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673420093066817, + "success_rate.epoch.global": 0.8984359130611416, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.6, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995045731707317, + "tokens_p.mean_in_band": 0.5724609375, + "tokens_rate.above_band": 0.9425287356321839, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05747126436781609 + }, + { + "epoch": 1.5264167021729866, + "grad_norm": 96.52544308919609, + "learning_rate": 3.76011179769776e-07, + "loss": 0.3362, + "step": 7165, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9201065246338216, + "success_rate.epoch.env.math": 0.9726618705035971, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8246991104133962, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673097226537084, + "success_rate.epoch.global": 0.8982363673221163, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9947033898305084, + "tokens_p.mean_in_band": 0.677734375, + "tokens_rate.above_band": 0.946524064171123, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.053475935828877004 + }, + { + "epoch": 1.5274818917767363, + "grad_norm": 201.72454681434493, + "learning_rate": 3.7597988117989286e-07, + "loss": 0.29, + "step": 7170, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9204244031830239, + "success_rate.epoch.env.math": 0.9727011494252874, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.825065274151436, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673827588430476, + "success_rate.epoch.global": 0.8984422415537123, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977189781021898, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.5285470813804856, + "grad_norm": 121.52833879106123, + "learning_rate": 3.759485709439871e-07, + "loss": 0.3213, + "step": 7175, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9207397622192867, + "success_rate.epoch.env.math": 0.9727793696275072, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8253388946819604, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674434133675029, + "success_rate.epoch.global": 0.8986472844740562, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.989480198019802, + "tokens_p.mean_in_band": 0.818359375, + "tokens_rate.above_band": 0.9805825242718447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019417475728155338 + }, + { + "epoch": 1.5296122709842352, + "grad_norm": 154.19701165781498, + "learning_rate": 3.7591724908487754e-07, + "loss": 0.1985, + "step": 7180, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.920844327176781, + "success_rate.epoch.env.math": 0.9728377412437456, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8257930317212688, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867519915522985, + "success_rate.epoch.global": 0.8988515011082007, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969706632653061, + "tokens_p.mean_in_band": 0.6653645833333334, + "tokens_rate.above_band": 0.9849246231155779, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01507537688442211 + }, + { + "epoch": 1.5306774605879847, + "grad_norm": 314.0843373614995, + "learning_rate": 3.758859156253912e-07, + "loss": 0.3544, + "step": 7185, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8591549295774648, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9541666666666667, + "success_rate.epoch.env.logic": 0.9209486166007905, + "success_rate.epoch.env.math": 0.9729344729344729, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8254545454545454, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676156616435177, + "success_rate.epoch.global": 0.8988538105771164, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996811224489796, + "tokens_p.mean_in_band": 0.24140625, + "tokens_rate.above_band": 0.9936628643852978, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0063371356147021544 + }, + { + "epoch": 1.5317426501917342, + "grad_norm": 66.80951682709765, + "learning_rate": 3.758545705883637e-07, + "loss": 0.287, + "step": 7190, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9541666666666667, + "success_rate.epoch.env.logic": 0.9210526315789473, + "success_rate.epoch.env.math": 0.9730113636363636, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8257261410788381, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677463372677662, + "success_rate.epoch.global": 0.8990567930965282, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996170343137255, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9951219512195122, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004878048780487805 + }, + { + "epoch": 1.5328078397954836, + "grad_norm": 71.9227432628488, + "learning_rate": 3.7582321399663913e-07, + "loss": 0.7006, + "step": 7195, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9198423127463863, + "success_rate.epoch.env.math": 0.9730113636363636, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8254790264111859, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8672817370868615, + "success_rate.epoch.global": 0.8986581213699179, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.611111111111111, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.997301633605601, + "tokens_p.mean_in_band": 0.5135135135135135, + "tokens_rate.above_band": 0.9586129753914989, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04138702460850112 + }, + { + "epoch": 1.533873029399233, + "grad_norm": 115.53448741846098, + "learning_rate": 3.7579184587306987e-07, + "loss": 0.449, + "step": 7200, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9199475065616798, + "success_rate.epoch.env.math": 0.9730496453900709, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.825503355704698, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673169064425795, + "success_rate.epoch.global": 0.8986608035178892, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9947311046511628, + "tokens_p.mean_in_band": 0.6859375, + "tokens_rate.above_band": 0.9717514124293786, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02824858757062147 + }, + { + "epoch": 1.5349382190029826, + "grad_norm": 122.81173187534647, + "learning_rate": 3.757604662405168e-07, + "loss": 0.1973, + "step": 7205, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9199475065616798, + "success_rate.epoch.env.math": 0.9731638418079096, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8258629572385369, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8673599789836411, + "success_rate.epoch.global": 0.8988629563135847, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949882075471698, + "tokens_p.mean_in_band": 0.7604166666666666, + "tokens_rate.above_band": 0.9724770642201835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027522935779816515 + }, + { + "epoch": 1.536003408606732, + "grad_norm": 28.45209197541216, + "learning_rate": 3.7572907512184926e-07, + "loss": 0.2688, + "step": 7210, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9615384615384616, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9187418086500655, + "success_rate.epoch.env.math": 0.9731638418079096, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8263995891114535, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8675469863537949, + "success_rate.epoch.global": 0.8988652199880549, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974324324324324, + "tokens_p.mean_in_band": 0.5129310344827587, + "tokens_rate.above_band": 0.9696016771488469, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03039832285115304 + }, + { + "epoch": 1.5370685982104815, + "grad_norm": 116.67579592542391, + "learning_rate": 3.756976725399448e-07, + "loss": 0.1264, + "step": 7215, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.918954248366013, + "success_rate.epoch.env.math": 0.9731827805222301, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.826844262295082, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677379457209221, + "success_rate.epoch.global": 0.8990661633220743, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972578642384106, + "tokens_p.mean_in_band": 0.75390625, + "tokens_rate.above_band": 0.9983471074380166, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001652892561983471 + }, + { + "epoch": 1.538133787814231, + "grad_norm": 33.37005056284808, + "learning_rate": 3.756662585176893e-07, + "loss": 0.1664, + "step": 7220, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9178617992177314, + "success_rate.epoch.env.math": 0.9732205778717407, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8271983640081799, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676914055378995, + "success_rate.epoch.global": 0.8990680150703946, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0005782169890665, + "tokens_p.mean_in_band": 0.6734375, + "tokens_rate.above_band": 0.9958123953098827, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0041876046901172526 + }, + { + "epoch": 1.5391989774179804, + "grad_norm": 72.70054637160406, + "learning_rate": 3.756348330779772e-07, + "loss": 0.2936, + "step": 7225, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9167750325097529, + "success_rate.epoch.env.math": 0.9732958538299368, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8275510204081633, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676315115060997, + "success_rate.epoch.global": 0.8990698594894122, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998828125, + "tokens_p.mean_in_band": 0.49333639705882354, + "tokens_rate.above_band": 0.974124809741248, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0258751902587519 + }, + { + "epoch": 1.5402641670217299, + "grad_norm": 90.24994920992617, + "learning_rate": 3.756033962437112e-07, + "loss": 0.2605, + "step": 7230, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8620689655172413, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9547325102880658, + "success_rate.epoch.env.logic": 0.9167750325097529, + "success_rate.epoch.env.math": 0.973314606741573, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8276563294356889, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867746872605, + "success_rate.epoch.global": 0.8990716966225558, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985984219269103, + "tokens_p.mean_in_band": 0.35409007352941174, + "tokens_rate.above_band": 0.9860769860769861, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013923013923013924 + }, + { + "epoch": 1.5413293566254793, + "grad_norm": 226.23333593108475, + "learning_rate": 3.7557194803780207e-07, + "loss": 0.7041, + "step": 7235, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.863013698630137, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9549180327868853, + "success_rate.epoch.env.logic": 0.9168831168831169, + "success_rate.epoch.env.math": 0.9726507713884993, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.827079107505071, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677466255778534, + "success_rate.epoch.global": 0.8986792824758526, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.82, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9981871546961326, + "tokens_p.mean_in_band": 0.48829868861607145, + "tokens_rate.above_band": 0.981029810298103, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018970189701897018 + }, + { + "epoch": 1.5423945462292288, + "grad_norm": 59.49632208363608, + "learning_rate": 3.7554048848316915e-07, + "loss": 0.2515, + "step": 7240, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.863013698630137, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9549180327868853, + "success_rate.epoch.env.logic": 0.917098445595855, + "success_rate.epoch.env.math": 0.972027972027972, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8268354430379747, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677068731127082, + "success_rate.epoch.global": 0.898485146566988, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955439814814815, + "tokens_p.mean_in_band": 0.6612723214285714, + "tokens_rate.above_band": 0.9747292418772563, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02527075812274368 + }, + { + "epoch": 1.5434597358329782, + "grad_norm": 211.25196886932898, + "learning_rate": 3.755090176027399e-07, + "loss": 0.2325, + "step": 7245, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.863013698630137, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.917312661498708, + "success_rate.epoch.env.math": 0.9720865317515701, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8271854471955533, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677802174593875, + "success_rate.epoch.global": 0.8986844688788533, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990053050397878, + "tokens_p.mean_in_band": 0.396484375, + "tokens_rate.above_band": 0.9973544973544973, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026455026455026454 + }, + { + "epoch": 1.5445249254367277, + "grad_norm": 88.17144734564283, + "learning_rate": 3.7547753541945e-07, + "loss": 0.3595, + "step": 7250, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8639455782312925, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9175257731958762, + "success_rate.epoch.env.math": 0.9721642310368824, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8269424823410696, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8678692834347648, + "success_rate.epoch.global": 0.898687046835195, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975892857142857, + "tokens_p.mean_in_band": 0.5809151785714286, + "tokens_rate.above_band": 0.9803921568627451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0196078431372549 + }, + { + "epoch": 1.5455901150404772, + "grad_norm": 201.8897655836413, + "learning_rate": 3.7544604195624363e-07, + "loss": 0.3318, + "step": 7255, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8639455782312925, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9163449163449163, + "success_rate.epoch.env.math": 0.9722607489597781, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8267875125881168, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.867756619009218, + "success_rate.epoch.global": 0.8984940348132212, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.5833333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9935515873015873, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.945, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.055 + }, + { + "epoch": 1.5466553046442266, + "grad_norm": 120.5917772841325, + "learning_rate": 3.7541453723607284e-07, + "loss": 0.4153, + "step": 7260, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.8639455782312925, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.951417004048583, + "success_rate.epoch.env.logic": 0.916452442159383, + "success_rate.epoch.env.math": 0.9722991689750693, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8270487682252388, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8674726245696988, + "success_rate.epoch.global": 0.8984969744290455, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9866183315431436, + "tokens_p.mean_in_band": 0.4970485336752899, + "tokens_rate.above_band": 0.7135922330097088, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.28640776699029125 + }, + { + "epoch": 1.547720494247976, + "grad_norm": 270.2429326059362, + "learning_rate": 3.75383021281898e-07, + "loss": 0.332, + "step": 7265, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.8657718120805369, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.916452442159383, + "success_rate.epoch.env.math": 0.9723374827109267, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8268072289156626, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8676748573221541, + "success_rate.epoch.global": 0.8984999025910774, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936380293159609, + "tokens_p.mean_in_band": 0.7663810483870968, + "tokens_rate.above_band": 0.9753772835583797, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024622716441620333 + }, + { + "epoch": 1.5487856838517255, + "grad_norm": 133.22513118022485, + "learning_rate": 3.7535149411668784e-07, + "loss": 0.3863, + "step": 7270, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.8675496688741722, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9165596919127086, + "success_rate.epoch.env.math": 0.9716850828729282, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8272408612919379, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8678263426935394, + "success_rate.epoch.global": 0.8985028193661287, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998202264381885, + "tokens_p.mean_in_band": 0.0018157958984375, + "tokens_rate.above_band": 0.9987775061124694, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012224938875305623 + }, + { + "epoch": 1.5498508734554752, + "grad_norm": 83.8355173301768, + "learning_rate": 3.7531995576341915e-07, + "loss": 0.3274, + "step": 7275, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.8675496688741722, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9156010230179028, + "success_rate.epoch.env.math": 0.9717241379310345, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8270864567716142, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8677356087120347, + "success_rate.epoch.global": 0.8983116631088687, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977409638554217, + "tokens_p.mean_in_band": 0.4609375, + "tokens_rate.above_band": 0.9707602339181286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029239766081871343 + }, + { + "epoch": 1.5509160630592245, + "grad_norm": 179.02460433090934, + "learning_rate": 3.7528840624507676e-07, + "loss": 0.2639, + "step": 7280, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9157088122605364, + "success_rate.epoch.env.math": 0.9717436250861475, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.827517447657029, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8679627437205297, + "success_rate.epoch.global": 0.8985086190199496, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997549926035503, + "tokens_p.mean_in_band": 0.7565104166666666, + "tokens_rate.above_band": 0.9955817378497791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004418262150220913 + }, + { + "epoch": 1.5519812526629742, + "grad_norm": 87.68828579706313, + "learning_rate": 3.7525684558465367e-07, + "loss": 0.3525, + "step": 7285, + "success_rate.epoch.env.abd": 0.9826839826839827, + "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9158163265306123, + "success_rate.epoch.env.math": 0.9718406593406593, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.827775012444002, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8680115982971995, + "success_rate.epoch.global": 0.8987048134544752, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9943985849056604, + "tokens_p.mean_in_band": 0.8541666666666666, + "tokens_rate.above_band": 0.9724770642201835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027522935779816515 + }, + { + "epoch": 1.5530464422667234, + "grad_norm": 148.35392171602203, + "learning_rate": 3.7522527380515126e-07, + "loss": 0.4016, + "step": 7290, + "success_rate.epoch.env.abd": 0.9826839826839827, + "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9159235668789809, + "success_rate.epoch.env.math": 0.9719178082191781, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8276204669647292, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8680330748514204, + "success_rate.epoch.global": 0.8987073123673548, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973958333333334, + "tokens_p.mean_in_band": 0.5725446428571429, + "tokens_rate.above_band": 0.9882352941176471, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011764705882352941 + }, + { + "epoch": 1.554111631870473, + "grad_norm": 237.85102098860557, + "learning_rate": 3.751936909295787e-07, + "loss": 0.3177, + "step": 7295, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.916243654822335, + "success_rate.epoch.env.math": 0.9719753930280246, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8273809523809523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8680524199581563, + "success_rate.epoch.global": 0.8987098016560755, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933823529411765, + "tokens_p.mean_in_band": 0.503125, + "tokens_rate.above_band": 0.9444444444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05555555555555555 + }, + { + "epoch": 1.5551768214742223, + "grad_norm": 78.36883597045158, + "learning_rate": 3.7516209698095337e-07, + "loss": 0.3323, + "step": 7300, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9164556962025316, + "success_rate.epoch.env.math": 0.9720708446866485, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8275520317145689, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8681730923900357, + "success_rate.epoch.global": 0.8989044781856621, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999047256097561, + "tokens_p.mean_in_band": 0.75390625, + "tokens_rate.above_band": 0.9791044776119403, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020895522388059702 + }, + { + "epoch": 1.556242011077972, + "grad_norm": 231.83040459248767, + "learning_rate": 3.751304919823007e-07, + "loss": 0.2871, + "step": 7305, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.952, + "success_rate.epoch.env.logic": 0.9167717528373266, + "success_rate.epoch.env.math": 0.9720898570456092, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8264953040039545, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8681250116961702, + "success_rate.epoch.global": 0.8985229234605793, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9972875916870416, + "tokens_p.mean_below_band": 6.693881005048752e-10, + "tokens_p.mean_in_band": 0.4787326388888889, + "tokens_rate.above_band": 0.977299880525687, + "tokens_rate.below_band": 0.0011947431302270011, + "tokens_rate.in_band": 0.021505376344086023 + }, + { + "epoch": 1.5573072006817212, + "grad_norm": 78.69483288243262, + "learning_rate": 3.750988759566542e-07, + "loss": 0.1893, + "step": 7310, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.952191235059761, + "success_rate.epoch.env.logic": 0.9168765743073047, + "success_rate.epoch.env.math": 0.9721467391304348, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8267522211253702, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8682554302255181, + "success_rate.epoch.global": 0.8987172123300785, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982412316476346, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9991850040749797, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0008149959250203749 + }, + { + "epoch": 1.558372390285471, + "grad_norm": 122.75864728381704, + "learning_rate": 3.7506724892705544e-07, + "loss": 0.3226, + "step": 7315, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9642857142857143, + "success_rate.epoch.env.ded": 0.952191235059761, + "success_rate.epoch.env.logic": 0.917189460476788, + "success_rate.epoch.env.math": 0.9721845318860244, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8270935960591133, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684385943329331, + "success_rate.epoch.global": 0.8989107586470476, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998567335243553, + "tokens_p.mean_in_band": 0.7763671875, + "tokens_rate.above_band": 0.9886685552407932, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0113314447592068 + }, + { + "epoch": 1.5594375798892202, + "grad_norm": 56.34095357579713, + "learning_rate": 3.7503561091655393e-07, + "loss": 0.238, + "step": 7320, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9525691699604744, + "success_rate.epoch.env.logic": 0.917189460476788, + "success_rate.epoch.env.math": 0.972241029113067, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8273487456960157, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8686894107569212, + "success_rate.epoch.global": 0.899103566660309, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998546974522293, + "tokens_p.mean_in_band": 0.74609375, + "tokens_rate.above_band": 0.9993634627625716, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0006365372374283895 + }, + { + "epoch": 1.5605027694929698, + "grad_norm": 634.8912894277909, + "learning_rate": 3.750039619482072e-07, + "loss": 0.1986, + "step": 7325, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.9173967459324155, + "success_rate.epoch.env.math": 0.9723160027008778, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8276031434184676, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8687551736948508, + "success_rate.epoch.global": 0.8992956405863316, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971968438538206, + "tokens_p.mean_in_band": 0.7604166666666666, + "tokens_rate.above_band": 0.9966887417218543, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0033112582781456954 + }, + { + "epoch": 1.561567959096719, + "grad_norm": 60.98056153603716, + "learning_rate": 3.7497230204508085e-07, + "loss": 0.2347, + "step": 7330, + "success_rate.epoch.env.abd": 0.9829787234042553, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.9175, + "success_rate.epoch.env.math": 0.9723905723905724, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8278567925453654, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688076808096401, + "success_rate.epoch.global": 0.8994869846095382, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997874149659864, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.5626331487004688, + "grad_norm": 184.51817282588448, + "learning_rate": 3.749406312302484e-07, + "loss": 0.2173, + "step": 7335, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9175, + "success_rate.epoch.env.math": 0.9724091520861373, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8284457478005866, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688863108062705, + "success_rate.epoch.global": 0.8996776028826096, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970067049808429, + "tokens_p.mean_in_band": 0.6940104166666666, + "tokens_rate.above_band": 0.9886363636363636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011363636363636364 + }, + { + "epoch": 1.563698338304218, + "grad_norm": 70.40206931706632, + "learning_rate": 3.749089495267912e-07, + "loss": 0.2263, + "step": 7340, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9165628891656289, + "success_rate.epoch.env.math": 0.9724462365591398, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8282088823816496, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688748618615882, + "success_rate.epoch.global": 0.899488926746167, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981335324232082, + "tokens_p.mean_in_band": 0.6076388888888888, + "tokens_rate.above_band": 0.984873949579832, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015126050420168067 + }, + { + "epoch": 1.5647635279079677, + "grad_norm": 92.9873030626634, + "learning_rate": 3.748772569577988e-07, + "loss": 0.3012, + "step": 7345, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.953307392996109, + "success_rate.epoch.env.logic": 0.9168734491315137, + "success_rate.epoch.env.math": 0.9725016767270288, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8278888347147733, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689141658462263, + "success_rate.epoch.global": 0.899489892310599, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977964743589743, + "tokens_p.mean_in_band": 0.513671875, + "tokens_rate.above_band": 0.9915254237288136, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00847457627118644 + }, + { + "epoch": 1.565828717511717, + "grad_norm": 143.13321887750806, + "learning_rate": 3.748455535463684e-07, + "loss": 0.2566, + "step": 7350, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9169764560099133, + "success_rate.epoch.env.math": 0.9725935828877005, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8271665043816943, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688826714646724, + "success_rate.epoch.global": 0.8993022817273242, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978966346153846, + "tokens_p.mean_in_band": 0.501953125, + "tokens_rate.above_band": 0.975, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025 + }, + { + "epoch": 1.5668939071154666, + "grad_norm": 218.88817745450382, + "learning_rate": 3.748138393156052e-07, + "loss": 0.3032, + "step": 7355, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9169764560099133, + "success_rate.epoch.env.math": 0.9726666666666667, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8276699029126213, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.86893507894739, + "success_rate.epoch.global": 0.8994918125352908, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948369565217391, + "tokens_p.mean_in_band": 0.859375, + "tokens_rate.above_band": 0.9829059829059829, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017094017094017096 + }, + { + "epoch": 1.567959096719216, + "grad_norm": 92.73721293149022, + "learning_rate": 3.7478211428862247e-07, + "loss": 0.3906, + "step": 7360, + "success_rate.epoch.env.abd": 0.9831932773109243, + "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9159456118665018, + "success_rate.epoch.env.math": 0.9727030625832224, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8279205041202132, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689546404026003, + "success_rate.epoch.global": 0.8994927672365207, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978649068322981, + "tokens_p.mean_in_band": 0.6637073863636364, + "tokens_rate.above_band": 0.9669669669669669, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03303303303303303 + }, + { + "epoch": 1.5690242863229655, + "grad_norm": 80.54347956379502, + "learning_rate": 3.74750378488541e-07, + "loss": 0.2968, + "step": 7365, + "success_rate.epoch.env.abd": 0.9832635983263598, + "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9161528976572133, + "success_rate.epoch.env.math": 0.9727393617021277, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8282535074987906, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690297759370149, + "success_rate.epoch.global": 0.8996812300768798, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99925, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9973404255319149, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026595744680851063 + }, + { + "epoch": 1.570089475926715, + "grad_norm": 110.22776202569082, + "learning_rate": 3.747186319384897e-07, + "loss": 0.3747, + "step": 7370, + "success_rate.epoch.env.abd": 0.9832635983263598, + "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9163591635916359, + "success_rate.epoch.env.math": 0.9727755644090306, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8281853281853282, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690639513309457, + "success_rate.epoch.global": 0.899681826689126, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972181008902077, + "tokens_p.mean_in_band": 0.4301215277777778, + "tokens_rate.above_band": 0.9739884393063584, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02601156069364162 + }, + { + "epoch": 1.5711546655304645, + "grad_norm": 717.7661144961071, + "learning_rate": 3.746868746616052e-07, + "loss": 0.2865, + "step": 7375, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9164619164619164, + "success_rate.epoch.env.math": 0.9728296885354539, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8281174771304767, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690783841444755, + "success_rate.epoch.global": 0.8996824210722959, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9927262931034483, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.9747899159663865, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025210084033613446 + }, + { + "epoch": 1.572219855134214, + "grad_norm": 149.78207009058485, + "learning_rate": 3.7465510668103204e-07, + "loss": 0.1977, + "step": 7380, + "success_rate.epoch.env.abd": 0.983402489626556, + "success_rate.epoch.env.agentgym:alfworld": 0.8734177215189873, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9165644171779141, + "success_rate.epoch.env.math": 0.9728476821192052, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8285302593659942, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688398779003671, + "success_rate.epoch.global": 0.8996830132388588, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997145061728395, + "tokens_p.mean_in_band": 0.690185546875, + "tokens_rate.above_band": 0.9619952494061758, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03800475059382423 + }, + { + "epoch": 1.5732850447379634, + "grad_norm": 76.44000974123742, + "learning_rate": 3.7462332801992243e-07, + "loss": 0.2206, + "step": 7385, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.8734177215189873, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9167686658506732, + "success_rate.epoch.env.math": 0.9728656518861681, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8284619070436032, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868878220916286, + "success_rate.epoch.global": 0.8996836032011911, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995, + "tokens_p.mean_in_band": 0.345703125, + "tokens_rate.above_band": 0.984251968503937, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015748031496062992 + }, + { + "epoch": 1.5743502343417128, + "grad_norm": 335.0100723795315, + "learning_rate": 3.7459153870143644e-07, + "loss": 0.2626, + "step": 7390, + "success_rate.epoch.env.abd": 0.9835390946502057, + "success_rate.epoch.env.agentgym:alfworld": 0.8742138364779874, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9167686658506732, + "success_rate.epoch.env.math": 0.9729015201586253, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8284758719541329, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689613089537428, + "success_rate.epoch.global": 0.8996841909715771, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972967128027682, + "tokens_p.mean_in_band": 0.5703125, + "tokens_rate.above_band": 0.9796610169491525, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020338983050847456 + }, + { + "epoch": 1.5754154239454623, + "grad_norm": 285.8289423925264, + "learning_rate": 3.745597387487419e-07, + "loss": 0.2655, + "step": 7395, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8742138364779874, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9156479217603912, + "success_rate.epoch.env.math": 0.972937293729373, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8288030519790176, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689308277782871, + "success_rate.epoch.global": 0.8996847765622102, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0004002988898377, + "tokens_p.mean_in_band": 0.609375, + "tokens_rate.above_band": 0.9890202702702703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01097972972972973 + }, + { + "epoch": 1.5764806135492118, + "grad_norm": 93.15656509004323, + "learning_rate": 3.7452792818501434e-07, + "loss": 0.437, + "step": 7400, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.86875, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9158536585365854, + "success_rate.epoch.env.math": 0.972972972972973, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8285714285714286, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684471219742456, + "success_rate.epoch.global": 0.8995002776235425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981343283582089, + "tokens_p.mean_in_band": 0.4365234375, + "tokens_rate.above_band": 0.9654178674351584, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0345821325648415 + }, + { + "epoch": 1.5775458031529612, + "grad_norm": 129.66763779012854, + "learning_rate": 3.744961070334372e-07, + "loss": 0.2338, + "step": 7405, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.86875, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9159561510353228, + "success_rate.epoch.env.math": 0.9730440499671269, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8288159771754636, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684970536506534, + "success_rate.epoch.global": 0.8996859412525402, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9939793577981652, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.5786109927567107, + "grad_norm": 54.17844040797932, + "learning_rate": 3.744642753172014e-07, + "loss": 0.3054, + "step": 7410, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9159561510353228, + "success_rate.epoch.env.math": 0.9730617608409987, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.828909952606635, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8686858108309903, + "success_rate.epoch.global": 0.8996865203761756, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982506361323156, + "tokens_p.mean_in_band": 0.427734375, + "tokens_rate.above_band": 0.9899244332493703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010075566750629723 + }, + { + "epoch": 1.5796761823604601, + "grad_norm": 110.91168274500707, + "learning_rate": 3.744324330595057e-07, + "loss": 0.1285, + "step": 7415, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.916058394160584, + "success_rate.epoch.env.math": 0.9731675392670157, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8291528632276385, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8687268046648341, + "success_rate.epoch.global": 0.8998711577397386, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.990234375, + "tokens_p.mean_in_band": 0.8502604166666666, + "tokens_rate.above_band": 0.9411764705882353, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.058823529411764705 + }, + { + "epoch": 1.5807413719642096, + "grad_norm": 133.0249004341405, + "learning_rate": 3.7440058028355646e-07, + "loss": 0.3102, + "step": 7420, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9543726235741445, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.973185088293002, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8293144208037825, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688465262073993, + "success_rate.epoch.global": 0.9000551166636046, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994996360989811, + "tokens_p.mean_in_band": 0.7265625, + "tokens_rate.above_band": 0.9970972423802612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002902757619738752 + }, + { + "epoch": 1.581806561567959, + "grad_norm": 95.87916724368114, + "learning_rate": 3.7436871701256784e-07, + "loss": 0.3336, + "step": 7425, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9543726235741445, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.9732375979112271, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8292452830188679, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689269852349321, + "success_rate.epoch.global": 0.9000550155877498, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986929657794676, + "tokens_p.mean_in_band": 0.390625, + "tokens_rate.above_band": 0.9704797047970479, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02952029520295203 + }, + { + "epoch": 1.5828717511717085, + "grad_norm": 281.93536178156836, + "learning_rate": 3.7433684326976145e-07, + "loss": 0.2062, + "step": 7430, + "success_rate.epoch.env.abd": 0.9839357429718876, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.9732550554468362, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8288669487541138, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689157788233064, + "success_rate.epoch.global": 0.8998718652754897, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9960469374167776, + "tokens_p.mean_in_band": 0.5223721590909091, + "tokens_rate.above_band": 0.9855643044619422, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014435695538057743 + }, + { + "epoch": 1.583936940775458, + "grad_norm": 63.85553003750588, + "learning_rate": 3.7430495907836675e-07, + "loss": 0.193, + "step": 7435, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9164648910411622, + "success_rate.epoch.env.math": 0.9732899022801303, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8292682926829268, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689797063604928, + "success_rate.epoch.global": 0.9000548145441257, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967105263157895, + "tokens_p.mean_in_band": 0.7587890625, + "tokens_rate.above_band": 0.9661016949152542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03389830508474576 + }, + { + "epoch": 1.5850021303792075, + "grad_norm": 102.56513791047378, + "learning_rate": 3.742730644616207e-07, + "loss": 0.3006, + "step": 7440, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9664804469273743, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9733420026007802, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8285714285714286, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689721473166011, + "success_rate.epoch.global": 0.8998723326645997, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666668, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999721975088968, + "tokens_p.mean_in_band": 0.5302734375, + "tokens_rate.above_band": 0.9859649122807017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014035087719298246 + }, + { + "epoch": 1.586067319982957, + "grad_norm": 114.09171612837113, + "learning_rate": 3.742411594427678e-07, + "loss": 0.2446, + "step": 7445, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.916767189384801, + "success_rate.epoch.env.math": 0.9733593242365172, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8286647992530346, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869008277750671, + "success_rate.epoch.global": 0.8998725650828326, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968354430379747, + "tokens_p.mean_in_band": 0.625, + "tokens_rate.above_band": 0.9693251533742331, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03067484662576687 + }, + { + "epoch": 1.5871325095867066, + "grad_norm": 403.0613650343333, + "learning_rate": 3.7420924404506027e-07, + "loss": 0.2355, + "step": 7450, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9157641395908543, + "success_rate.epoch.env.math": 0.9734283862605314, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.828904428904429, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689451542853495, + "success_rate.epoch.global": 0.8998727966563692, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994310793237972, + "tokens_p.mean_in_band": 0.5661764705882353, + "tokens_rate.above_band": 0.9576587795765878, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04234122042341221 + }, + { + "epoch": 1.5881976991904558, + "grad_norm": 75.75547823520138, + "learning_rate": 3.7417731829175774e-07, + "loss": 0.3218, + "step": 7455, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9158653846153846, + "success_rate.epoch.env.math": 0.9734799482535575, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8293023255813954, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690119602632712, + "success_rate.epoch.global": 0.9000544168329403, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982850609756098, + "tokens_p.mean_in_band": 0.69921875, + "tokens_rate.above_band": 0.9704142011834319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029585798816568046 + }, + { + "epoch": 1.5892628887942055, + "grad_norm": 420.98657005573835, + "learning_rate": 3.7414538220612756e-07, + "loss": 0.2801, + "step": 7460, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9159663865546218, + "success_rate.epoch.env.math": 0.9735824742268041, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8294609665427509, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690603607893508, + "success_rate.epoch.global": 0.9002353793228318, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962469362745098, + "tokens_p.mean_in_band": 0.7981770833333334, + "tokens_rate.above_band": 0.9855072463768116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014492753623188406 + }, + { + "epoch": 1.5903280783979548, + "grad_norm": 120.57693716068823, + "learning_rate": 3.741134358114445e-07, + "loss": 0.3244, + "step": 7465, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9161676646706587, + "success_rate.epoch.env.math": 0.9736673089274245, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8296983758700696, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691079537115306, + "success_rate.epoch.global": 0.9004156876920296, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9908854166666666, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9795918367346939, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02040816326530612 + }, + { + "epoch": 1.5913932680017044, + "grad_norm": 146.17220641527368, + "learning_rate": 3.7408147913099083e-07, + "loss": 0.2916, + "step": 7470, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.916267942583732, + "success_rate.epoch.env.math": 0.9737010904425915, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8293246993524515, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690861703397688, + "success_rate.epoch.global": 0.9002345300378857, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9047619047619048, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955778301886793, + "tokens_p.mean_in_band": 0.563232421875, + "tokens_rate.above_band": 0.9298245614035088, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07017543859649122 + }, + { + "epoch": 1.5924584576054537, + "grad_norm": 81.85401089471598, + "learning_rate": 3.740495121880563e-07, + "loss": 0.2498, + "step": 7475, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9163679808841099, + "success_rate.epoch.env.math": 0.9737683941138836, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8294824399260629, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869211272053219, + "success_rate.epoch.global": 0.9004141905276427, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974385245901639, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.5935236472092034, + "grad_norm": 202.3792501137584, + "learning_rate": 3.7401753500593835e-07, + "loss": 0.2716, + "step": 7480, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9165673420738975, + "success_rate.epoch.env.math": 0.9738353541799617, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8292570373788648, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693127436464801, + "success_rate.epoch.global": 0.9004134459823836, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991185897435897, + "tokens_p.mean_in_band": 0.6848958333333334, + "tokens_rate.above_band": 0.9774436090225563, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022556390977443608 + }, + { + "epoch": 1.5945888368129526, + "grad_norm": 89.87928124264751, + "learning_rate": 3.7398554760794156e-07, + "loss": 0.2647, + "step": 7485, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9167657550535078, + "success_rate.epoch.env.math": 0.9738687061822817, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8296500920810314, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869385922501501, + "success_rate.epoch.global": 0.9005921406782702, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972074468085106, + "tokens_p.mean_in_band": 0.8815104166666666, + "tokens_rate.above_band": 0.9873949579831933, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012605042016806723 + }, + { + "epoch": 1.5956540264167023, + "grad_norm": 41.06241910836217, + "learning_rate": 3.739535500173782e-07, + "loss": 0.1696, + "step": 7490, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9167657550535078, + "success_rate.epoch.env.math": 0.9732824427480916, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8285845588235294, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692730673868887, + "success_rate.epoch.global": 0.9000537345513165, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.7833333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9973006644518272, + "tokens_p.mean_in_band": 0.6354166666666666, + "tokens_rate.above_band": 0.9435736677115988, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05642633228840126 + }, + { + "epoch": 1.5967192160204515, + "grad_norm": 105.36218028997493, + "learning_rate": 3.7392154225756783e-07, + "loss": 0.2389, + "step": 7495, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9156769596199525, + "success_rate.epoch.env.math": 0.9733333333333334, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8285976168652612, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691798994953814, + "success_rate.epoch.global": 0.899874843554443, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.6111111111111112, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954844006568144, + "tokens_p.mean_in_band": 0.5321875, + "tokens_rate.above_band": 0.9241274658573596, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07587253414264036 + }, + { + "epoch": 1.5977844056242012, + "grad_norm": 85.06000770032496, + "learning_rate": 3.738895243518375e-07, + "loss": 0.2327, + "step": 7500, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9158767772511849, + "success_rate.epoch.env.math": 0.973384030418251, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8289112534309241, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692925281421515, + "success_rate.epoch.global": 0.9000535427449581, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976173020527859, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.5988495952279504, + "grad_norm": 158.61518151707392, + "learning_rate": 3.7385749632352165e-07, + "loss": 0.3134, + "step": 7505, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9159763313609467, + "success_rate.epoch.env.math": 0.9734513274336283, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8291457286432161, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693507398467285, + "success_rate.epoch.global": 0.9002316052022091, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953785211267606, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.9726027397260274, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0273972602739726 + }, + { + "epoch": 1.5999147848317001, + "grad_norm": 47.171231342409065, + "learning_rate": 3.738254581959621e-07, + "loss": 0.124, + "step": 7510, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9161747343565525, + "success_rate.epoch.env.math": 0.9734848484848485, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8295350957155879, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8694230724863197, + "success_rate.epoch.global": 0.900409034323315, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957298136645962, + "tokens_p.mean_in_band": 0.71875, + "tokens_rate.above_band": 0.9877300613496932, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012269938650306749 + }, + { + "epoch": 1.6009799744354494, + "grad_norm": 449.16224571290246, + "learning_rate": 3.7379340999250794e-07, + "loss": 0.3919, + "step": 7515, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9516728624535316, + "success_rate.epoch.env.logic": 0.9161747343565525, + "success_rate.epoch.env.math": 0.9735516372795969, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8293903548680619, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691084116340897, + "success_rate.epoch.global": 0.9002307828865613, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9913002329644729, + "tokens_p.mean_below_band": 2.60770320892334e-07, + "tokens_p.mean_in_band": 0.47299038951120165, + "tokens_rate.above_band": 0.7769230769230769, + "tokens_rate.below_band": 0.0009049773755656109, + "tokens_rate.in_band": 0.22217194570135745 + }, + { + "epoch": 1.602045164039199, + "grad_norm": 112.49181236949589, + "learning_rate": 3.737613517365157e-07, + "loss": 0.2804, + "step": 7520, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9516728624535316, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9736015084852294, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8290131878126421, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691233785850287, + "success_rate.epoch.global": 0.9002303739145845, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978298611111112, + "tokens_p.mean_in_band": 0.5982142857142857, + "tokens_rate.above_band": 0.976271186440678, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023728813559322035 + }, + { + "epoch": 1.6031103536429483, + "grad_norm": 64.43560988540021, + "learning_rate": 3.737292834513492e-07, + "loss": 0.3589, + "step": 7525, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9167643610785463, + "success_rate.epoch.env.math": 0.9736346516007532, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8294784580498866, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691938419634802, + "success_rate.epoch.global": 0.9004068636122413, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985865290068829, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9980372914622179, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001962708537782139 + }, + { + "epoch": 1.604175543246698, + "grad_norm": 308.0946203409165, + "learning_rate": 3.736972051603796e-07, + "loss": 0.2861, + "step": 7530, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9169590643274854, + "success_rate.epoch.env.math": 0.9736677115987461, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8293345405160706, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688571116839564, + "success_rate.epoch.global": 0.9002295603037259, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9950336185819071, + "tokens_p.mean_in_band": 0.6694078947368421, + "tokens_rate.above_band": 0.955607476635514, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04439252336448598 + }, + { + "epoch": 1.6052407328504472, + "grad_norm": 43.38084235055817, + "learning_rate": 3.7366511688698527e-07, + "loss": 0.1036, + "step": 7535, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9170560747663551, + "success_rate.epoch.env.math": 0.9737171464330413, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8297968397291197, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868912452091794, + "success_rate.epoch.global": 0.900405429226159, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961993243243243, + "tokens_p.mean_in_band": 0.865234375, + "tokens_rate.above_band": 0.9736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02631578947368421 + }, + { + "epoch": 1.606305922454197, + "grad_norm": 142.03121451815522, + "learning_rate": 3.73633018654552e-07, + "loss": 0.3655, + "step": 7540, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9171528588098017, + "success_rate.epoch.env.math": 0.97375, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8293561458802341, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868942449310207, + "success_rate.epoch.global": 0.9002287524194967, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957236842105263, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9313725490196079, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06862745098039216 + }, + { + "epoch": 1.6073711120579464, + "grad_norm": 281.4151639779487, + "learning_rate": 3.7360091048647265e-07, + "loss": 0.5624, + "step": 7545, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9152148664343787, + "success_rate.epoch.env.math": 0.9737827715355806, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.828982898289829, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868812088934743, + "success_rate.epoch.global": 0.8998770419813806, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9997176204819277, + "tokens_p.mean_in_band": 0.4446428571428571, + "tokens_rate.above_band": 0.9743213499633162, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025678650036683785 + }, + { + "epoch": 1.6084363016616958, + "grad_norm": 66.56732194583225, + "learning_rate": 3.735687924061476e-07, + "loss": 0.2741, + "step": 7550, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9520295202952029, + "success_rate.epoch.env.logic": 0.9153132250580046, + "success_rate.epoch.env.math": 0.9738805970149254, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8290598290598291, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688530692362271, + "success_rate.epoch.global": 0.9000350754121361, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998926116838488, + "tokens_p.mean_in_band": 0.7044270833333334, + "tokens_rate.above_band": 0.9948717948717949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005128205128205128 + }, + { + "epoch": 1.6095014912654453, + "grad_norm": 319.7704207125596, + "learning_rate": 3.73536664436984e-07, + "loss": 0.342, + "step": 7555, + "success_rate.epoch.env.abd": 0.984313725490196, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9520295202952029, + "success_rate.epoch.env.logic": 0.9154113557358053, + "success_rate.epoch.env.math": 0.9739454094292804, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8293668612483162, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868901408533218, + "success_rate.epoch.global": 0.9002100840336135, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9930555555555556, + "tokens_p.mean_in_band": 0.7022569444444444, + "tokens_rate.above_band": 0.9166666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08333333333333333 + }, + { + "epoch": 1.6105666808691947, + "grad_norm": 0.0, + "learning_rate": 3.7350452660239666e-07, + "loss": 0.1687, + "step": 7560, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8711656441717791, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9522058823529411, + "success_rate.epoch.env.logic": 0.9155092592592593, + "success_rate.epoch.env.math": 0.9739615623062616, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8297491039426523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690404275753746, + "success_rate.epoch.global": 0.9003844809507165, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992517605633803, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9985935302390999, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0014064697609001407 + }, + { + "epoch": 1.6116318704729442, + "grad_norm": 29.96036182473792, + "learning_rate": 3.7347237892580745e-07, + "loss": 0.3907, + "step": 7565, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8711656441717791, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9157043879907621, + "success_rate.epoch.env.math": 0.9740420271940667, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8299015219337511, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690952531788674, + "success_rate.epoch.global": 0.9005582693649686, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970703125, + "tokens_p.mean_in_band": 0.74609375, + "tokens_rate.above_band": 0.9696969696969697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030303030303030304 + }, + { + "epoch": 1.6126970600766937, + "grad_norm": 114.037020582343, + "learning_rate": 3.7344022143064526e-07, + "loss": 0.2688, + "step": 7570, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9158016147635525, + "success_rate.epoch.env.math": 0.974090067859346, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8289414917373827, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690925997772415, + "success_rate.epoch.global": 0.900208986415883, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9957078313253012, + "tokens_p.mean_in_band": 0.6019736842105263, + "tokens_rate.above_band": 0.956221198156682, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04377880184331797 + }, + { + "epoch": 1.6137622496804431, + "grad_norm": 225.17243415463471, + "learning_rate": 3.734080541403463e-07, + "loss": 0.2769, + "step": 7575, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9158986175115207, + "success_rate.epoch.env.math": 0.974169741697417, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8291703835860839, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691822623706645, + "success_rate.epoch.global": 0.9003824756606398, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978658536585366, + "tokens_p.mean_in_band": 0.771875, + "tokens_rate.above_band": 0.9761904761904762, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023809523809523808 + }, + { + "epoch": 1.6148274392841926, + "grad_norm": 67.9033884536374, + "learning_rate": 3.7337587707835383e-07, + "loss": 0.1484, + "step": 7580, + "success_rate.epoch.env.abd": 0.9844357976653697, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9158986175115207, + "success_rate.epoch.env.math": 0.9742173112338858, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8295505117935025, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692421863060793, + "success_rate.epoch.global": 0.9005553627212773, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972615979381443, + "tokens_p.mean_in_band": 0.709375, + "tokens_rate.above_band": 0.9748743718592965, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02512562814070352 + }, + { + "epoch": 1.615892628887942, + "grad_norm": 77.25770649901607, + "learning_rate": 3.7334369026811825e-07, + "loss": 0.3723, + "step": 7585, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9160919540229885, + "success_rate.epoch.env.math": 0.9742804654011022, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8297777777777777, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692916483995884, + "success_rate.epoch.global": 0.9007276507276507, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9899553571428571, + "tokens_p.mean_in_band": 0.7907366071428571, + "tokens_rate.above_band": 0.9411764705882353, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.058823529411764705 + }, + { + "epoch": 1.6169578184916915, + "grad_norm": 321.43578582718186, + "learning_rate": 3.73311493733097e-07, + "loss": 0.2626, + "step": 7590, + "success_rate.epoch.env.abd": 0.9845559845559846, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9152348224513173, + "success_rate.epoch.env.math": 0.9737324373854612, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8299289520426287, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869183091599817, + "success_rate.epoch.global": 0.9005534417156693, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0009995791245792, + "tokens_p.mean_in_band": 0.45590277777777777, + "tokens_rate.above_band": 0.9295774647887324, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07042253521126761 + }, + { + "epoch": 1.618023008095441, + "grad_norm": 80.81922636871866, + "learning_rate": 3.7327928749675494e-07, + "loss": 0.2884, + "step": 7595, + "success_rate.epoch.env.abd": 0.9845559845559846, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9152348224513173, + "success_rate.epoch.env.math": 0.973780487804878, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8300132802124834, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691951260170234, + "success_rate.epoch.global": 0.9005524861878453, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9895833333333334, + "tokens_p.mean_in_band": 0.6744791666666666, + "tokens_rate.above_band": 0.9166666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08333333333333333 + }, + { + "epoch": 1.6190881976991904, + "grad_norm": 102.82476991420386, + "learning_rate": 3.732470715825635e-07, + "loss": 0.2456, + "step": 7600, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9155251141552512, + "success_rate.epoch.env.math": 0.973780487804878, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8303886925795053, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869276843860201, + "success_rate.epoch.global": 0.9007238883143743, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959914921465969, + "tokens_p.mean_in_band": 0.6702008928571429, + "tokens_rate.above_band": 0.9646464646464646, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03535353535353535 + }, + { + "epoch": 1.62015338730294, + "grad_norm": 180.67783720273331, + "learning_rate": 3.732148460140015e-07, + "loss": 0.2127, + "step": 7605, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9157175398633257, + "success_rate.epoch.env.math": 0.9732360097323601, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8301720335244817, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692404939012484, + "success_rate.epoch.global": 0.9005505849965588, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9931506849315068, + "tokens_p.mean_in_band": 0.7346354166666667, + "tokens_rate.above_band": 0.906832298136646, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09316770186335403 + }, + { + "epoch": 1.6212185769066894, + "grad_norm": 40.51645321487268, + "learning_rate": 3.7318261081455464e-07, + "loss": 0.2116, + "step": 7610, + "success_rate.epoch.env.abd": 0.9847908745247148, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9158134243458476, + "success_rate.epoch.env.math": 0.9732522796352584, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8305457746478874, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693006198483534, + "success_rate.epoch.global": 0.900721401580213, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998686974789916, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.967479674796748, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032520325203252036 + }, + { + "epoch": 1.6222837665104388, + "grad_norm": 62.28192926403548, + "learning_rate": 3.731503660077158e-07, + "loss": 0.2969, + "step": 7615, + "success_rate.epoch.env.abd": 0.9847908745247148, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9528985507246377, + "success_rate.epoch.env.logic": 0.9159090909090909, + "success_rate.epoch.env.math": 0.9733171619163129, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8306948109058927, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693752085464449, + "success_rate.epoch.global": 0.9008916323731139, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986694868995634, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9989094874591058, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0010905125408942203 + }, + { + "epoch": 1.6233489561141883, + "grad_norm": 156.81930557595388, + "learning_rate": 3.731181116169847e-07, + "loss": 0.2337, + "step": 7620, + "success_rate.epoch.env.abd": 0.9849056603773585, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9160045402951191, + "success_rate.epoch.env.math": 0.9733494851605088, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8305531167690957, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690716446715308, + "success_rate.epoch.global": 0.9007189318726464, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9934225512528474, + "tokens_p.mean_in_band": 0.6259072580645161, + "tokens_rate.above_band": 0.9340425531914893, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06595744680851064 + }, + { + "epoch": 1.6244141457179377, + "grad_norm": 690.799236158285, + "learning_rate": 3.7308584766586815e-07, + "loss": 0.2974, + "step": 7625, + "success_rate.epoch.env.abd": 0.9850187265917603, + "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9160997732426304, + "success_rate.epoch.env.math": 0.9734138972809667, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8307759754493643, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691166964863161, + "success_rate.epoch.global": 0.9008885850991114, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976380813953488, + "tokens_p.mean_in_band": 0.7975260416666666, + "tokens_rate.above_band": 0.9662921348314607, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033707865168539325 + }, + { + "epoch": 1.6254793353216872, + "grad_norm": 153.06846157304767, + "learning_rate": 3.7305357417787985e-07, + "loss": 0.2226, + "step": 7630, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8674698795180723, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9160997732426304, + "success_rate.epoch.env.math": 0.9734779987944545, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8308501314636284, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8687370034069576, + "success_rate.epoch.global": 0.9008870692596384, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974904397705545, + "tokens_p.mean_in_band": 0.654296875, + "tokens_rate.above_band": 0.9942965779467681, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005703422053231939 + }, + { + "epoch": 1.6265445249254369, + "grad_norm": 45.31779472198913, + "learning_rate": 3.7302129117654047e-07, + "loss": 0.2428, + "step": 7635, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9160997732426304, + "success_rate.epoch.env.math": 0.9729241877256317, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8312937062937062, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8687991267976006, + "success_rate.epoch.global": 0.9008855585831063, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997327302631579, + "tokens_p.mean_in_band": 0.7664930555555556, + "tokens_rate.above_band": 0.9712460063897763, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02875399361022364 + }, + { + "epoch": 1.6276097145291861, + "grad_norm": 100.9174475086996, + "learning_rate": 3.729889986853777e-07, + "loss": 0.2554, + "step": 7640, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9161947904869762, + "success_rate.epoch.env.math": 0.9729567307692307, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8312254688181422, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688360778549302, + "success_rate.epoch.global": 0.9008840530431826, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984038978494624, + "tokens_p.mean_in_band": 0.4518229166666667, + "tokens_rate.above_band": 0.9841269841269841, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015873015873015872 + }, + { + "epoch": 1.6286749041329358, + "grad_norm": 367.9406803345278, + "learning_rate": 3.72956696727926e-07, + "loss": 0.2885, + "step": 7645, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9161947904869762, + "success_rate.epoch.env.math": 0.9730215827338129, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8315926892950392, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868890231037378, + "success_rate.epoch.global": 0.9010522742701969, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9916213768115942, + "tokens_p.mean_in_band": 0.7005208333333334, + "tokens_rate.above_band": 0.9787234042553191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02127659574468085 + }, + { + "epoch": 1.629740093736685, + "grad_norm": 231.5486625392195, + "learning_rate": 3.729243853277268e-07, + "loss": 0.4174, + "step": 7650, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9153498871331829, + "success_rate.epoch.env.math": 0.9730215827338129, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8315972222222222, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.868905475954607, + "success_rate.epoch.global": 0.9008810572687225, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9932565789473684, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.8715596330275229, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12844036697247707 + }, + { + "epoch": 1.6308052833404347, + "grad_norm": 150.55881028102985, + "learning_rate": 3.728920645083285e-07, + "loss": 0.1494, + "step": 7655, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9153498871331829, + "success_rate.epoch.env.math": 0.9730700179533214, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8316017316017316, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689266982600343, + "success_rate.epoch.global": 0.9008795669824087, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9941821808510638, + "tokens_p.mean_in_band": 0.610546875, + "tokens_rate.above_band": 0.9740932642487047, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025906735751295335 + }, + { + "epoch": 1.631870472944184, + "grad_norm": 148.96900970374492, + "learning_rate": 3.728597342932862e-07, + "loss": 0.3319, + "step": 7660, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9153498871331829, + "success_rate.epoch.env.math": 0.9731182795698925, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8320379965457686, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8689854658654027, + "success_rate.epoch.global": 0.901046943600135, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.994758064516129, + "tokens_p.mean_in_band": 0.605078125, + "tokens_rate.above_band": 0.96875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03125 + }, + { + "epoch": 1.6329356625479337, + "grad_norm": 162.08260057598574, + "learning_rate": 3.72827394706162e-07, + "loss": 0.4404, + "step": 7665, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.915445321307779, + "success_rate.epoch.env.math": 0.9731503579952268, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8319689788884101, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8686716567533407, + "success_rate.epoch.global": 0.9008766014834795, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9959935897435898, + "tokens_p.mean_in_band": 0.6919921875, + "tokens_rate.above_band": 0.9122807017543859, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08771929824561403 + }, + { + "epoch": 1.634000852151683, + "grad_norm": 342.6591660322463, + "learning_rate": 3.7279504577052467e-07, + "loss": 0.2027, + "step": 7670, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8690476190476191, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9146067415730337, + "success_rate.epoch.env.math": 0.9732142857142857, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8321136461472234, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8686856714112366, + "success_rate.epoch.global": 0.9008751262201279, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0009722222222222, + "tokens_p.mean_in_band": 0.4822048611111111, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 1.6350660417554326, + "grad_norm": 81.46516230391956, + "learning_rate": 3.727626875099499e-07, + "loss": 0.1922, + "step": 7675, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8698224852071006, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9147025813692481, + "success_rate.epoch.env.math": 0.9732461355529132, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8324742268041238, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8684949252215465, + "success_rate.epoch.global": 0.9008736559139785, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963490099009901, + "tokens_p.mean_in_band": 0.6461397058823529, + "tokens_rate.above_band": 0.9674329501915708, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032567049808429116 + }, + { + "epoch": 1.6361312313591818, + "grad_norm": 92.70969757869626, + "learning_rate": 3.727303199480203e-07, + "loss": 0.4116, + "step": 7680, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9147025813692481, + "success_rate.epoch.env.math": 0.9732620320855615, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8316916488222698, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8685111325040324, + "success_rate.epoch.global": 0.9005367326400536, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5714285714285714, + "success_rate.window.env_macro_mean": 0.8928571428571428, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9967640532544378, + "tokens_p.mean_in_band": 0.412109375, + "tokens_rate.above_band": 0.9811320754716981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018867924528301886 + }, + { + "epoch": 1.6371964209629315, + "grad_norm": 196.54618252657176, + "learning_rate": 3.7269794310832487e-07, + "loss": 0.3494, + "step": 7685, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9501779359430605, + "success_rate.epoch.env.logic": 0.9149888143176734, + "success_rate.epoch.env.math": 0.9733096085409253, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8319076133447391, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8685772879466978, + "success_rate.epoch.global": 0.9007032819825854, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997125, + "tokens_p.mean_in_band": 0.55078125, + "tokens_rate.above_band": 0.998003992015968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001996007984031936 + }, + { + "epoch": 1.6382616105666807, + "grad_norm": 65.03341026716015, + "learning_rate": 3.726655570144599e-07, + "loss": 0.2486, + "step": 7690, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.950530035335689, + "success_rate.epoch.env.logic": 0.9140625, + "success_rate.epoch.env.math": 0.9733412322274881, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8316958564715934, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8685947989999108, + "success_rate.epoch.global": 0.9005349381477766, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961412535079514, + "tokens_p.mean_in_band": 0.5925071022727273, + "tokens_rate.above_band": 0.9798350137488543, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02016498625114574 + }, + { + "epoch": 1.6393268001704304, + "grad_norm": 103.85645798224259, + "learning_rate": 3.7263316169002793e-07, + "loss": 0.2125, + "step": 7695, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8713450292397661, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9507042253521126, + "success_rate.epoch.env.logic": 0.9141583054626533, + "success_rate.epoch.env.math": 0.9733570159857904, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8320545609548167, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8709588401153131, + "success_rate.epoch.global": 0.9007009345794392, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983407079646017, + "tokens_p.mean_in_band": 0.8033854166666666, + "tokens_rate.above_band": 0.9947183098591549, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00528169014084507 + }, + { + "epoch": 1.6403919897741797, + "grad_norm": 62.121610755628375, + "learning_rate": 3.726007571586385e-07, + "loss": 0.1938, + "step": 7700, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.914349276974416, + "success_rate.epoch.env.math": 0.9733885274985216, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8322690506598552, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710872218993547, + "success_rate.epoch.global": 0.900866377874042, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9998265815760267, + "tokens_p.mean_in_band": 0.734375, + "tokens_rate.above_band": 0.9988913525498891, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0011086474501108647 + }, + { + "epoch": 1.6414571793779293, + "grad_norm": 275.0158453149495, + "learning_rate": 3.7256834344390776e-07, + "loss": 0.2968, + "step": 7705, + "success_rate.epoch.env.abd": 0.9853479853479854, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.9147286821705426, + "success_rate.epoch.env.math": 0.973435655253837, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8324117396852403, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871143866415514, + "success_rate.epoch.global": 0.9010312707917498, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982930672268907, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9916666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008333333333333333 + }, + { + "epoch": 1.6425223689816786, + "grad_norm": 71.57081103326922, + "learning_rate": 3.725359205694587e-07, + "loss": 0.3018, + "step": 7710, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.9150110375275938, + "success_rate.epoch.env.math": 0.972877358490566, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.832625318606627, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8711576264038056, + "success_rate.epoch.global": 0.9010295582862836, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967277486910995, + "tokens_p.mean_in_band": 0.68115234375, + "tokens_rate.above_band": 0.9794871794871794, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020512820512820513 + }, + { + "epoch": 1.6435875585854283, + "grad_norm": 95.39845993364986, + "learning_rate": 3.725034885589208e-07, + "loss": 0.318, + "step": 7715, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.951048951048951, + "success_rate.epoch.env.logic": 0.9151047409040793, + "success_rate.epoch.env.math": 0.9729252501471454, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8329800763035184, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712183637489375, + "success_rate.epoch.global": 0.9011936339522546, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9903980446927374, + "tokens_p.mean_in_band": 0.869140625, + "tokens_rate.above_band": 0.988950276243094, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011049723756906077 + }, + { + "epoch": 1.6446527481891777, + "grad_norm": 329.82682850415637, + "learning_rate": 3.7247104743593026e-07, + "loss": 0.3167, + "step": 7720, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9152915291529153, + "success_rate.epoch.env.math": 0.9729570840681951, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8328396106644097, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712553930179613, + "success_rate.epoch.global": 0.9011916583912611, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992129629629629, + "tokens_p.mean_in_band": 0.6227678571428571, + "tokens_rate.above_band": 0.9897360703812317, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010263929618768328 + }, + { + "epoch": 1.6457179377929272, + "grad_norm": 18.034470361736645, + "learning_rate": 3.7243859722413e-07, + "loss": 0.0787, + "step": 7725, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9153846153846154, + "success_rate.epoch.env.math": 0.9730363423212193, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8329103214890017, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713013773546326, + "success_rate.epoch.global": 0.9013549239920687, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964689265536724, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.6467831273966766, + "grad_norm": 119.38978016918082, + "learning_rate": 3.7240613794716945e-07, + "loss": 0.4119, + "step": 7730, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9155701754385965, + "success_rate.epoch.env.math": 0.9730679156908665, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8333333333333334, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713595723789925, + "success_rate.epoch.global": 0.9015176509402837, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9910239361702128, + "tokens_p.mean_in_band": 0.6832682291666666, + "tokens_rate.above_band": 0.8867924528301887, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11320754716981132 + }, + { + "epoch": 1.647848317000426, + "grad_norm": 187.38384494249783, + "learning_rate": 3.723736696287047e-07, + "loss": 0.2842, + "step": 7735, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9145673603504929, + "success_rate.epoch.env.math": 0.973115137346581, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8336842105263158, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870846325435917, + "success_rate.epoch.global": 0.9013504611330698, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.5, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979801829268292, + "tokens_p.mean_in_band": 0.564453125, + "tokens_rate.above_band": 0.9613130128956624, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038686987104337635 + }, + { + "epoch": 1.6489135066041756, + "grad_norm": 56.74886303063964, + "learning_rate": 3.723411922923985e-07, + "loss": 0.2978, + "step": 7740, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9136612021857924, + "success_rate.epoch.env.math": 0.9732090856144437, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8337542087542088, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708291889074705, + "success_rate.epoch.global": 0.9013482407102926, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962518740629686, + "tokens_p.mean_in_band": 0.4126953125, + "tokens_rate.above_band": 0.9569583931133429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043041606886657105 + }, + { + "epoch": 1.649978696207925, + "grad_norm": 394.70224775920286, + "learning_rate": 3.7230870596192e-07, + "loss": 0.332, + "step": 7745, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9136612021857924, + "success_rate.epoch.env.math": 0.9726902963393376, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8337531486146096, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707819298697701, + "success_rate.epoch.global": 0.9011818778726198, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7916666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9930862831858407, + "tokens_p.mean_in_band": 0.455078125, + "tokens_rate.above_band": 0.904, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.096 + }, + { + "epoch": 1.6510438858116745, + "grad_norm": 334.73305344806573, + "learning_rate": 3.7227621066094506e-07, + "loss": 0.3101, + "step": 7750, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.867816091954023, + "success_rate.epoch.env.agentgym:sciworld": 0.9695431472081218, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9138495092693566, + "success_rate.epoch.env.math": 0.9727061556329849, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8336127409891031, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708713134779903, + "success_rate.epoch.global": 0.9011799410029498, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991359447004609, + "tokens_p.mean_in_band": 0.6497802734375, + "tokens_rate.above_band": 0.9938931297709923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0061068702290076335 + }, + { + "epoch": 1.652109075415424, + "grad_norm": 276.7578517848306, + "learning_rate": 3.72243706413156e-07, + "loss": 0.175, + "step": 7755, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9138495092693566, + "success_rate.epoch.env.math": 0.9727694090382387, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.833821682712432, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710465959763166, + "success_rate.epoch.global": 0.9013416230366492, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991508152173914, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9986431478968792, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013568521031207597 + }, + { + "epoch": 1.6531742650191734, + "grad_norm": 115.57766909779146, + "learning_rate": 3.7221119324224174e-07, + "loss": 0.339, + "step": 7760, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9131378935939196, + "success_rate.epoch.env.math": 0.9722382880277617, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8339606859054789, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8709600998891638, + "success_rate.epoch.global": 0.9011760862463247, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9966755319148937, + "tokens_p.mean_in_band": 0.5173611111111112, + "tokens_rate.above_band": 0.9543147208121827, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04568527918781726 + }, + { + "epoch": 1.6542394546229229, + "grad_norm": 67.82084549987316, + "learning_rate": 3.7217867117189754e-07, + "loss": 0.3183, + "step": 7765, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8700564971751412, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.913232104121475, + "success_rate.epoch.env.math": 0.9722382880277617, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.833959115561118, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8682101300922476, + "success_rate.epoch.global": 0.9010110893672537, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env.webshop": 0.0, + "success_rate.window.env_macro_mean": 0.7666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.996662265258216, + "tokens_p.mean_in_band": 0.6405222039473685, + "tokens_rate.above_band": 0.9573033707865168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04269662921348315 + }, + { + "epoch": 1.6553046442266723, + "grad_norm": 63.4554497851929, + "learning_rate": 3.721461402258253e-07, + "loss": 0.2909, + "step": 7770, + "success_rate.epoch.env.abd": 0.9855595667870036, + "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.9133261105092091, + "success_rate.epoch.env.math": 0.9722703639514731, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8338192419825073, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8679387478257078, + "success_rate.epoch.global": 0.9008466297622925, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981711195928753, + "tokens_p.mean_in_band": 0.6474609375, + "tokens_rate.above_band": 0.960880195599022, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039119804400977995 + }, + { + "epoch": 1.6563698338304218, + "grad_norm": 96.70753254800114, + "learning_rate": 3.721136004277334e-07, + "loss": 0.2652, + "step": 7775, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.9136069114470843, + "success_rate.epoch.env.math": 0.972318339100346, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8336106489184693, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8679543957372601, + "success_rate.epoch.global": 0.9008452535760728, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969429347826086, + "tokens_p.mean_in_band": 0.26953125, + "tokens_rate.above_band": 0.968421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031578947368421054 + }, + { + "epoch": 1.6574350234341713, + "grad_norm": 104.1371359488132, + "learning_rate": 3.7208105180133656e-07, + "loss": 0.295, + "step": 7780, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9137001078748651, + "success_rate.epoch.env.math": 0.9723820483314154, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8338870431893688, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8680090779614843, + "success_rate.epoch.global": 0.9010061668289516, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9963942307692307, + "tokens_p.mean_in_band": 0.755859375, + "tokens_rate.above_band": 0.9811320754716981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018867924528301886 + }, + { + "epoch": 1.6585002130379207, + "grad_norm": 45.06907586218594, + "learning_rate": 3.7204849437035593e-07, + "loss": 0.4137, + "step": 7785, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8715083798882681, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9138858988159311, + "success_rate.epoch.env.math": 0.9724296381390006, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8341625207296849, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8681209617181481, + "success_rate.epoch.global": 0.9011665586519767, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956550802139037, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9894179894179894, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010582010582010581 + }, + { + "epoch": 1.6595654026416702, + "grad_norm": 1030.1108191459152, + "learning_rate": 3.720159281585192e-07, + "loss": 0.2991, + "step": 7790, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8715083798882681, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9139784946236559, + "success_rate.epoch.env.math": 0.971919770773639, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8340231788079471, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.868084065380827, + "success_rate.epoch.global": 0.9010029116790682, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973591549295775, + "tokens_p.mean_in_band": 0.611328125, + "tokens_rate.above_band": 0.9466666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05333333333333334 + }, + { + "epoch": 1.6606305922454196, + "grad_norm": 142.16290615659017, + "learning_rate": 3.7198335318956043e-07, + "loss": 0.2497, + "step": 7795, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9139784946236559, + "success_rate.epoch.env.math": 0.9714448886350657, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8342290202563043, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8681245018938945, + "success_rate.epoch.global": 0.901001291989664, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967105263157895, + "tokens_p.mean_in_band": 0.6663411458333334, + "tokens_rate.above_band": 0.9858490566037735, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014150943396226415 + }, + { + "epoch": 1.661695781849169, + "grad_norm": 97.74415695452171, + "learning_rate": 3.7195076948721994e-07, + "loss": 0.5769, + "step": 7800, + "success_rate.epoch.env.abd": 0.985663082437276, + "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9130901287553648, + "success_rate.epoch.env.math": 0.9714611872146118, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8345709570957096, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8680961824122579, + "success_rate.epoch.global": 0.9009996775233795, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962789415656009, + "tokens_p.mean_in_band": 0.5932291666666667, + "tokens_rate.above_band": 0.967982924226254, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032017075773746 + }, + { + "epoch": 1.6627609714529186, + "grad_norm": 232.95073519629534, + "learning_rate": 3.719181770752445e-07, + "loss": 0.191, + "step": 7805, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9132762312633833, + "success_rate.epoch.env.math": 0.9715261958997722, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8343634116192831, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8681047977753269, + "success_rate.epoch.global": 0.9009980682549903, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959239130434783, + "tokens_p.mean_in_band": 0.4125, + "tokens_rate.above_band": 0.9484536082474226, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05154639175257732 + }, + { + "epoch": 1.6638261610566683, + "grad_norm": 143.5885360203726, + "learning_rate": 3.7188557597738726e-07, + "loss": 0.2483, + "step": 7810, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9518900343642611, + "success_rate.epoch.env.logic": 0.9134615384615384, + "success_rate.epoch.env.math": 0.9715909090909091, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8344997941539728, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8682191844962877, + "success_rate.epoch.global": 0.9011571841851495, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992260061919505, + "tokens_p.mean_in_band": 0.8138020833333334, + "tokens_rate.above_band": 0.9953775038520801, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004622496147919877 + }, + { + "epoch": 1.6648913506604175, + "grad_norm": 104.78913253365752, + "learning_rate": 3.718529662174077e-07, + "loss": 0.1087, + "step": 7815, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.952054794520548, + "success_rate.epoch.env.logic": 0.9124866595517609, + "success_rate.epoch.env.math": 0.9716231555051078, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8348397699260477, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8681839974302562, + "success_rate.epoch.global": 0.9011553273427471, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990706319702602, + "tokens_p.mean_in_band": 0.6966145833333334, + "tokens_rate.above_band": 0.9889705882352942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011029411764705883 + }, + { + "epoch": 1.6659565402641672, + "grad_norm": 109.81806305288673, + "learning_rate": 3.7182034781907153e-07, + "loss": 0.3105, + "step": 7820, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8736263736263736, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9116080937167199, + "success_rate.epoch.env.math": 0.9716713881019831, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8350430857611818, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8682053442431059, + "success_rate.epoch.global": 0.9011534764498558, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961106115107914, + "tokens_p.mean_in_band": 0.7075892857142857, + "tokens_rate.above_band": 0.99002849002849, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009971509971509971 + }, + { + "epoch": 1.6670217298679164, + "grad_norm": 174.628180767647, + "learning_rate": 3.717877208061508e-07, + "loss": 0.1941, + "step": 7825, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9116080937167199, + "success_rate.epoch.env.math": 0.9717194570135747, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.834903727980336, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8683544166725216, + "success_rate.epoch.global": 0.9011516314779271, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991020114942529, + "tokens_p.mean_below_band": 4.1443854570388794e-08, + "tokens_rate.above_band": 0.997134670487106, + "tokens_rate.below_band": 0.0028653295128939827, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.668086919471666, + "grad_norm": 135.3204835471677, + "learning_rate": 3.7175508520242383e-07, + "loss": 0.4867, + "step": 7830, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9116080937167199, + "success_rate.epoch.env.math": 0.9717992103778906, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8343558282208589, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8683252920885639, + "success_rate.epoch.global": 0.900990099009901, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955357142857143, + "tokens_p.mean_in_band": 0.5114182692307693, + "tokens_rate.above_band": 0.91875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08125 + }, + { + "epoch": 1.6691521090754153, + "grad_norm": 52.76944006340344, + "learning_rate": 3.717224410316753e-07, + "loss": 0.3032, + "step": 7835, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9117021276595745, + "success_rate.epoch.env.math": 0.9719101123595506, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8344235486508585, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8684121754034553, + "success_rate.epoch.global": 0.9011479591836735, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989795918367347, + "tokens_p.mean_in_band": 0.60546875, + "tokens_rate.above_band": 0.9919028340080972, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008097165991902834 + }, + { + "epoch": 1.670217298679165, + "grad_norm": 103.25510566671726, + "learning_rate": 3.7168978831769595e-07, + "loss": 0.1262, + "step": 7840, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9117959617428267, + "success_rate.epoch.env.math": 0.9719730941704036, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8343533251733987, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.868420047441332, + "success_rate.epoch.global": 0.9011461318051576, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9916424418604651, + "tokens_p.mean_in_band": 0.470703125, + "tokens_rate.above_band": 0.9148936170212766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0851063829787234 + }, + { + "epoch": 1.6712824882829143, + "grad_norm": 76.9395039492631, + "learning_rate": 3.716571270842828e-07, + "loss": 0.2373, + "step": 7845, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9110169491525424, + "success_rate.epoch.env.math": 0.9719887955182073, + "success_rate.epoch.env.sat": 0.13157894736842105, + "success_rate.epoch.env.science": 0.8345558272208639, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.868073851571861, + "success_rate.epoch.global": 0.9009853782581055, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977678571428571, + "tokens_p.mean_in_band": 0.5089142628205128, + "tokens_rate.above_band": 0.965938864628821, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03406113537117904 + }, + { + "epoch": 1.672347677886664, + "grad_norm": 57.945199572414275, + "learning_rate": 3.7162445735523933e-07, + "loss": 0.2576, + "step": 7850, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9428571428571428, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9111111111111111, + "success_rate.epoch.env.math": 0.9720357941834452, + "success_rate.epoch.env.sat": 0.13157894736842105, + "success_rate.epoch.env.science": 0.8347578347578347, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8656033037996725, + "success_rate.epoch.global": 0.900983814662012, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967512376237624, + "tokens_p.mean_in_band": 0.76220703125, + "tokens_rate.above_band": 0.9901960784313726, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00980392156862745 + }, + { + "epoch": 1.6734128674904132, + "grad_norm": 84.09873849250378, + "learning_rate": 3.715917791543748e-07, + "loss": 0.3459, + "step": 7855, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9428571428571428, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9113924050632911, + "success_rate.epoch.env.math": 0.9720826353992184, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8345528455284553, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8653077880519706, + "success_rate.epoch.global": 0.9008238276299113, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9949324324324325, + "tokens_p.mean_in_band": 0.6822916666666666, + "tokens_rate.above_band": 0.8457142857142858, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15428571428571428 + }, + { + "epoch": 1.6744780570941629, + "grad_norm": 84.71408702788854, + "learning_rate": 3.7155909250550494e-07, + "loss": 0.2583, + "step": 7860, + "success_rate.epoch.env.abd": 0.9858657243816255, + "success_rate.epoch.env.agentgym:alfworld": 0.8763440860215054, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9428571428571428, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9104320337197049, + "success_rate.epoch.env.math": 0.9720982142857143, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8344155844155844, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8653493371007628, + "success_rate.epoch.global": 0.9006643467257197, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980292792792793, + "tokens_p.mean_in_band": 0.6193576388888888, + "tokens_rate.above_band": 0.9866666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013333333333333334 + }, + { + "epoch": 1.675543246697912, + "grad_norm": 75.08572573751955, + "learning_rate": 3.7152639743245156e-07, + "loss": 0.4514, + "step": 7865, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8770053475935828, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9095688748685594, + "success_rate.epoch.env.math": 0.9720982142857143, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8343458890238963, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8654734714182779, + "success_rate.epoch.global": 0.9005053695514845, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.86, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974023929471033, + "tokens_p.mean_in_band": 0.65875, + "tokens_rate.above_band": 0.9407582938388626, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05924170616113744 + }, + { + "epoch": 1.6766084363016618, + "grad_norm": 42.78193198434463, + "learning_rate": 3.714936939590425e-07, + "loss": 0.1738, + "step": 7870, + "success_rate.epoch.env.abd": 0.9859649122807017, + "success_rate.epoch.env.agentgym:alfworld": 0.8776595744680851, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9097586568730325, + "success_rate.epoch.env.math": 0.9721448467966574, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8337378640776699, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8655036564879732, + "success_rate.epoch.global": 0.9003468937243772, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997995283018868, + "tokens_p.mean_in_band": 0.7364783653846154, + "tokens_rate.above_band": 0.9532374100719424, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046762589928057555 + }, + { + "epoch": 1.677673625905411, + "grad_norm": 76.61180309856198, + "learning_rate": 3.714609821091119e-07, + "loss": 0.2391, + "step": 7875, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8776595744680851, + "success_rate.epoch.env.agentgym:sciworld": 0.9707317073170731, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.9721913236929922, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8336025848142165, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8655216870326985, + "success_rate.epoch.global": 0.9003463476070529, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996846619576185, + "tokens_p.mean_in_band": 0.6624348958333334, + "tokens_rate.above_band": 0.9969818913480886, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0030181086519114686 + }, + { + "epoch": 1.6787388155091607, + "grad_norm": 63.07588982984957, + "learning_rate": 3.7142826190649993e-07, + "loss": 0.4093, + "step": 7880, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8789473684210526, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.9722222222222222, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8338709677419355, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8656788828963904, + "success_rate.epoch.global": 0.9004873447571137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986702127659575, + "tokens_p.mean_in_band": 0.8203125, + "tokens_rate.above_band": 0.9939577039274925, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006042296072507553 + }, + { + "epoch": 1.67980400511291, + "grad_norm": 93.0766110963747, + "learning_rate": 3.713955333750528e-07, + "loss": 0.2856, + "step": 7885, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8789473684210526, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9527027027027027, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.9722376457523598, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.833869670152856, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8657936221514113, + "success_rate.epoch.global": 0.9004865798147857, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993898186889819, + "tokens_p.mean_in_band": 0.6865234375, + "tokens_rate.above_band": 0.9972183588317107, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0027816411682892906 + }, + { + "epoch": 1.6808691947166596, + "grad_norm": 190.316150312791, + "learning_rate": 3.7136279653862284e-07, + "loss": 0.2751, + "step": 7890, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9527027027027027, + "success_rate.epoch.env.logic": 0.910135841170324, + "success_rate.epoch.env.math": 0.9722376457523598, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.833868378812199, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8658768115846098, + "success_rate.epoch.global": 0.9004858172700204, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99755859375, + "tokens_p.mean_in_band": 0.6453993055555556, + "tokens_rate.above_band": 0.9660377358490566, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033962264150943396 + }, + { + "epoch": 1.6819343843204089, + "grad_norm": 131.79297529971197, + "learning_rate": 3.713300514210684e-07, + "loss": 0.2255, + "step": 7895, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9530201342281879, + "success_rate.epoch.env.logic": 0.9102296450939458, + "success_rate.epoch.env.math": 0.9722530521642619, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8342010412494995, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8659458392480925, + "success_rate.epoch.global": 0.9006259780907668, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996260683760684, + "tokens_p.mean_in_band": 0.7464488636363636, + "tokens_rate.above_band": 0.9906858594411516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009314140558848433 + }, + { + "epoch": 1.6829995739241586, + "grad_norm": 431.9338913833118, + "learning_rate": 3.712972980462539e-07, + "loss": 0.4689, + "step": 7900, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.8802083333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9530201342281879, + "success_rate.epoch.env.logic": 0.9102296450939458, + "success_rate.epoch.env.math": 0.9723145071982281, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8336665333866453, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8659642811051864, + "success_rate.epoch.global": 0.90046875, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995788409703504, + "tokens_p.mean_in_band": 0.540625, + "tokens_rate.above_band": 0.9867021276595744, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013297872340425532 + }, + { + "epoch": 1.684064763527908, + "grad_norm": 112.17255266011806, + "learning_rate": 3.712645364380498e-07, + "loss": 0.214, + "step": 7905, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.8808290155440415, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9094693028095734, + "success_rate.epoch.env.math": 0.9723604201216142, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8337994406711946, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8659821250534745, + "success_rate.epoch.global": 0.9004680187207488, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981060606060606, + "tokens_p.mean_in_band": 0.49107142857142855, + "tokens_rate.above_band": 0.9889589905362776, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011041009463722398 + }, + { + "epoch": 1.6851299531316575, + "grad_norm": 126.85344346925532, + "learning_rate": 3.7123176662033244e-07, + "loss": 0.4201, + "step": 7910, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.8808290155440415, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9095634095634095, + "success_rate.epoch.env.math": 0.9723909442297074, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8326026305300916, + "success_rate.epoch.env.webshop": 0.9705882352941176, + "success_rate.epoch.env_macro_mean": 0.8659656783551085, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9980742296918768, + "tokens_p.mean_in_band": 0.5420386904761905, + "tokens_rate.above_band": 0.9444444444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05555555555555555 + }, + { + "epoch": 1.686195142735407, + "grad_norm": 144.61063935745233, + "learning_rate": 3.7119898861698433e-07, + "loss": 0.2579, + "step": 7915, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9663461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9098445595854923, + "success_rate.epoch.env.math": 0.9724214009928296, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8327359617682198, + "success_rate.epoch.env.webshop": 0.9705882352941176, + "success_rate.epoch.env_macro_mean": 0.8656419761625603, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979748255234298, + "tokens_p.mean_in_band": 0.441650390625, + "tokens_rate.above_band": 0.9980099502487563, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001990049751243781 + }, + { + "epoch": 1.6872603323391564, + "grad_norm": 345.36578703149934, + "learning_rate": 3.711662024518937e-07, + "loss": 0.3189, + "step": 7920, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9099378881987578, + "success_rate.epoch.env.math": 0.9724517906336089, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8330683624801272, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.865774474188719, + "success_rate.epoch.global": 0.9001552795031056, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987836826347305, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9940476190476191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005952380952380952 + }, + { + "epoch": 1.6883255219429059, + "grad_norm": 363.8245621848165, + "learning_rate": 3.711334081489551e-07, + "loss": 0.1861, + "step": 7925, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9099378881987578, + "success_rate.epoch.env.math": 0.9724972497249725, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8333333333333334, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8658357518091786, + "success_rate.epoch.global": 0.9003100775193799, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987458193979933, + "tokens_p.mean_in_band": 0.740625, + "tokens_rate.above_band": 0.9917081260364843, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008291873963515755 + }, + { + "epoch": 1.6893907115466553, + "grad_norm": 654.546833984525, + "learning_rate": 3.711006057320686e-07, + "loss": 0.5475, + "step": 7930, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9100310237849017, + "success_rate.epoch.env.math": 0.9725576289791438, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8332673267326732, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8658437071036927, + "success_rate.epoch.global": 0.9003095975232198, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9912014563106796, + "tokens_p.mean_in_band": 0.675, + "tokens_rate.above_band": 0.9537037037037037, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046296296296296294 + }, + { + "epoch": 1.6904559011504048, + "grad_norm": 32.725839028054885, + "learning_rate": 3.710677952251404e-07, + "loss": 0.1983, + "step": 7935, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9102167182662538, + "success_rate.epoch.env.math": 0.9726027397260274, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8328063241106719, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8658227800678958, + "success_rate.epoch.global": 0.9001545595054096, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9951636904761905, + "tokens_p.mean_in_band": 0.69296875, + "tokens_rate.above_band": 0.8936170212765957, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10638297872340426 + }, + { + "epoch": 1.6915210907541542, + "grad_norm": 134.6269161042348, + "learning_rate": 3.7103497665208255e-07, + "loss": 0.1893, + "step": 7940, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9444444444444444, + "success_rate.epoch.env.ded": 0.9501661129568106, + "success_rate.epoch.env.logic": 0.9104938271604939, + "success_rate.epoch.env.math": 0.9726327312534209, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8329383886255924, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8655934754719575, + "success_rate.epoch.global": 0.9001543209876544, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968370445344129, + "tokens_p.mean_in_band": 0.3046875, + "tokens_rate.above_band": 0.9959677419354839, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004032258064516129 + }, + { + "epoch": 1.6925862803579037, + "grad_norm": 148.10737098421555, + "learning_rate": 3.7100215003681305e-07, + "loss": 0.2395, + "step": 7945, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9503311258278145, + "success_rate.epoch.env.logic": 0.9105858170606372, + "success_rate.epoch.env.math": 0.9726626571897211, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.832807570977918, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8657583936616832, + "success_rate.epoch.global": 0.9001540832049306, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980636833046471, + "tokens_p.mean_in_band": 0.6337890625, + "tokens_rate.above_band": 0.9797639123102867, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02023608768971332 + }, + { + "epoch": 1.6936514699616532, + "grad_norm": 95.46044006950885, + "learning_rate": 3.709693154032557e-07, + "loss": 0.4849, + "step": 7950, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, + "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9504950495049505, + "success_rate.epoch.env.logic": 0.9098360655737705, + "success_rate.epoch.env.math": 0.9721615720524017, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8329393223010244, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.865726831976071, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973572938689218, + "tokens_p.mean_in_band": 0.545166015625, + "tokens_rate.above_band": 0.967280163599182, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032719836400818 + }, + { + "epoch": 1.6947166595654026, + "grad_norm": 64.33283153312027, + "learning_rate": 3.7093647277534005e-07, + "loss": 0.2293, + "step": 7955, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, + "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9509803921568627, + "success_rate.epoch.env.logic": 0.9100204498977505, + "success_rate.epoch.env.math": 0.9721919302071974, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8326771653543307, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8658387937013114, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994565217391305, + "tokens_p.mean_in_band": 0.337890625, + "tokens_rate.above_band": 0.9971098265895953, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002890173410404624 + }, + { + "epoch": 1.695781849169152, + "grad_norm": 115.02976587760558, + "learning_rate": 3.7090362217700165e-07, + "loss": 0.2314, + "step": 7960, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9509803921568627, + "success_rate.epoch.env.logic": 0.910295616717635, + "success_rate.epoch.env.math": 0.9722524483133841, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8328088119590873, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8659359855834965, + "success_rate.epoch.global": 0.9001533742331288, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996895032051282, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.6968470387729016, + "grad_norm": 407.4669622444961, + "learning_rate": 3.7087076363218177e-07, + "loss": 0.2165, + "step": 7965, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9509803921568627, + "success_rate.epoch.env.logic": 0.9103869653767821, + "success_rate.epoch.env.math": 0.9723127035830619, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8327444051825678, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8659439125973424, + "success_rate.epoch.global": 0.9001531393568147, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9926658163265306, + "tokens_p.mean_in_band": 0.621875, + "tokens_rate.above_band": 0.9074074074074074, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09259259259259259 + }, + { + "epoch": 1.697912228376651, + "grad_norm": 88.46519403789512, + "learning_rate": 3.708378971648275e-07, + "loss": 0.2573, + "step": 7970, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9511400651465798, + "success_rate.epoch.env.logic": 0.9096446700507614, + "success_rate.epoch.env.math": 0.9723427331887202, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8328100470957613, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8659959478231603, + "success_rate.epoch.global": 0.9001529051987768, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992529880478087, + "tokens_p.mean_in_band": 0.4921875, + "tokens_rate.above_band": 0.984313725490196, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01568627450980392 + }, + { + "epoch": 1.6989774179804005, + "grad_norm": 463.94907514086873, + "learning_rate": 3.708050227988916e-07, + "loss": 0.2542, + "step": 7975, + "success_rate.epoch.env.abd": 0.9862542955326461, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9512987012987013, + "success_rate.epoch.env.logic": 0.9089989888776542, + "success_rate.epoch.env.math": 0.9718462371413102, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8328756375049039, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8659168067614939, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9969574036511156, + "tokens_p.mean_below_band": 1.7229467630386353e-08, + "tokens_p.mean_in_band": 0.6917067307692307, + "tokens_rate.above_band": 0.9723865877712031, + "tokens_rate.below_band": 0.0019723865877712033, + "tokens_rate.in_band": 0.02564102564102564 + }, + { + "epoch": 1.70004260758415, + "grad_norm": 396.01476407258144, + "learning_rate": 3.707721405583328e-07, + "loss": 0.5476, + "step": 7980, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9512987012987013, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9718918918918918, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8330721003134797, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8656000730947819, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994158878504673, + "tokens_p.mean_in_band": 0.69140625, + "tokens_rate.above_band": 0.9907407407407407, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009259259259259259 + }, + { + "epoch": 1.7011077971878994, + "grad_norm": 953.6407265468724, + "learning_rate": 3.707392504671153e-07, + "loss": 0.3446, + "step": 7985, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9093655589123867, + "success_rate.epoch.env.math": 0.9719373988127361, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8328112764291308, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8656678443188818, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998078893442623, + "tokens_p.mean_in_band": 0.5026041666666666, + "tokens_rate.above_band": 0.9938900203665988, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006109979633401222 + }, + { + "epoch": 1.7021729867916489, + "grad_norm": 51.83978408833236, + "learning_rate": 3.707063525492093e-07, + "loss": 0.1982, + "step": 7990, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9094567404426559, + "success_rate.epoch.env.math": 0.9714593430263866, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8330727130570759, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8653774214521952, + "success_rate.epoch.global": 0.8998480243161094, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9879907024793388, + "tokens_p.mean_in_band": 0.7586379278273809, + "tokens_rate.above_band": 0.852112676056338, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14788732394366197 + }, + { + "epoch": 1.7032381763953985, + "grad_norm": 179.74604009249427, + "learning_rate": 3.706734468285905e-07, + "loss": 0.3836, + "step": 7995, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9095477386934674, + "success_rate.epoch.env.math": 0.9715359828141783, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8333333333333334, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8654163540262734, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9943524096385542, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9540229885057471, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04597701149425287 + }, + { + "epoch": 1.7043033659991478, + "grad_norm": 133.6542587357131, + "learning_rate": 3.7064053332924024e-07, + "loss": 0.1729, + "step": 8000, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8793969849246231, + "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9097291875626881, + "success_rate.epoch.env.math": 0.9715512614063339, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8336579664978574, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8655329547173274, + "success_rate.epoch.global": 0.9001515151515151, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993083501006036, + "tokens_p.mean_in_band": 0.783203125, + "tokens_rate.above_band": 0.9841584158415841, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015841584158415842 + }, + { + "epoch": 1.7053685556028975, + "grad_norm": 72.77279599004258, + "learning_rate": 3.706076120751459e-07, + "loss": 0.2909, + "step": 8005, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9099099099099099, + "success_rate.epoch.env.math": 0.9715817694369974, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8339813374805599, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8656363743933241, + "success_rate.epoch.global": 0.900302571860817, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954690831556503, + "tokens_p.mean_in_band": 0.767578125, + "tokens_rate.above_band": 0.9791231732776617, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020876826722338204 + }, + { + "epoch": 1.7064337452066467, + "grad_norm": 52.66562918063131, + "learning_rate": 3.705746830903e-07, + "loss": 0.1271, + "step": 8010, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9099099099099099, + "success_rate.epoch.env.math": 0.971627408993576, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8342391304347826, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8657077395371452, + "success_rate.epoch.global": 0.9004531722054381, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987855007473841, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9955357142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004464285714285714 + }, + { + "epoch": 1.7074989348103964, + "grad_norm": 77.66567174897885, + "learning_rate": 3.705417463987011e-07, + "loss": 0.3153, + "step": 8015, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9716577540106952, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8337853545137544, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8655990414717789, + "success_rate.epoch.global": 0.9001508295625943, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.775, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0003375771604939, + "tokens_p.mean_in_band": 0.534148185483871, + "tokens_rate.above_band": 0.9543446244477173, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045655375552282766 + }, + { + "epoch": 1.7085641244141456, + "grad_norm": 639.6188200208629, + "learning_rate": 3.705088020243532e-07, + "loss": 0.4426, + "step": 8020, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9488817891373802, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9717031500266952, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8336557059961315, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656845113229089, + "success_rate.epoch.global": 0.9001506024096385, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977034120734908, + "tokens_p.mean_in_band": 0.734375, + "tokens_rate.above_band": 0.9857697283311773, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014230271668822769 + }, + { + "epoch": 1.7096293140178953, + "grad_norm": 79.15637329562013, + "learning_rate": 3.7047584999126587e-07, + "loss": 0.2122, + "step": 8025, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9490445859872612, + "success_rate.epoch.env.logic": 0.9093625498007968, + "success_rate.epoch.env.math": 0.9712306872669153, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8337843061461152, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.86569274540927, + "success_rate.epoch.global": 0.9001503759398496, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975694444444444, + "tokens_p.mean_in_band": 0.79296875, + "tokens_rate.above_band": 0.9761388286334056, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02386117136659436 + }, + { + "epoch": 1.7106945036216445, + "grad_norm": 67.31530185294233, + "learning_rate": 3.7044289032345433e-07, + "loss": 0.2967, + "step": 8030, + "success_rate.epoch.env.abd": 0.9864406779661017, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9490445859872612, + "success_rate.epoch.env.logic": 0.9093625498007968, + "success_rate.epoch.env.math": 0.9712765957446808, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8337191358024691, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656994084304815, + "success_rate.epoch.global": 0.9001501501501501, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9955645161290323, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9281437125748503, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0718562874251497 + }, + { + "epoch": 1.7117596932253942, + "grad_norm": 142.3132195378032, + "learning_rate": 3.704099230449394e-07, + "loss": 0.2215, + "step": 8035, + "success_rate.epoch.env.abd": 0.9865319865319865, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9492063492063492, + "success_rate.epoch.env.logic": 0.9093625498007968, + "success_rate.epoch.env.math": 0.9712918660287081, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8336542164035425, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8657313534707618, + "success_rate.epoch.global": 0.9001499250374813, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999498820754717, + "tokens_p.mean_in_band": 0.5667613636363636, + "tokens_rate.above_band": 0.996552804763397, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0034471952366029457 + }, + { + "epoch": 1.7128248828291435, + "grad_norm": 88.10647320103408, + "learning_rate": 3.703769481797474e-07, + "loss": 0.2556, + "step": 8040, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9492063492063492, + "success_rate.epoch.env.logic": 0.9086395233366436, + "success_rate.epoch.env.math": 0.9713375796178344, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8338461538461538, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656913370383665, + "success_rate.epoch.global": 0.9001497005988024, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961387973640856, + "tokens_p.mean_in_band": 0.45703125, + "tokens_rate.above_band": 0.9499217527386542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.050078247261345854 + }, + { + "epoch": 1.7138900724328932, + "grad_norm": 402.29852730174576, + "learning_rate": 3.703439657519101e-07, + "loss": 0.239, + "step": 8045, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.908820614469772, + "success_rate.epoch.env.math": 0.9713831478537361, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8336534767575874, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8657549526917083, + "success_rate.epoch.global": 0.9001494768310911, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995259626604434, + "tokens_p.mean_in_band": 0.701171875, + "tokens_rate.above_band": 0.9976717112922002, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002328288707799767 + }, + { + "epoch": 1.7149552620366424, + "grad_norm": 40.620780802998205, + "learning_rate": 3.7031097578546485e-07, + "loss": 0.3006, + "step": 8050, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.908820614469772, + "success_rate.epoch.env.math": 0.9714285714285714, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8337164750957854, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8657648092292568, + "success_rate.epoch.global": 0.9001492537313432, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9889705882352942, + "tokens_p.mean_in_band": 0.5950520833333334, + "tokens_rate.above_band": 0.9340659340659341, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06593406593406594 + }, + { + "epoch": 1.716020451640392, + "grad_norm": 62.86038435203946, + "learning_rate": 3.702779783044546e-07, + "loss": 0.2795, + "step": 8055, + "success_rate.epoch.env.abd": 0.9866220735785953, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9080118694362018, + "success_rate.epoch.env.math": 0.9714889123548046, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8339073861461921, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8657182091668577, + "success_rate.epoch.global": 0.9001490312965723, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0010758998435054, + "tokens_p.mean_in_band": 0.5855263157894737, + "tokens_rate.above_band": 0.9711246200607903, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028875379939209727 + }, + { + "epoch": 1.7170856412441413, + "grad_norm": 221.09680667015974, + "learning_rate": 3.7024497333292757e-07, + "loss": 0.3113, + "step": 8060, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9072063178677197, + "success_rate.epoch.env.math": 0.9715789473684211, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8339709257842387, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656629924551534, + "success_rate.epoch.global": 0.9001488095238095, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0004098360655738, + "tokens_p.mean_in_band": 0.4609375, + "tokens_rate.above_band": 0.9744408945686901, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025559105431309903 + }, + { + "epoch": 1.718150830847891, + "grad_norm": 63.3426700401906, + "learning_rate": 3.702119608949377e-07, + "loss": 0.3124, + "step": 8065, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9072063178677197, + "success_rate.epoch.env.math": 0.9711286089238845, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8341612533435231, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8652973208985284, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992666967509025, + "tokens_p.mean_below_band": 1.0477378964424133e-08, + "tokens_p.mean_in_band": 0.6213235294117647, + "tokens_rate.above_band": 0.9685314685314685, + "tokens_rate.below_band": 0.0017482517482517483, + "tokens_rate.in_band": 0.02972027972027972 + }, + { + "epoch": 1.7192160204516402, + "grad_norm": 99.12312219549374, + "learning_rate": 3.7017894101454405e-07, + "loss": 0.2504, + "step": 8070, + "success_rate.epoch.env.abd": 0.9867109634551495, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9495268138801262, + "success_rate.epoch.env.logic": 0.9075712881022615, + "success_rate.epoch.env.math": 0.9711437565582371, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.834351145038168, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8653676874098515, + "success_rate.epoch.global": 0.9001483679525223, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993327402135231, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.72028121005539, + "grad_norm": 146.81034238962428, + "learning_rate": 3.701459137158113e-07, + "loss": 0.3457, + "step": 8075, + "success_rate.epoch.env.abd": 0.9867549668874173, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9495268138801262, + "success_rate.epoch.env.logic": 0.9076620825147348, + "success_rate.epoch.env.math": 0.9711891042430592, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8342857142857143, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8653781160259523, + "success_rate.epoch.global": 0.9001481481481481, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992879746835443, + "tokens_p.mean_in_band": 0.5622209821428571, + "tokens_rate.above_band": 0.9575757575757575, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04242424242424243 + }, + { + "epoch": 1.7213463996591394, + "grad_norm": 84.3736690260916, + "learning_rate": 3.701128790228096e-07, + "loss": 0.3187, + "step": 8080, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9495268138801262, + "success_rate.epoch.env.logic": 0.9059745347698335, + "success_rate.epoch.env.math": 0.9712192569335426, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8345378470901483, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649543088166417, + "success_rate.epoch.global": 0.8998520710059171, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.logic": 0.3333333333333333, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.5833333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9911406911142454, + "tokens_p.mean_in_band": 0.24870102611940298, + "tokens_rate.above_band": 0.17467356491746736, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.8253264350825327 + }, + { + "epoch": 1.7224115892628888, + "grad_norm": 51.691261578257524, + "learning_rate": 3.700798369596143e-07, + "loss": 0.1247, + "step": 8085, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9498432601880877, + "success_rate.epoch.env.logic": 0.9059745347698335, + "success_rate.epoch.env.math": 0.9712493465760585, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8345351043643264, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8649855627461558, + "success_rate.epoch.global": 0.8998522895125554, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966142590286425, + "tokens_p.mean_in_band": 0.5130208333333334, + "tokens_rate.above_band": 0.992583436341162, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007416563658838072 + }, + { + "epoch": 1.7234767788666383, + "grad_norm": 55.81938843436817, + "learning_rate": 3.700467875503063e-07, + "loss": 0.5246, + "step": 8090, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9498432601880877, + "success_rate.epoch.env.logic": 0.9060665362035225, + "success_rate.epoch.env.math": 0.9712643678160919, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8345950037850114, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8650140667023277, + "success_rate.epoch.global": 0.8998525073746313, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9944852941176471, + "tokens_p.mean_in_band": 0.6853693181818182, + "tokens_rate.above_band": 0.9392265193370166, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06077348066298342 + }, + { + "epoch": 1.7245419684703878, + "grad_norm": 225.01972540603396, + "learning_rate": 3.700137308189717e-07, + "loss": 0.452, + "step": 8095, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9498432601880877, + "success_rate.epoch.env.logic": 0.9060665362035225, + "success_rate.epoch.env.math": 0.9713242961418144, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.834214501510574, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.864984923616081, + "success_rate.epoch.global": 0.8997054491899853, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9908854166666666, + "tokens_p.mean_in_band": 0.48974609375, + "tokens_rate.above_band": 0.9230769230769231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07692307692307693 + }, + { + "epoch": 1.7256071580741372, + "grad_norm": 44.033781178941474, + "learning_rate": 3.6998066678970207e-07, + "loss": 0.4498, + "step": 8100, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9060665362035225, + "success_rate.epoch.env.math": 0.9713839750260146, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8343396226415094, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8651806233026696, + "success_rate.epoch.global": 0.8998529411764706, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993523316062176, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.7266723476778867, + "grad_norm": 150.84605281193748, + "learning_rate": 3.699475954865942e-07, + "loss": 0.2065, + "step": 8105, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9060665362035225, + "success_rate.epoch.env.math": 0.9714285714285714, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8347760632292058, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8652243539381472, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9918893129770993, + "tokens_p.mean_in_band": 0.806640625, + "tokens_rate.above_band": 0.9703703703703703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02962962962962963 + }, + { + "epoch": 1.7277375372816361, + "grad_norm": 149.27970364787186, + "learning_rate": 3.699145169337502e-07, + "loss": 0.1931, + "step": 8110, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.906158357771261, + "success_rate.epoch.env.math": 0.9714730290456431, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8350864012021036, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8652791157924731, + "success_rate.epoch.global": 0.9001466275659824, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969855305466238, + "tokens_p.mean_in_band": 0.833984375, + "tokens_rate.above_band": 0.9936102236421726, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006389776357827476 + }, + { + "epoch": 1.7288027268853856, + "grad_norm": 82.62645146205433, + "learning_rate": 3.6988143115527753e-07, + "loss": 0.2641, + "step": 8115, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9459459459459459, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.90625, + "success_rate.epoch.env.math": 0.9715025906735751, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8350824587706147, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8653435937594192, + "success_rate.epoch.global": 0.9001464128843338, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969579646017699, + "tokens_p.mean_in_band": 0.740234375, + "tokens_rate.above_band": 0.9713467048710601, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02865329512893983 + }, + { + "epoch": 1.729867916489135, + "grad_norm": 48.766759075903, + "learning_rate": 3.698483381752888e-07, + "loss": 0.3593, + "step": 8120, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9063414634146342, + "success_rate.epoch.env.math": 0.9715468184169684, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8349550898203593, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8654868742881383, + "success_rate.epoch.global": 0.9001461988304094, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9930132113821138, + "tokens_p.mean_in_band": 0.65966796875, + "tokens_rate.above_band": 0.968503937007874, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031496062992125984 + }, + { + "epoch": 1.7309331060928845, + "grad_norm": 88.84159104133133, + "learning_rate": 3.6981523801790204e-07, + "loss": 0.3669, + "step": 8125, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9064327485380117, + "success_rate.epoch.env.math": 0.9716055756324212, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8352633545013074, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8655285385626637, + "success_rate.epoch.global": 0.9002919708029197, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959846368715084, + "tokens_p.mean_in_band": 0.775390625, + "tokens_rate.above_band": 0.9675675675675676, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032432432432432434 + }, + { + "epoch": 1.731998295696634, + "grad_norm": 136.23742159989214, + "learning_rate": 3.697821307072403e-07, + "loss": 0.4581, + "step": 8130, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9066147859922179, + "success_rate.epoch.env.math": 0.9716786817713697, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8350746268656717, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8655345763769836, + "success_rate.epoch.global": 0.9002915451895044, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938118811881188, + "tokens_p.mean_in_band": 0.6651785714285714, + "tokens_rate.above_band": 0.9351851851851852, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06481481481481481 + }, + { + "epoch": 1.7330634853003835, + "grad_norm": 156.7926497364516, + "learning_rate": 3.6974901626743203e-07, + "loss": 0.4604, + "step": 8135, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9057337220602527, + "success_rate.epoch.env.math": 0.9716932578486875, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8346984363365599, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8654396288345826, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9982938218390804, + "tokens_p.mean_in_band": 0.4948466532939189, + "tokens_rate.above_band": 0.96577243293247, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034227567067530065 + }, + { + "epoch": 1.734128674904133, + "grad_norm": 576.5655291026226, + "learning_rate": 3.697158947226108e-07, + "loss": 0.3873, + "step": 8140, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, + "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9501557632398754, + "success_rate.epoch.env.logic": 0.9058252427184466, + "success_rate.epoch.env.math": 0.9717368961973278, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8348828560803273, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656270449410545, + "success_rate.epoch.global": 0.9001453488372093, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995508982035928, + "tokens_p.mean_in_band": 0.8035714285714286, + "tokens_rate.above_band": 0.9916864608076009, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00831353919239905 + }, + { + "epoch": 1.7351938645078824, + "grad_norm": 106.07720845603625, + "learning_rate": 3.696827660969152e-07, + "loss": 0.3657, + "step": 8145, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, + "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9058252427184466, + "success_rate.epoch.env.math": 0.9717948717948718, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8350668647845468, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8654156580001827, + "success_rate.epoch.global": 0.9001451378809869, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991685144124168, + "tokens_p.mean_in_band": 0.7621527777777778, + "tokens_rate.above_band": 0.9616204690831557, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03837953091684435 + }, + { + "epoch": 1.7362590541116318, + "grad_norm": 98.6683963210045, + "learning_rate": 3.6964963041448934e-07, + "loss": 0.2948, + "step": 8150, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, + "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9059165858389913, + "success_rate.epoch.env.math": 0.9713408393039918, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8350018539117539, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8653767761598984, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9912383177570093, + "tokens_p.mean_in_band": 0.71640625, + "tokens_rate.above_band": 0.8770491803278688, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12295081967213115 + }, + { + "epoch": 1.7373242437153813, + "grad_norm": 72.99932873016495, + "learning_rate": 3.6961648769948215e-07, + "loss": 0.3644, + "step": 8155, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.968609865470852, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9060077519379846, + "success_rate.epoch.env.math": 0.9713554987212276, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8352462051092188, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656089019623913, + "success_rate.epoch.global": 0.9001447178002895, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972036891679749, + "tokens_p.mean_in_band": 0.76171875, + "tokens_rate.above_band": 0.9937597503900156, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0062402496099844 + }, + { + "epoch": 1.7383894333191308, + "grad_norm": 122.15960408016022, + "learning_rate": 3.6958333797604786e-07, + "loss": 0.2652, + "step": 8160, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.968609865470852, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9062801932367149, + "success_rate.epoch.env.math": 0.9714139867279225, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8354289940828402, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656556036241227, + "success_rate.epoch.global": 0.9002890173410405, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956018518518519, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.7394546229228802, + "grad_norm": 81.438866539525, + "learning_rate": 3.6955018126834564e-07, + "loss": 0.3715, + "step": 8165, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.968609865470852, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9062801932367149, + "success_rate.epoch.env.math": 0.9709628120224146, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8353636028054633, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656226282873714, + "success_rate.epoch.global": 0.9001443001443001, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984497389033943, + "tokens_p.mean_in_band": 0.4299879807692308, + "tokens_rate.above_band": 0.9671717171717171, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03282828282828283 + }, + { + "epoch": 1.74051981252663, + "grad_norm": 14.69828298801578, + "learning_rate": 3.6951701760054003e-07, + "loss": 0.348, + "step": 8170, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9062801932367149, + "success_rate.epoch.env.math": 0.9709775967413442, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8350515463917526, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8656596346109814, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981560559006211, + "tokens_p.mean_in_band": 0.56953125, + "tokens_rate.above_band": 0.9847094801223242, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01529051987767584 + }, + { + "epoch": 1.7415850021303791, + "grad_norm": 253.472915473384, + "learning_rate": 3.694838469968003e-07, + "loss": 0.2324, + "step": 8175, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.9688888888888889, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9064609450337512, + "success_rate.epoch.env.math": 0.971021860701576, + "success_rate.epoch.env.sat": 0.12195121951219512, + "success_rate.epoch.env.science": 0.835233541743288, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8654321000209712, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992917847025495, + "tokens_p.mean_in_band": 0.6957720588235294, + "tokens_rate.above_band": 0.9764868603042877, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02351313969571231 + }, + { + "epoch": 1.7426501917341288, + "grad_norm": 111.48827945199903, + "learning_rate": 3.694506694813011e-07, + "loss": 0.3127, + "step": 8180, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.9688888888888889, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9064609450337512, + "success_rate.epoch.env.math": 0.9710659898477157, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8355963302752294, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8673696310230646, + "success_rate.epoch.global": 0.9001436781609196, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9910714285714286, + "tokens_p.mean_in_band": 0.8018973214285714, + "tokens_rate.above_band": 0.9565217391304348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043478260869565216 + }, + { + "epoch": 1.743715381337878, + "grad_norm": 125.7077269336235, + "learning_rate": 3.694174850782219e-07, + "loss": 0.3711, + "step": 8185, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9065510597302505, + "success_rate.epoch.env.math": 0.9711246200607903, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8354105571847508, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8673911836162392, + "success_rate.epoch.global": 0.9001434720229555, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977985395189003, + "tokens_p.mean_in_band": 0.5066964285714286, + "tokens_rate.above_band": 0.9881154499151104, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011884550084889643 + }, + { + "epoch": 1.7447805709416278, + "grad_norm": 31.7327380812736, + "learning_rate": 3.6938429381174725e-07, + "loss": 0.2015, + "step": 8190, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9504643962848297, + "success_rate.epoch.env.logic": 0.9056785370548605, + "success_rate.epoch.env.math": 0.9711684370257967, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8357116721551409, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8673940284599115, + "success_rate.epoch.global": 0.9001432664756447, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9951923076923077, + "tokens_p.mean_in_band": 0.7449776785714286, + "tokens_rate.above_band": 0.9852631578947368, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014736842105263158 + }, + { + "epoch": 1.745845760545377, + "grad_norm": 105.58094073057569, + "learning_rate": 3.6935109570606666e-07, + "loss": 0.2123, + "step": 8195, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9507692307692308, + "success_rate.epoch.env.logic": 0.9057692307692308, + "success_rate.epoch.env.math": 0.9711830131445905, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8360116873630388, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.86747088023671, + "success_rate.epoch.global": 0.9002861230329041, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974264705882353, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9956076134699854, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004392386530014641 + }, + { + "epoch": 1.7469109501491267, + "grad_norm": 79.84535168853331, + "learning_rate": 3.6931789078537477e-07, + "loss": 0.3843, + "step": 8200, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9049904030710173, + "success_rate.epoch.env.math": 0.9712266532054518, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8357664233576643, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8692212628545971, + "success_rate.epoch.global": 0.9001428571428571, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9987058723693143, + "tokens_p.mean_in_band": 0.6809895833333334, + "tokens_rate.above_band": 0.9839679358717435, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01603206412825651 + }, + { + "epoch": 1.747976139752876, + "grad_norm": 164.7820734171724, + "learning_rate": 3.69284679073871e-07, + "loss": 0.3149, + "step": 8205, + "success_rate.epoch.env.abd": 0.9836601307189542, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9050814956855225, + "success_rate.epoch.env.math": 0.9712846347607053, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8358862144420132, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8692975264077076, + "success_rate.epoch.global": 0.9002853067047075, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957720588235294, + "tokens_p.mean_in_band": 0.7265625, + "tokens_rate.above_band": 0.9826589595375722, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017341040462427744 + }, + { + "epoch": 1.7490413293566256, + "grad_norm": 140.12892335407128, + "learning_rate": 3.692514605957599e-07, + "loss": 0.2303, + "step": 8210, + "success_rate.epoch.env.abd": 0.9837133550488599, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9050814956855225, + "success_rate.epoch.env.math": 0.9713135379969804, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8363041105856676, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8693429831086018, + "success_rate.epoch.global": 0.9004273504273504, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9919064748201439, + "tokens_p.mean_in_band": 0.720703125, + "tokens_rate.above_band": 0.9586206896551724, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041379310344827586 + }, + { + "epoch": 1.7501065189603748, + "grad_norm": 51.03269048770925, + "learning_rate": 3.692182353752507e-07, + "loss": 0.355, + "step": 8215, + "success_rate.epoch.env.abd": 0.9837662337662337, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9052631578947369, + "success_rate.epoch.env.math": 0.971356783919598, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.83617871413004, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8693568367807452, + "success_rate.epoch.global": 0.9004267425320057, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974264705882353, + "tokens_p.mean_in_band": 0.2421875, + "tokens_rate.above_band": 0.9770114942528736, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022988505747126436 + }, + { + "epoch": 1.7511717085641245, + "grad_norm": 33.93199028743441, + "learning_rate": 3.691850034365579e-07, + "loss": 0.1626, + "step": 8220, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9055343511450382, + "success_rate.epoch.env.math": 0.9713855421686747, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8363570391872278, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8690286198370384, + "success_rate.epoch.global": 0.9004261363636363, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961586378737541, + "tokens_p.mean_in_band": 0.721875, + "tokens_rate.above_band": 0.9836601307189542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016339869281045753 + }, + { + "epoch": 1.7522368981678738, + "grad_norm": 53.372944742560236, + "learning_rate": 3.6915176480390053e-07, + "loss": 0.127, + "step": 8225, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, + "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9510703363914373, + "success_rate.epoch.env.logic": 0.9057142857142857, + "success_rate.epoch.env.math": 0.9714285714285714, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8358695652173913, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.869016761533016, + "success_rate.epoch.global": 0.9002836879432624, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9945987654320988, + "tokens_p.mean_in_band": 0.6509650735294118, + "tokens_rate.above_band": 0.9050279329608939, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09497206703910614 + }, + { + "epoch": 1.7533020877716234, + "grad_norm": 98.14570350236706, + "learning_rate": 3.6911851950150273e-07, + "loss": 0.2419, + "step": 8230, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, + "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9050332383665717, + "success_rate.epoch.env.math": 0.971457185778668, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.835988414192614, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8690834540807703, + "success_rate.epoch.global": 0.9002832861189801, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987224842767296, + "tokens_p.mean_in_band": 0.7760416666666666, + "tokens_rate.above_band": 0.9906542056074766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009345794392523364 + }, + { + "epoch": 1.7543672773753727, + "grad_norm": 196.41016934536862, + "learning_rate": 3.690852675535935e-07, + "loss": 0.2736, + "step": 8235, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8785046728971962, + "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9041745730550285, + "success_rate.epoch.env.math": 0.9714857428714357, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8362847849656668, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8690915323231703, + "success_rate.epoch.global": 0.9002828854314003, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997014146567718, + "tokens_p.mean_in_band": 0.6876446759259259, + "tokens_rate.above_band": 0.9522968197879859, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04770318021201413 + }, + { + "epoch": 1.7554324669791224, + "grad_norm": 83.57473081650586, + "learning_rate": 3.6905200898440657e-07, + "loss": 0.2746, + "step": 8240, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, + "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9042654028436019, + "success_rate.epoch.env.math": 0.9715284715284715, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8364620938628159, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8691967263075466, + "success_rate.epoch.global": 0.9004237288135594, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964480048367593, + "tokens_p.mean_in_band": 0.8253348214285714, + "tokens_rate.above_band": 0.9916067146282974, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008393285371702638 + }, + { + "epoch": 1.7564976565828716, + "grad_norm": 49.97892403907451, + "learning_rate": 3.690187438181805e-07, + "loss": 0.2496, + "step": 8245, + "success_rate.epoch.env.abd": 0.9839228295819936, + "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9046270066100094, + "success_rate.epoch.env.math": 0.9715284715284715, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8359177785791562, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8692550834229557, + "success_rate.epoch.global": 0.9002820874471086, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.998747723132969, + "tokens_p.mean_in_band": 0.6010044642857143, + "tokens_rate.above_band": 0.987410071942446, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012589928057553957 + }, + { + "epoch": 1.7575628461866213, + "grad_norm": 114.47787118059497, + "learning_rate": 3.6898547207915873e-07, + "loss": 0.2586, + "step": 8250, + "success_rate.epoch.env.abd": 0.9839228295819936, + "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9047169811320754, + "success_rate.epoch.env.math": 0.9710578842315369, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8359712230215828, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8692253408472791, + "success_rate.epoch.global": 0.9001408450704226, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.7857142857142857, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939338235294117, + "tokens_p.mean_in_band": 0.489375, + "tokens_rate.above_band": 0.7727272727272727, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.22727272727272727 + }, + { + "epoch": 1.7586280357903705, + "grad_norm": 88.74631440721146, + "learning_rate": 3.6895219379158955e-07, + "loss": 0.3547, + "step": 8255, + "success_rate.epoch.env.abd": 0.9839743589743589, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9038642789820923, + "success_rate.epoch.env.math": 0.9711011459890384, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8358477011494253, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8691961071486877, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9997195512820513, + "tokens_p.mean_in_band": 0.6153927364864865, + "tokens_rate.above_band": 0.9547123623011016, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04528763769889841 + }, + { + "epoch": 1.7596932253941202, + "grad_norm": 58.862840691141415, + "learning_rate": 3.689189089797258e-07, + "loss": 0.3514, + "step": 8260, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.903954802259887, + "success_rate.epoch.env.math": 0.9706467661691542, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8360832137733142, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8691937189876385, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9946446572580645, + "tokens_p.mean_in_band": 0.7691127232142857, + "tokens_rate.above_band": 0.9860834990059643, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013916500994035786 + }, + { + "epoch": 1.7607584149978697, + "grad_norm": 325.74195709203667, + "learning_rate": 3.6888561766782534e-07, + "loss": 0.3825, + "step": 8265, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9040451552210724, + "success_rate.epoch.env.math": 0.9707196029776675, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8363180515759312, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8692299033123945, + "success_rate.epoch.global": 0.900140252454418, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.7176649305555556, + "tokens_rate.above_band": 0.898876404494382, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10112359550561797 + }, + { + "epoch": 1.7618236046016191, + "grad_norm": 139.83387070271536, + "learning_rate": 3.688523198801505e-07, + "loss": 0.2471, + "step": 8270, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9698275862068966, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9033771106941839, + "success_rate.epoch.env.math": 0.9707486365889936, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.836552217453505, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8692049734462071, + "success_rate.epoch.global": 0.900140056022409, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9941737288135594, + "tokens_p.mean_in_band": 0.73828125, + "tokens_rate.above_band": 0.9609120521172638, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03908794788273615 + }, + { + "epoch": 1.7628887942053686, + "grad_norm": 174.87619621200622, + "learning_rate": 3.6881901564096864e-07, + "loss": 0.2019, + "step": 8275, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, + "success_rate.epoch.env.agentgym:sciworld": 0.9699570815450643, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9034676663542643, + "success_rate.epoch.env.math": 0.9707776126795443, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8368439842913246, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8693045640002275, + "success_rate.epoch.global": 0.9002797202797203, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976500659630607, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.763953983809118, + "grad_norm": 477.69093397229483, + "learning_rate": 3.6878570497455147e-07, + "loss": 0.3575, + "step": 8280, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, + "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9513677811550152, + "success_rate.epoch.env.logic": 0.9034676663542643, + "success_rate.epoch.env.math": 0.9703557312252964, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.837018544935806, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.869305324327738, + "success_rate.epoch.global": 0.9002793296089385, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985733695652174, + "tokens_p.mean_in_band": 0.61328125, + "tokens_rate.above_band": 0.9913793103448276, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008620689655172414 + }, + { + "epoch": 1.7650191734128675, + "grad_norm": 115.24093305979926, + "learning_rate": 3.687523879051757e-07, + "loss": 0.1575, + "step": 8285, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8807339449541285, + "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9516616314199395, + "success_rate.epoch.env.logic": 0.903558052434457, + "success_rate.epoch.env.math": 0.9704142011834319, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8370766488413547, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8690644729443001, + "success_rate.epoch.global": 0.900278940027894, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984327983951855, + "tokens_p.mean_in_band": 0.7261029411764706, + "tokens_rate.above_band": 0.9832347140039448, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016765285996055226 + }, + { + "epoch": 1.766084363016617, + "grad_norm": 150.02177723714118, + "learning_rate": 3.687190644571225e-07, + "loss": 0.4813, + "step": 8290, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8807339449541285, + "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9519519519519519, + "success_rate.epoch.env.logic": 0.9036482694106641, + "success_rate.epoch.env.math": 0.9704724409448819, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8368945868945868, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8690878107009276, + "success_rate.epoch.global": 0.9002785515320334, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998146186440678, + "tokens_p.mean_in_band": 0.4661458333333333, + "tokens_rate.above_band": 0.963265306122449, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036734693877551024 + }, + { + "epoch": 1.7671495526203664, + "grad_norm": 120.31289882633322, + "learning_rate": 3.686857346546778e-07, + "loss": 0.4033, + "step": 8295, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8767123287671232, + "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9519519519519519, + "success_rate.epoch.env.logic": 0.9039179104477612, + "success_rate.epoch.env.math": 0.9705304518664047, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8370106761565836, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8687625493403469, + "success_rate.epoch.global": 0.9002781641168289, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997359154929577, + "tokens_p.mean_in_band": 0.5966796875, + "tokens_rate.above_band": 0.9943977591036415, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0056022408963585435 + }, + { + "epoch": 1.768214742224116, + "grad_norm": 129.2118145999637, + "learning_rate": 3.686523985221321e-07, + "loss": 0.3394, + "step": 8300, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8772727272727273, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9520958083832335, + "success_rate.epoch.env.logic": 0.9030754892823858, + "success_rate.epoch.env.math": 0.9705738106915155, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8371845005332386, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8687812069827718, + "success_rate.epoch.global": 0.9002777777777777, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000803755144033, + "tokens_p.mean_in_band": 0.6566611842105263, + "tokens_rate.above_band": 0.9808274470232089, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01917255297679112 + }, + { + "epoch": 1.7692799318278654, + "grad_norm": 74.52794628196288, + "learning_rate": 3.686190560837805e-07, + "loss": 0.2688, + "step": 8305, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9522388059701492, + "success_rate.epoch.env.logic": 0.9032558139534884, + "success_rate.epoch.env.math": 0.9705882352941176, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8374156904508342, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8685239418354412, + "success_rate.epoch.global": 0.9002773925104022, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989945302445302, + "tokens_p.mean_in_band": 0.7467830882352942, + "tokens_rate.above_band": 0.9785894206549118, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021410579345088162 + }, + { + "epoch": 1.7703451214316148, + "grad_norm": 87.47326385592673, + "learning_rate": 3.685857073639228e-07, + "loss": 0.2358, + "step": 8310, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.903435468895079, + "success_rate.epoch.env.math": 0.9706026457618814, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.837646224742999, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8685914372071264, + "success_rate.epoch.global": 0.900415512465374, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.000840227507756, + "tokens_p.mean_in_band": 0.892578125, + "tokens_rate.above_band": 0.9979360165118679, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0020639834881320948 + }, + { + "epoch": 1.7714103110353643, + "grad_norm": 124.70343574185186, + "learning_rate": 3.6855235238686325e-07, + "loss": 0.423, + "step": 8315, + "success_rate.epoch.env.abd": 0.9841772151898734, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9525222551928784, + "success_rate.epoch.env.logic": 0.9035250463821892, + "success_rate.epoch.env.math": 0.9706314243759178, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.83793347487615, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8686457226715912, + "success_rate.epoch.global": 0.9005532503457815, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953648325358851, + "tokens_p.mean_in_band": 0.7083333333333334, + "tokens_rate.above_band": 0.9720930232558139, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027906976744186046 + }, + { + "epoch": 1.7724755006391137, + "grad_norm": 202.3068292737047, + "learning_rate": 3.685189911769108e-07, + "loss": 0.3005, + "step": 8320, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9525222551928784, + "success_rate.epoch.env.logic": 0.9037927844588344, + "success_rate.epoch.env.math": 0.9706888128969223, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8379908029713478, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8686895380409296, + "success_rate.epoch.global": 0.9006906077348066, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996919014084507, + "tokens_p.mean_in_band": 0.759765625, + "tokens_rate.above_band": 0.9861111111111112, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013888888888888888 + }, + { + "epoch": 1.7735406902428632, + "grad_norm": 25.208703512723403, + "learning_rate": 3.684856237583787e-07, + "loss": 0.2872, + "step": 8325, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9525222551928784, + "success_rate.epoch.env.logic": 0.9038817005545287, + "success_rate.epoch.env.math": 0.9707602339181286, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8381625441696113, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.868758378803038, + "success_rate.epoch.global": 0.9008275862068965, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973152920962199, + "tokens_p.mean_in_band": 0.8736979166666666, + "tokens_rate.above_band": 0.9948717948717949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005128205128205128 + }, + { + "epoch": 1.7746058798466127, + "grad_norm": 91.98801952864397, + "learning_rate": 3.6845225015558503e-07, + "loss": 0.4416, + "step": 8330, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9038817005545287, + "success_rate.epoch.env.math": 0.9708313077297035, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8380381086803105, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8687662974414821, + "success_rate.epoch.global": 0.9008264462809917, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983198924731183, + "tokens_p.mean_in_band": 0.6966145833333334, + "tokens_rate.above_band": 0.9872611464968153, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012738853503184714 + }, + { + "epoch": 1.7756710694503621, + "grad_norm": 345.2172572080791, + "learning_rate": 3.6841887039285223e-07, + "loss": 0.2875, + "step": 8335, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9039704524469068, + "success_rate.epoch.env.math": 0.9708879184861717, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8383233532934131, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8688054435562048, + "success_rate.epoch.global": 0.9009628610729024, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948694029850746, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.7767362590541116, + "grad_norm": 46.50858499775321, + "learning_rate": 3.683854844945071e-07, + "loss": 0.2299, + "step": 8340, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9040590405904059, + "success_rate.epoch.env.math": 0.9709583736689255, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.838494018296974, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688908494132388, + "success_rate.epoch.global": 0.9010989010989011, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960227272727272, + "tokens_p.mean_in_band": 0.888671875, + "tokens_rate.above_band": 0.9927797833935018, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007220216606498195 + }, + { + "epoch": 1.777801448657861, + "grad_norm": 42.75726907247912, + "learning_rate": 3.683520924848812e-07, + "loss": 0.1527, + "step": 8345, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8699551569506726, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9487179487179487, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9042357274401474, + "success_rate.epoch.env.math": 0.9710144927536232, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8386643233743409, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8685712488758394, + "success_rate.epoch.global": 0.9010973936899863, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985119047619048, + "tokens_p.mean_in_band": 0.66171875, + "tokens_rate.above_band": 0.9861878453038674, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013812154696132596 + }, + { + "epoch": 1.7788666382616105, + "grad_norm": 56.75882220070855, + "learning_rate": 3.683186943883103e-07, + "loss": 0.2209, + "step": 8350, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9044117647058824, + "success_rate.epoch.env.math": 0.9710564399421129, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8387776606954689, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688230058956432, + "success_rate.epoch.global": 0.9012328767123288, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983229712041884, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9947916666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005208333333333333 + }, + { + "epoch": 1.7799318278653602, + "grad_norm": 257.8840695143015, + "learning_rate": 3.6828529022913473e-07, + "loss": 0.2682, + "step": 8355, + "success_rate.epoch.env.abd": 0.9843260188087775, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9045871559633027, + "success_rate.epoch.env.math": 0.9711121810303323, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8385964912280702, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688320288163041, + "success_rate.epoch.global": 0.9012311901504788, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950810185185185, + "tokens_p.mean_in_band": 0.596875, + "tokens_rate.above_band": 0.9557522123893806, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04424778761061947 + }, + { + "epoch": 1.7809970174691094, + "grad_norm": 130.47379827753818, + "learning_rate": 3.6825188003169917e-07, + "loss": 0.2674, + "step": 8360, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9528023598820059, + "success_rate.epoch.env.logic": 0.9046746104491292, + "success_rate.epoch.env.math": 0.9711538461538461, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8388227049754731, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688814790468932, + "success_rate.epoch.global": 0.9013661202185792, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9930555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006944444444444444 + }, + { + "epoch": 1.7820622070728591, + "grad_norm": 83.52970371647345, + "learning_rate": 3.6821846382035266e-07, + "loss": 0.2227, + "step": 8365, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.971195391262602, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8386983904828551, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688989351859803, + "success_rate.epoch.global": 0.9013642564802182, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997633495145631, + "tokens_p.mean_in_band": 0.5611049107142857, + "tokens_rate.above_band": 0.9735349716446124, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026465028355387523 + }, + { + "epoch": 1.7831273966766084, + "grad_norm": 55.241273610000256, + "learning_rate": 3.681850416194489e-07, + "loss": 0.2441, + "step": 8370, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8716814159292036, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9048490393412626, + "success_rate.epoch.env.math": 0.9712368168744008, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8389238294898672, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.868994244666824, + "success_rate.epoch.global": 0.9014986376021799, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962143705463183, + "tokens_p.mean_in_band": 0.6943359375, + "tokens_rate.above_band": 0.9952718676122931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004728132387706856 + }, + { + "epoch": 1.784192586280358, + "grad_norm": 92.67297601578905, + "learning_rate": 3.6815161345334553e-07, + "loss": 0.4485, + "step": 8375, + "success_rate.epoch.env.abd": 0.984472049689441, + "success_rate.epoch.env.agentgym:alfworld": 0.8722466960352423, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9050228310502283, + "success_rate.epoch.env.math": 0.9712505989458553, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8391486392184229, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8687661250083368, + "success_rate.epoch.global": 0.9014965986394557, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969444444444444, + "tokens_p.mean_in_band": 0.469970703125, + "tokens_rate.above_band": 0.9656652360515021, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034334763948497854 + }, + { + "epoch": 1.7852577758841073, + "grad_norm": 48.337398003004665, + "learning_rate": 3.681181793464049e-07, + "loss": 0.3813, + "step": 8380, + "success_rate.epoch.env.abd": 0.984472049689441, + "success_rate.epoch.env.agentgym:alfworld": 0.8728070175438597, + "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9051959890610757, + "success_rate.epoch.env.math": 0.97131931166348, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8392047436344611, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688553394110768, + "success_rate.epoch.global": 0.9016304347826087, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987623762376238, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.786322965487857, + "grad_norm": 498.04105566005234, + "learning_rate": 3.680847393229935e-07, + "loss": 0.2806, + "step": 8385, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, + "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9052823315118397, + "success_rate.epoch.env.math": 0.9713467048710601, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8390243902439024, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8689769034489306, + "success_rate.epoch.global": 0.9016282225237449, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995903558052435, + "tokens_p.mean_in_band": 0.345703125, + "tokens_rate.above_band": 0.9925650557620818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007434944237918215 + }, + { + "epoch": 1.7873881550916062, + "grad_norm": 59.349432049608815, + "learning_rate": 3.680512934074822e-07, + "loss": 0.2393, + "step": 8390, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, + "success_rate.epoch.env.agentgym:sciworld": 0.9708333333333333, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9530791788856305, + "success_rate.epoch.env.logic": 0.9046321525885559, + "success_rate.epoch.env.math": 0.9713876967095851, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8391364902506964, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8689553535943197, + "success_rate.epoch.global": 0.9016260162601626, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000140134529148, + "tokens_p.mean_in_band": 0.6315104166666666, + "tokens_rate.above_band": 0.9966480446927374, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0033519553072625698 + }, + { + "epoch": 1.7884533446953559, + "grad_norm": 127.47583311210774, + "learning_rate": 3.680178416242461e-07, + "loss": 0.3413, + "step": 8395, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, + "success_rate.epoch.env.agentgym:sciworld": 0.9708333333333333, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9530791788856305, + "success_rate.epoch.env.logic": 0.9039855072463768, + "success_rate.epoch.env.math": 0.9714285714285714, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8386648122392212, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8688574037184409, + "success_rate.epoch.global": 0.9013531799729364, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7222222222222222, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9968243740795287, + "tokens_p.mean_in_band": 0.5754206730769231, + "tokens_rate.above_band": 0.9631205673758865, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03687943262411347 + }, + { + "epoch": 1.7895185342991051, + "grad_norm": 73.65532766359051, + "learning_rate": 3.6798438399766464e-07, + "loss": 0.2555, + "step": 8400, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8744588744588745, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9530791788856305, + "success_rate.epoch.env.logic": 0.9039855072463768, + "success_rate.epoch.env.math": 0.97144217039505, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8388329281000347, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8690041720347729, + "success_rate.epoch.global": 0.9014864864864864, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984517601043025, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.7905837239028548, + "grad_norm": 111.28770294088349, + "learning_rate": 3.679509205521215e-07, + "loss": 0.31, + "step": 8405, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9039855072463768, + "success_rate.epoch.env.math": 0.9714693295292439, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8388214904679376, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8690672668008271, + "success_rate.epoch.global": 0.9014844804318488, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969019396551724, + "tokens_p.mean_in_band": 0.46986607142857145, + "tokens_rate.above_band": 0.9851380042462845, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014861995753715499 + }, + { + "epoch": 1.791648913506604, + "grad_norm": 418.5759581172558, + "learning_rate": 3.679174513120046e-07, + "loss": 0.2362, + "step": 8410, + "success_rate.epoch.env.abd": 0.9847094801223242, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9041591320072333, + "success_rate.epoch.env.math": 0.9714964370546318, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.83898891966759, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8691201115704097, + "success_rate.epoch.global": 0.9016172506738545, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989224137931034, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.7927141031103537, + "grad_norm": 96.83658454131306, + "learning_rate": 3.678839763017061e-07, + "loss": 0.2445, + "step": 8415, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9533527696793003, + "success_rate.epoch.env.logic": 0.9041591320072333, + "success_rate.epoch.env.math": 0.9715504978662873, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.838865836791148, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8691304743689695, + "success_rate.epoch.global": 0.9016150740242261, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9948620495495496, + "tokens_p.mean_in_band": 0.64532470703125, + "tokens_rate.above_band": 0.9823008849557522, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017699115044247787 + }, + { + "epoch": 1.793779292714103, + "grad_norm": 48.72980061970136, + "learning_rate": 3.6785049554562225e-07, + "loss": 0.4237, + "step": 8420, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9034296028880866, + "success_rate.epoch.env.math": 0.9716043539990534, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8390328151986183, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.869096556899407, + "success_rate.epoch.global": 0.9016129032258065, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976929530201343, + "tokens_p.mean_in_band": 0.5660511363636364, + "tokens_rate.above_band": 0.9793427230046948, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020657276995305163 + }, + { + "epoch": 1.7948444823178527, + "grad_norm": 51.40452171372572, + "learning_rate": 3.678170090681537e-07, + "loss": 0.3246, + "step": 8425, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9035166816952209, + "success_rate.epoch.env.math": 0.9716580066131318, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.838909968954812, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8687907607780734, + "success_rate.epoch.global": 0.901476510067114, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9953972868217055, + "tokens_p.mean_in_band": 0.6076171875, + "tokens_rate.above_band": 0.8657718120805369, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1342281879194631 + }, + { + "epoch": 1.795909671921602, + "grad_norm": 58.47596334101638, + "learning_rate": 3.677835168937052e-07, + "loss": 0.3301, + "step": 8430, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.9027902790279028, + "success_rate.epoch.env.math": 0.971253534401508, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8386763185108583, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8686789690498805, + "success_rate.epoch.global": 0.9012064343163538, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9959610133495146, + "tokens_p.mean_below_band": 8.866190910339355e-07, + "tokens_p.mean_in_band": 0.3921875, + "tokens_rate.above_band": 0.9751479289940829, + "tokens_rate.below_band": 0.001183431952662722, + "tokens_rate.in_band": 0.023668639053254437 + }, + { + "epoch": 1.7969748615253516, + "grad_norm": 219.06314510997606, + "learning_rate": 3.6775001904668545e-07, + "loss": 0.3121, + "step": 8435, + "success_rate.epoch.env.abd": 0.9848024316109423, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.9028776978417267, + "success_rate.epoch.env.math": 0.971307619943556, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8388984509466437, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8687162391263917, + "success_rate.epoch.global": 0.9013386880856761, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9918478260869565, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9019607843137255, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09803921568627451 + }, + { + "epoch": 1.7980400511291008, + "grad_norm": 39.81884928918224, + "learning_rate": 3.6771651555150746e-07, + "loss": 0.3656, + "step": 8440, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.9013452914798207, + "success_rate.epoch.env.math": 0.9713211095439587, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8388316151202749, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8685762664581801, + "success_rate.epoch.global": 0.9010695187165776, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.3333333333333333, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9971942724458205, + "tokens_p.mean_in_band": 0.5909598214285714, + "tokens_rate.above_band": 0.9584569732937686, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04154302670623145 + }, + { + "epoch": 1.7991052407328505, + "grad_norm": 84.77316424565245, + "learning_rate": 3.676830064325885e-07, + "loss": 0.3008, + "step": 8445, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.9013452914798207, + "success_rate.epoch.env.math": 0.9713883677298312, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8390528483184626, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8686132256541833, + "success_rate.epoch.global": 0.9012016021361816, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961168639053254, + "tokens_p.mean_in_band": 0.7174479166666666, + "tokens_rate.above_band": 0.9825581395348837, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01744186046511628 + }, + { + "epoch": 1.8001704303366, + "grad_norm": 211.15306640923058, + "learning_rate": 3.676494917143496e-07, + "loss": 0.3796, + "step": 8450, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9536231884057971, + "success_rate.epoch.env.logic": 0.9014336917562724, + "success_rate.epoch.env.math": 0.971441947565543, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8389307745030843, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8686256804101397, + "success_rate.epoch.global": 0.9012, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99609375, + "tokens_p.mean_in_band": 0.7559344951923077, + "tokens_rate.above_band": 0.9078014184397163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09219858156028368 + }, + { + "epoch": 1.8012356199403494, + "grad_norm": 54.855635536562424, + "learning_rate": 3.676159714212161e-07, + "loss": 0.2858, + "step": 8455, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9016100178890877, + "success_rate.epoch.env.math": 0.9714686623012161, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.839151266255989, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8687106480061041, + "success_rate.epoch.global": 0.9013315579227696, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970737632508834, + "tokens_p.mean_in_band": 0.8058035714285714, + "tokens_rate.above_band": 0.987783595113438, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012216404886561954 + }, + { + "epoch": 1.8023008095440989, + "grad_norm": 31.371114937652205, + "learning_rate": 3.675824455776174e-07, + "loss": 0.3861, + "step": 8460, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8675213675213675, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9016100178890877, + "success_rate.epoch.env.math": 0.9714953271028037, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8387978142076503, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8680010643947057, + "success_rate.epoch.global": 0.9009308510638298, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.5555555555555555, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9985965568862275, + "tokens_p.mean_in_band": 0.41158854166666664, + "tokens_rate.above_band": 0.9570200573065902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04297994269340974 + }, + { + "epoch": 1.8033659991478483, + "grad_norm": 141.56172827880636, + "learning_rate": 3.6754891420798683e-07, + "loss": 0.3029, + "step": 8465, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9008928571428572, + "success_rate.epoch.env.math": 0.9715086408220458, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8387316740538697, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8676479212091494, + "success_rate.epoch.global": 0.90066401062417, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.00035140562249, + "tokens_p.mean_in_band": 0.505615234375, + "tokens_rate.above_band": 0.9873116574147502, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012688342585249802 + }, + { + "epoch": 1.8044311887515978, + "grad_norm": 125.1743919274533, + "learning_rate": 3.6751537733676183e-07, + "loss": 0.2639, + "step": 8470, + "success_rate.epoch.env.abd": 0.984984984984985, + "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9009812667261374, + "success_rate.epoch.env.math": 0.971535230984601, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8389513108614233, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.867690751817321, + "success_rate.epoch.global": 0.9007957559681697, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971804511278195, + "tokens_p.mean_in_band": 0.865234375, + "tokens_rate.above_band": 0.9708029197080292, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029197080291970802 + }, + { + "epoch": 1.8054963783553473, + "grad_norm": 136.04580099448177, + "learning_rate": 3.6748183498838383e-07, + "loss": 0.2773, + "step": 8475, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9538904899135446, + "success_rate.epoch.env.logic": 0.9011576135351737, + "success_rate.epoch.env.math": 0.9710955710955711, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8388303298197892, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8676720177712451, + "success_rate.epoch.global": 0.9006622516556292, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997378587196468, + "tokens_p.mean_in_band": 0.5037006578947368, + "tokens_rate.above_band": 0.9794594594594594, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02054054054054054 + }, + { + "epoch": 1.8065615679590967, + "grad_norm": 349.799537670917, + "learning_rate": 3.6744828718729826e-07, + "loss": 0.3093, + "step": 8480, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9511494252873564, + "success_rate.epoch.env.logic": 0.9012455516014235, + "success_rate.epoch.env.math": 0.9711493718008376, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8383152173913043, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8673994455653748, + "success_rate.epoch.global": 0.9003968253968254, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9949678375252964, + "tokens_p.mean_in_band": 0.5573410700363826, + "tokens_rate.above_band": 0.8779187817258883, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12208121827411167 + }, + { + "epoch": 1.8076267575628462, + "grad_norm": 66.9464921394573, + "learning_rate": 3.674147339579545e-07, + "loss": 0.5047, + "step": 8485, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9511494252873564, + "success_rate.epoch.env.logic": 0.90150842945874, + "success_rate.epoch.env.math": 0.9711761971176197, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.838140481845945, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8674659708078494, + "success_rate.epoch.global": 0.9003963011889036, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992288961038961, + "tokens_p.mean_in_band": 0.6330729166666667, + "tokens_rate.above_band": 0.9808917197452229, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01910828025477707 + }, + { + "epoch": 1.8086919471665956, + "grad_norm": 105.73409208723615, + "learning_rate": 3.673811753248059e-07, + "loss": 0.1991, + "step": 8490, + "success_rate.epoch.env.abd": 0.9851190476190477, + "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.9016829052258636, + "success_rate.epoch.env.math": 0.9712163416898792, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8383050847457627, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8675172087156696, + "success_rate.epoch.global": 0.9005277044854881, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984439834024896, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9877049180327869, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012295081967213115 + }, + { + "epoch": 1.809757136770345, + "grad_norm": 87.4635664032167, + "learning_rate": 3.6734761131230987e-07, + "loss": 0.1867, + "step": 8495, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.9017699115044248, + "success_rate.epoch.env.math": 0.9712830013895322, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8384693532001355, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8675501261303346, + "success_rate.epoch.global": 0.9006587615283268, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969682835820896, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.8108223263740946, + "grad_norm": 123.38034576458628, + "learning_rate": 3.673140419449274e-07, + "loss": 0.4046, + "step": 8500, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.9019434628975265, + "success_rate.epoch.env.math": 0.971309578898658, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8387423935091278, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8676036145660077, + "success_rate.epoch.global": 0.9007894736842105, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9933467741935483, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9451219512195121, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.054878048780487805 + }, + { + "epoch": 1.811887515977844, + "grad_norm": 186.8125349768505, + "learning_rate": 3.6728046724712376e-07, + "loss": 0.3676, + "step": 8505, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.9020300088261254, + "success_rate.epoch.env.math": 0.9713361072584373, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8383940620782726, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.867251830722286, + "success_rate.epoch.global": 0.9005256241787122, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.997314453125, + "tokens_p.mean_in_band": 0.5842927631578947, + "tokens_rate.above_band": 0.9309090909090909, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06909090909090909 + }, + { + "epoch": 1.8129527055815935, + "grad_norm": 245.92692628209977, + "learning_rate": 3.6724688724336796e-07, + "loss": 0.151, + "step": 8510, + "success_rate.epoch.env.abd": 0.985207100591716, + "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.9022026431718062, + "success_rate.epoch.env.math": 0.9713890170742963, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8385574654533199, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.867291180120516, + "success_rate.epoch.global": 0.9006561679790026, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9884733606557377, + "tokens_p.mean_in_band": 0.884765625, + "tokens_rate.above_band": 0.9838709677419355, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016129032258064516 + }, + { + "epoch": 1.814017895185343, + "grad_norm": 43.71584633814408, + "learning_rate": 3.672133019581328e-07, + "loss": 0.3417, + "step": 8515, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512893982808023, + "success_rate.epoch.env.logic": 0.9022887323943662, + "success_rate.epoch.env.math": 0.9714153988012909, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8388290713324361, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8673340068372444, + "success_rate.epoch.global": 0.9007863695937091, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946120689655172, + "tokens_p.mean_in_band": 0.7261284722222222, + "tokens_rate.above_band": 0.928, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.072 + }, + { + "epoch": 1.8150830847890924, + "grad_norm": 59.75579659371212, + "learning_rate": 3.671797114158949e-07, + "loss": 0.3303, + "step": 8520, + "success_rate.epoch.env.abd": 0.9853372434017595, + "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.9023746701846965, + "success_rate.epoch.env.math": 0.9714548802946593, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8389374579690653, + "success_rate.epoch.env.webshop": 0.9761904761904762, + "success_rate.epoch.env_macro_mean": 0.8674350158768592, + "success_rate.epoch.global": 0.9009162303664922, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9940124671916011, + "tokens_p.mean_in_band": 0.8109375, + "tokens_rate.above_band": 0.9934810951760105, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00651890482398957 + }, + { + "epoch": 1.8161482743928419, + "grad_norm": 116.2575874490211, + "learning_rate": 3.67146115641135e-07, + "loss": 0.4174, + "step": 8525, + "success_rate.epoch.env.abd": 0.9853801169590644, + "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.9025460930640913, + "success_rate.epoch.env.math": 0.9714811407543699, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.839099764863957, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8691618728369778, + "success_rate.epoch.global": 0.9010457516339869, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984296482412061, + "tokens_p.mean_in_band": 0.7900390625, + "tokens_rate.above_band": 0.9900497512437811, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009950248756218905 + }, + { + "epoch": 1.8172134639965913, + "grad_norm": 65.82815264964785, + "learning_rate": 3.671125146583374e-07, + "loss": 0.1464, + "step": 8530, + "success_rate.epoch.env.abd": 0.9854651162790697, + "success_rate.epoch.env.agentgym:alfworld": 0.8625, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.9025460930640913, + "success_rate.epoch.env.math": 0.9714942528735632, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393695506371562, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8693003600549115, + "success_rate.epoch.global": 0.9011749347258485, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975294729542302, + "tokens_p.mean_in_band": 0.880859375, + "tokens_rate.above_band": 0.9972337482710927, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0027662517289073307 + }, + { + "epoch": 1.8182786536003408, + "grad_norm": 185.3163671114768, + "learning_rate": 3.670789084919902e-07, + "loss": 0.2633, + "step": 8535, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8625, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.9028021015761821, + "success_rate.epoch.env.math": 0.9715335169880625, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8394772117962467, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8694516851529612, + "success_rate.epoch.global": 0.9013037809647979, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967981557377049, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.8193438432040905, + "grad_norm": 119.93006513072066, + "learning_rate": 3.6704529716658537e-07, + "loss": 0.2836, + "step": 8540, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8625, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.902972027972028, + "success_rate.epoch.env.math": 0.9715726730857405, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8396921017402945, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8695232139838737, + "success_rate.epoch.global": 0.9014322916666667, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956380208333333, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.995850622406639, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004149377593360996 + }, + { + "epoch": 1.8204090328078397, + "grad_norm": 114.32632587520037, + "learning_rate": 3.6701168070661856e-07, + "loss": 0.2852, + "step": 8545, + "success_rate.epoch.env.abd": 0.9855491329479769, + "success_rate.epoch.env.agentgym:alfworld": 0.8625, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.903056768558952, + "success_rate.epoch.env.math": 0.9711406321575813, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8400133600534402, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8695246543968806, + "success_rate.epoch.global": 0.9014304291287386, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9932484567901234, + "tokens_p.mean_in_band": 0.7776227678571429, + "tokens_rate.above_band": 0.9204545454545454, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07954545454545454 + }, + { + "epoch": 1.8214742224115894, + "grad_norm": 83.38526281362576, + "learning_rate": 3.669780591365892e-07, + "loss": 0.2369, + "step": 8550, + "success_rate.epoch.env.abd": 0.9855491329479769, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9514285714285714, + "success_rate.epoch.env.logic": 0.9031413612565445, + "success_rate.epoch.env.math": 0.9711934156378601, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8401201602136181, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8696604631811379, + "success_rate.epoch.global": 0.9015584415584416, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977779878971256, + "tokens_p.mean_in_band": 0.7903645833333334, + "tokens_rate.above_band": 0.9954819277108434, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004518072289156626 + }, + { + "epoch": 1.8225394120153386, + "grad_norm": 92.90295711310547, + "learning_rate": 3.669444324810006e-07, + "loss": 0.2809, + "step": 8555, + "success_rate.epoch.env.abd": 0.9855907780979827, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9515669515669516, + "success_rate.epoch.env.logic": 0.9031413612565445, + "success_rate.epoch.env.math": 0.9712460063897763, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8396666666666667, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8696403834078059, + "success_rate.epoch.global": 0.9014267185473411, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9989708083832335, + "tokens_p.mean_in_band": 0.5501302083333334, + "tokens_rate.above_band": 0.9823529411764705, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01764705882352941 + }, + { + "epoch": 1.8236046016190883, + "grad_norm": 62.04636971662324, + "learning_rate": 3.6691080076435945e-07, + "loss": 0.1885, + "step": 8560, + "success_rate.epoch.env.abd": 0.9855907780979827, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9515669515669516, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.971285323609845, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393213572854291, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8696202427745241, + "success_rate.epoch.global": 0.9012953367875648, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9932598039215687, + "tokens_p.mean_in_band": 0.58984375, + "tokens_rate.above_band": 0.9357798165137615, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06422018348623854 + }, + { + "epoch": 1.8246697912228376, + "grad_norm": 80.86503687070869, + "learning_rate": 3.668771640111764e-07, + "loss": 0.3293, + "step": 8565, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9517045454545454, + "success_rate.epoch.env.logic": 0.9033942558746736, + "success_rate.epoch.env.math": 0.9713245334547109, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393747921516461, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697022381964132, + "success_rate.epoch.global": 0.9014230271668823, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9963235294117647, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.8257349808265873, + "grad_norm": 75.49193121156453, + "learning_rate": 3.668435222459656e-07, + "loss": 0.1695, + "step": 8570, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9517045454545454, + "success_rate.epoch.env.logic": 0.9034782608695652, + "success_rate.epoch.env.math": 0.9713766469786461, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8396414342629482, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697388527991524, + "success_rate.epoch.global": 0.9015503875968992, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9877136752136753, + "tokens_p.mean_in_band": 0.7712673611111112, + "tokens_rate.above_band": 0.9285714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07142857142857142 + }, + { + "epoch": 1.8268001704303365, + "grad_norm": 147.24169525468326, + "learning_rate": 3.66809875493245e-07, + "loss": 0.275, + "step": 8575, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9518413597733711, + "success_rate.epoch.env.logic": 0.9028620988725065, + "success_rate.epoch.env.math": 0.9714026327734907, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8398541114058355, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697169723681977, + "success_rate.epoch.global": 0.9015483870967742, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994676853707415, + "tokens_p.mean_in_band": 0.607421875, + "tokens_rate.above_band": 0.9960079840319361, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003992015968063872 + }, + { + "epoch": 1.8278653600340862, + "grad_norm": 79.5571842198206, + "learning_rate": 3.6677622377753603e-07, + "loss": 0.2458, + "step": 8580, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9518413597733711, + "success_rate.epoch.env.logic": 0.9028620988725065, + "success_rate.epoch.env.math": 0.9714544630720435, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8397881496193313, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697667028900052, + "success_rate.epoch.global": 0.9015463917525773, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975665983606558, + "tokens_p.mean_in_band": 0.728515625, + "tokens_rate.above_band": 0.976, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024 + }, + { + "epoch": 1.8289305496378354, + "grad_norm": 154.21009305469008, + "learning_rate": 3.667425671233639e-07, + "loss": 0.6609, + "step": 8585, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8647540983606558, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.9518413597733711, + "success_rate.epoch.env.logic": 0.9028620988725065, + "success_rate.epoch.env.math": 0.9714673913043478, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8394980184940555, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.869795842265548, + "success_rate.epoch.global": 0.9014157014157014, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976388888888889, + "tokens_p.mean_in_band": 0.6024305555555556, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 1.829995739241585, + "grad_norm": 335.7108853351841, + "learning_rate": 3.667089055552573e-07, + "loss": 0.3227, + "step": 8590, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8647540983606558, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952112676056338, + "success_rate.epoch.env.logic": 0.902946273830156, + "success_rate.epoch.env.math": 0.9715189873417721, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393269548003959, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8698172989550372, + "success_rate.epoch.global": 0.901413881748072, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978505291005291, + "tokens_p.mean_in_band": 0.4482421875, + "tokens_rate.above_band": 0.9792746113989638, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02072538860103627 + }, + { + "epoch": 1.8310609288453343, + "grad_norm": 107.95032943813455, + "learning_rate": 3.666752390977485e-07, + "loss": 0.3423, + "step": 8595, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952112676056338, + "success_rate.epoch.env.logic": 0.902946273830156, + "success_rate.epoch.env.math": 0.9715575620767495, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393151135989463, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8698699132842964, + "success_rate.epoch.global": 0.9014120667522465, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9948795180722891, + "tokens_p.mean_in_band": 0.6761067708333334, + "tokens_rate.above_band": 0.9718969555035128, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02810304449648712 + }, + { + "epoch": 1.832126118449084, + "grad_norm": 56.83198867234907, + "learning_rate": 3.666415677753735e-07, + "loss": 0.2797, + "step": 8600, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952112676056338, + "success_rate.epoch.env.logic": 0.9031979256698358, + "success_rate.epoch.env.math": 0.9715960324616771, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8388687931601447, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8698557134466424, + "success_rate.epoch.global": 0.9012820512820513, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9940476190476191, + "tokens_p.mean_in_band": 0.6139914772727273, + "tokens_rate.above_band": 0.9051724137931034, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09482758620689655 + }, + { + "epoch": 1.8331913080528333, + "grad_norm": 0.0, + "learning_rate": 3.666078916126716e-07, + "loss": 0.1571, + "step": 8605, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952247191011236, + "success_rate.epoch.env.logic": 0.9032815198618307, + "success_rate.epoch.env.math": 0.9712100764732343, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8390275952693824, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8698548911982503, + "success_rate.epoch.global": 0.901280409731114, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996424485125858, + "tokens_p.mean_below_band": 2.2118911147117615e-08, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.997716894977169, + "tokens_rate.below_band": 0.001141552511415525, + "tokens_rate.in_band": 0.001141552511415525 + }, + { + "epoch": 1.834256497656583, + "grad_norm": 109.81075931162576, + "learning_rate": 3.665742106341857e-07, + "loss": 0.3689, + "step": 8610, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952247191011236, + "success_rate.epoch.env.logic": 0.9026701119724375, + "success_rate.epoch.env.math": 0.9708258527827648, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8388049901510177, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697441424075024, + "success_rate.epoch.global": 0.9010230179028133, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.6555555555555556, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9969389619883041, + "tokens_p.mean_in_band": 0.602796052631579, + "tokens_rate.above_band": 0.9473684210526315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05263157894736842 + }, + { + "epoch": 1.8353216872603322, + "grad_norm": 48.26808723590609, + "learning_rate": 3.665405248644624e-07, + "loss": 0.1927, + "step": 8615, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952247191011236, + "success_rate.epoch.env.logic": 0.9020618556701031, + "success_rate.epoch.env.math": 0.9708650829224563, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8389635946211873, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697105525517099, + "success_rate.epoch.global": 0.901021711366539, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971875, + "tokens_p.mean_in_band": 0.646484375, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038461538461538464 + }, + { + "epoch": 1.8363868768640819, + "grad_norm": 91.16632230601893, + "learning_rate": 3.665068343280516e-07, + "loss": 0.2845, + "step": 8620, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, + "success_rate.epoch.env.agentgym:sciworld": 0.9721115537848606, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9512195121951219, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.9021459227467811, + "success_rate.epoch.env.math": 0.9708781362007168, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8392272429600524, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697777432742441, + "success_rate.epoch.global": 0.9011479591836735, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0005634014423077, + "tokens_p.mean_in_band": 0.8486328125, + "tokens_rate.above_band": 0.9952153110047847, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004784688995215311 + }, + { + "epoch": 1.8374520664678313, + "grad_norm": 70.45980748244546, + "learning_rate": 3.6647313904950667e-07, + "loss": 0.289, + "step": 8625, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8663967611336032, + "success_rate.epoch.env.agentgym:sciworld": 0.9721115537848606, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.9021459227467811, + "success_rate.epoch.env.math": 0.970917225950783, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393850179914949, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8700040745190315, + "success_rate.epoch.global": 0.9012738853503185, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972222222222222, + "tokens_p.mean_in_band": 0.8765625, + "tokens_rate.above_band": 0.9915254237288136, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00847457627118644 + }, + { + "epoch": 1.8385172560715808, + "grad_norm": 90.94651430453813, + "learning_rate": 3.6643943905338454e-07, + "loss": 0.2248, + "step": 8630, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8663967611336032, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.952513966480447, + "success_rate.epoch.env.logic": 0.902229845626072, + "success_rate.epoch.env.math": 0.9709562109025917, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8395949036262659, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8700543704727849, + "success_rate.epoch.global": 0.9013994910941476, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987098623853211, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.8395824456753302, + "grad_norm": 63.054858904476646, + "learning_rate": 3.664057343642455e-07, + "loss": 0.1912, + "step": 8635, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9526462395543176, + "success_rate.epoch.env.logic": 0.9023136246786633, + "success_rate.epoch.env.math": 0.9709691826708352, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8395826540593414, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697564831092058, + "success_rate.epoch.global": 0.9012706480304955, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.998046875, + "tokens_p.mean_in_band": 0.6958705357142857, + "tokens_rate.above_band": 0.9846153846153847, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015384615384615385 + }, + { + "epoch": 1.8406476352790797, + "grad_norm": 18.50344239368859, + "learning_rate": 3.663720250066533e-07, + "loss": 0.1891, + "step": 8640, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9527777777777777, + "success_rate.epoch.env.logic": 0.9016253207869974, + "success_rate.epoch.env.math": 0.9710080285459411, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8397915988277435, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697283944705969, + "success_rate.epoch.global": 0.901269035532995, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000170068027211, + "tokens_p.mean_in_band": 0.5756578947368421, + "tokens_rate.above_band": 0.9748010610079576, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025198938992042442 + }, + { + "epoch": 1.8417128248828292, + "grad_norm": 144.5788589554126, + "learning_rate": 3.6633831100517505e-07, + "loss": 0.4581, + "step": 8645, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8634538152610441, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9527777777777777, + "success_rate.epoch.env.logic": 0.9017933390264731, + "success_rate.epoch.env.math": 0.9710338680926917, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8394018205461639, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.869760637103255, + "success_rate.epoch.global": 0.9011406844106464, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9953703703703703, + "tokens_p.mean_in_band": 0.5301846590909091, + "tokens_rate.above_band": 0.9310344827586207, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06896551724137931 + }, + { + "epoch": 1.8427780144865786, + "grad_norm": 68.72584600900457, + "learning_rate": 3.6630459238438125e-07, + "loss": 0.2544, + "step": 8650, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8650793650793651, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9527777777777777, + "success_rate.epoch.env.logic": 0.9010238907849829, + "success_rate.epoch.env.math": 0.9710596616206589, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8392857142857143, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.869830254270923, + "success_rate.epoch.global": 0.9010126582278482, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0003861202635915, + "tokens_p.mean_in_band": 0.5553977272727273, + "tokens_rate.above_band": 0.982200647249191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01779935275080906 + }, + { + "epoch": 1.843843204090328, + "grad_norm": 151.59932566960663, + "learning_rate": 3.6627086916884584e-07, + "loss": 0.362, + "step": 8655, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9529085872576177, + "success_rate.epoch.env.logic": 0.9011082693947144, + "success_rate.epoch.env.math": 0.9710854092526691, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8392220421393841, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8695355249570386, + "success_rate.epoch.global": 0.9008849557522124, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983766233766234, + "tokens_p.mean_below_band": 5.3085386753082275e-08, + "tokens_p.mean_in_band": 0.3076171875, + "tokens_rate.above_band": 0.9903536977491961, + "tokens_rate.below_band": 0.003215434083601286, + "tokens_rate.in_band": 0.006430868167202572 + }, + { + "epoch": 1.8449083936940776, + "grad_norm": 90.18613340694525, + "learning_rate": 3.6623714138314607e-07, + "loss": 0.1854, + "step": 8660, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9529085872576177, + "success_rate.epoch.env.logic": 0.9011082693947144, + "success_rate.epoch.env.math": 0.9711239449133718, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8394822006472492, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8695969670748326, + "success_rate.epoch.global": 0.901010101010101, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9950657894736842, + "tokens_p.mean_in_band": 0.8, + "tokens_rate.above_band": 0.9715909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028409090909090908 + }, + { + "epoch": 1.845973583297827, + "grad_norm": 82.94777241142698, + "learning_rate": 3.6620340905186247e-07, + "loss": 0.3565, + "step": 8665, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9529085872576177, + "success_rate.epoch.env.logic": 0.9012765957446809, + "success_rate.epoch.env.math": 0.9711623779946761, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393665158371041, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697059211051233, + "success_rate.epoch.global": 0.9010088272383354, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972516286644951, + "tokens_p.mean_in_band": 0.7299107142857143, + "tokens_rate.above_band": 0.9887278582930756, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011272141706924315 + }, + { + "epoch": 1.8470387729015765, + "grad_norm": 126.21880070728955, + "learning_rate": 3.6616967219957894e-07, + "loss": 0.3996, + "step": 8670, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9530386740331491, + "success_rate.epoch.env.logic": 0.9013605442176871, + "success_rate.epoch.env.math": 0.9707835325365206, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8395221181788828, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697050840262288, + "success_rate.epoch.global": 0.9010075566750629, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964978448275862, + "tokens_p.mean_below_band": 2.7830537874251604e-10, + "tokens_p.mean_in_band": 0.703125, + "tokens_rate.above_band": 0.9872340425531915, + "tokens_rate.below_band": 0.00425531914893617, + "tokens_rate.in_band": 0.00851063829787234 + }, + { + "epoch": 1.848103962505326, + "grad_norm": 124.1414342864317, + "learning_rate": 3.6613593085088263e-07, + "loss": 0.3269, + "step": 8675, + "success_rate.epoch.env.abd": 0.9858757062146892, + "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.953168044077135, + "success_rate.epoch.env.logic": 0.9015280135823429, + "success_rate.epoch.env.math": 0.9707964601769912, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8393548387096774, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8697748462632476, + "success_rate.epoch.global": 0.9010062893081761, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996357202331391, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9950289975144988, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004971002485501243 + }, + { + "epoch": 1.8491691521090754, + "grad_norm": 27.568502548372212, + "learning_rate": 3.6610218503036403e-07, + "loss": 0.3363, + "step": 8680, + "success_rate.epoch.env.abd": 0.9858757062146892, + "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9505494505494505, + "success_rate.epoch.env.logic": 0.9008474576271186, + "success_rate.epoch.env.math": 0.9708351745470615, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8391878826941669, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8691408925887815, + "success_rate.epoch.global": 0.9006281407035176, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.4333333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9953870387038704, + "tokens_p.mean_in_band": 0.7041193181818182, + "tokens_rate.above_band": 0.9099099099099099, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09009009009009009 + }, + { + "epoch": 1.8502343417128249, + "grad_norm": 231.50158626001303, + "learning_rate": 3.6606843476261683e-07, + "loss": 0.3401, + "step": 8685, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, + "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9505494505494505, + "success_rate.epoch.env.logic": 0.9009314140558848, + "success_rate.epoch.env.math": 0.9708609271523179, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.839073060830383, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8691575440621393, + "success_rate.epoch.global": 0.9006273525721455, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987824675324676, + "tokens_p.mean_in_band": 0.1513671875, + "tokens_rate.above_band": 0.9935483870967742, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0064516129032258064 + }, + { + "epoch": 1.8512995313165743, + "grad_norm": 33.90291242663054, + "learning_rate": 3.6603468007223797e-07, + "loss": 0.2315, + "step": 8690, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, + "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9505494505494505, + "success_rate.epoch.env.logic": 0.9009314140558848, + "success_rate.epoch.env.math": 0.9708994708994709, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8391136801541426, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8691647407049495, + "success_rate.epoch.global": 0.9006265664160401, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9890510948905109, + "tokens_p.mean_in_band": 0.78173828125, + "tokens_rate.above_band": 0.9448275862068966, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05517241379310345 + }, + { + "epoch": 1.8523647209203238, + "grad_norm": 167.57781940090902, + "learning_rate": 3.6600092098382763e-07, + "loss": 0.266, + "step": 8695, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.86328125, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9480874316939891, + "success_rate.epoch.env.logic": 0.9010152284263959, + "success_rate.epoch.env.math": 0.9708994708994709, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8393200769724182, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8690749945822067, + "success_rate.epoch.global": 0.9006257822277848, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972540824261276, + "tokens_p.mean_in_band": 0.7094029017857143, + "tokens_rate.above_band": 0.9945862335653519, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005413766434648105 + }, + { + "epoch": 1.8534299105240732, + "grad_norm": 163.77829352756117, + "learning_rate": 3.6596715752198924e-07, + "loss": 0.3513, + "step": 8700, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9480874316939891, + "success_rate.epoch.env.logic": 0.8995780590717299, + "success_rate.epoch.env.math": 0.9709251101321585, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8395259449071109, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8690137507080885, + "success_rate.epoch.global": 0.9005, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.3333333333333333, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978703703703704, + "tokens_p.mean_in_band": 0.5137746710526315, + "tokens_rate.above_band": 0.9594882729211087, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04051172707889126 + }, + { + "epoch": 1.8544951001278227, + "grad_norm": 36.36148672150636, + "learning_rate": 3.659333897113293e-07, + "loss": 0.2406, + "step": 8705, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, + "success_rate.epoch.env.agentgym:sciworld": 0.97265625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9483695652173914, + "success_rate.epoch.env.logic": 0.8995780590717299, + "success_rate.epoch.env.math": 0.9709634843818742, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8397312859884837, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8690713033668747, + "success_rate.epoch.global": 0.9006242197253433, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961630286493861, + "tokens_p.mean_in_band": 0.6171875, + "tokens_rate.above_band": 0.9986376021798365, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013623978201634877 + }, + { + "epoch": 1.8555602897315722, + "grad_norm": 53.63600077529118, + "learning_rate": 3.658996175764576e-07, + "loss": 0.1747, + "step": 8710, + "success_rate.epoch.env.abd": 0.9859943977591037, + "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, + "success_rate.epoch.env.agentgym:sciworld": 0.97265625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9483695652173914, + "success_rate.epoch.env.logic": 0.8996627318718381, + "success_rate.epoch.env.math": 0.9710017574692443, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8399872245289045, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8691093239129862, + "success_rate.epoch.global": 0.9007481296758105, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9880952380952381, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9921259842519685, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007874015748031496 + }, + { + "epoch": 1.8566254793353218, + "grad_norm": 175.93561049497623, + "learning_rate": 3.65865841141987e-07, + "loss": 0.3368, + "step": 8715, + "success_rate.epoch.env.abd": 0.9859943977591037, + "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, + "success_rate.epoch.env.agentgym:sciworld": 0.97265625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9483695652173914, + "success_rate.epoch.env.logic": 0.8998316498316499, + "success_rate.epoch.env.math": 0.9710399297937692, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8401913875598086, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8691947597803913, + "success_rate.epoch.global": 0.9008717310087173, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957482993197279, + "tokens_p.mean_in_band": 0.7845982142857143, + "tokens_rate.above_band": 0.9545454545454546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045454545454545456 + }, + { + "epoch": 1.857690668939071, + "grad_norm": 71.15789116107545, + "learning_rate": 3.658320604325335e-07, + "loss": 0.2932, + "step": 8720, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, + "success_rate.epoch.env.agentgym:sciworld": 0.97265625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9483695652173914, + "success_rate.epoch.env.logic": 0.8998316498316499, + "success_rate.epoch.env.math": 0.9711033274956217, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8403950302644154, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.869222592707703, + "success_rate.epoch.global": 0.9009950248756219, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9803921568627451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0196078431372549 + }, + { + "epoch": 1.8587558585428208, + "grad_norm": 0.0, + "learning_rate": 3.6579827547271627e-07, + "loss": 0.2052, + "step": 8725, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8643410852713178, + "success_rate.epoch.env.agentgym:sciworld": 0.97265625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9486486486486486, + "success_rate.epoch.env.logic": 0.8999158957106812, + "success_rate.epoch.env.math": 0.9706911636045494, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8405981546293351, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.869284605933473, + "success_rate.epoch.global": 0.9009937888198758, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976697736351531, + "tokens_p.mean_in_band": 0.3794642857142857, + "tokens_rate.above_band": 0.9907651715039578, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009234828496042216 + }, + { + "epoch": 1.85982104814657, + "grad_norm": 172.7191476254395, + "learning_rate": 3.6576448628715754e-07, + "loss": 0.4475, + "step": 8730, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8643410852713178, + "success_rate.epoch.env.agentgym:sciworld": 0.97265625, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9486486486486486, + "success_rate.epoch.env.logic": 0.8999158957106812, + "success_rate.epoch.env.math": 0.9707423580786027, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8402667513496348, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8692591324056868, + "success_rate.epoch.global": 0.9008684863523573, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9922520661157025, + "tokens_p.mean_in_band": 0.4060329861111111, + "tokens_rate.above_band": 0.9307692307692308, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06923076923076923 + }, + { + "epoch": 1.8608862377503197, + "grad_norm": 76.55022844979887, + "learning_rate": 3.657306929004827e-07, + "loss": 0.2583, + "step": 8735, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, + "success_rate.epoch.env.agentgym:sciworld": 0.9727626459143969, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9486486486486486, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9707933740191804, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8401015228426396, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8693136839721273, + "success_rate.epoch.global": 0.9008674101610905, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972646882494005, + "tokens_p.mean_in_band": 0.7354166666666667, + "tokens_rate.above_band": 0.9823321554770318, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0176678445229682 + }, + { + "epoch": 1.861951427354069, + "grad_norm": 26.311656830578926, + "learning_rate": 3.6569689533732e-07, + "loss": 0.1422, + "step": 8740, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9728682170542635, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9486486486486486, + "success_rate.epoch.env.logic": 0.8992443324937027, + "success_rate.epoch.env.math": 0.9708188153310104, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8403547671840355, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.869327169409086, + "success_rate.epoch.global": 0.9008663366336633, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967693836978131, + "tokens_p.mean_in_band": 0.4947916666666667, + "tokens_rate.above_band": 0.9940711462450593, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005928853754940711 + }, + { + "epoch": 1.8630166169578186, + "grad_norm": 49.168202966392805, + "learning_rate": 3.656630936223009e-07, + "loss": 0.233, + "step": 8745, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9728682170542635, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9486486486486486, + "success_rate.epoch.env.logic": 0.8993288590604027, + "success_rate.epoch.env.math": 0.9709075119409466, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8404053198226725, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8693510493782249, + "success_rate.epoch.global": 0.9009888751545118, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974609375, + "tokens_p.mean_in_band": 0.69453125, + "tokens_rate.above_band": 0.9696969696969697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030303030303030304 + }, + { + "epoch": 1.8640818065615679, + "grad_norm": 128.56927975701845, + "learning_rate": 3.656292877800599e-07, + "loss": 0.3961, + "step": 8750, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9487870619946092, + "success_rate.epoch.env.logic": 0.8994132439228835, + "success_rate.epoch.env.math": 0.9709453599306158, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8405567858272699, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8690578370490627, + "success_rate.epoch.global": 0.9009876543209877, + "success_rate.window.env.agentgym:sciworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967548076923077, + "tokens_p.mean_in_band": 0.7314453125, + "tokens_rate.above_band": 0.9923664122137404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007633587786259542 + }, + { + "epoch": 1.8651469961653175, + "grad_norm": 78.1317822767448, + "learning_rate": 3.655954778352344e-07, + "loss": 0.2493, + "step": 8755, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9487870619946092, + "success_rate.epoch.env.logic": 0.8996655518394648, + "success_rate.epoch.env.math": 0.9709579540528825, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8400758533501896, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8687953519397484, + "success_rate.epoch.global": 0.9007398273736128, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9982578397212544, + "tokens_p.mean_in_band": 0.5829326923076923, + "tokens_rate.above_band": 0.9851258581235698, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014874141876430207 + }, + { + "epoch": 1.8662121857690668, + "grad_norm": 25.245321186340654, + "learning_rate": 3.655616638124649e-07, + "loss": 0.1284, + "step": 8760, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9487870619946092, + "success_rate.epoch.env.logic": 0.899749373433584, + "success_rate.epoch.env.math": 0.9709579540528825, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8403281792363522, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8685696245334139, + "success_rate.epoch.global": 0.9007389162561577, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967271959459459, + "tokens_p.mean_in_band": 0.7163461538461539, + "tokens_rate.above_band": 0.9579288025889967, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.042071197411003236 + }, + { + "epoch": 1.8672773753728165, + "grad_norm": 62.88486349099041, + "learning_rate": 3.655278457363947e-07, + "loss": 0.3126, + "step": 8765, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9487870619946092, + "success_rate.epoch.env.logic": 0.8991666666666667, + "success_rate.epoch.env.math": 0.970983109571243, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8402646502835539, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8685131626969272, + "success_rate.epoch.global": 0.9006150061500615, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8222222222222223, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.99, + "tokens_p.mean_in_band": 0.6234019886363636, + "tokens_rate.above_band": 0.872093023255814, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12790697674418605 + }, + { + "epoch": 1.8683425649765657, + "grad_norm": 22.074902832687243, + "learning_rate": 3.6549402363167033e-07, + "loss": 0.1873, + "step": 8770, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9487870619946092, + "success_rate.epoch.env.logic": 0.8991666666666667, + "success_rate.epoch.env.math": 0.9710583153347733, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8404657016991819, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8685382769859415, + "success_rate.epoch.global": 0.9007371007371008, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9938668224299065, + "tokens_p.mean_in_band": 0.6162109375, + "tokens_rate.above_band": 0.963963963963964, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036036036036036036 + }, + { + "epoch": 1.8694077545803154, + "grad_norm": 396.0226969200282, + "learning_rate": 3.65460197522941e-07, + "loss": 0.3681, + "step": 8775, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8631178707224335, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9487870619946092, + "success_rate.epoch.env.logic": 0.8985868661679135, + "success_rate.epoch.env.math": 0.9710708117443869, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8403519798868636, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8685273393304851, + "success_rate.epoch.global": 0.9006134969325154, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0009407722513088, + "tokens_p.mean_in_band": 0.45703125, + "tokens_rate.above_band": 0.9744897959183674, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025510204081632654 + }, + { + "epoch": 1.8704729441840646, + "grad_norm": 51.67374573714006, + "learning_rate": 3.654263674348589e-07, + "loss": 0.4164, + "step": 8780, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9489247311827957, + "success_rate.epoch.env.logic": 0.8987551867219917, + "success_rate.epoch.env.math": 0.9711206896551724, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8404522613065326, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8686159431474533, + "success_rate.epoch.global": 0.9007352941176471, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969099813432836, + "tokens_p.mean_in_band": 0.7328125, + "tokens_rate.above_band": 0.9907578558225508, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009242144177449169 + }, + { + "epoch": 1.8715381337878143, + "grad_norm": 42.24871642244632, + "learning_rate": 3.6539253339207926e-07, + "loss": 0.2346, + "step": 8785, + "success_rate.epoch.env.abd": 0.9862258953168044, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9490616621983914, + "success_rate.epoch.env.logic": 0.8987551867219917, + "success_rate.epoch.env.math": 0.9711579853637538, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8406524466750314, + "success_rate.epoch.env.webshop": 0.9782608695652174, + "success_rate.epoch.env_macro_mean": 0.8686973571377308, + "success_rate.epoch.global": 0.9008567931456548, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976765799256505, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.9950678175092479, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004932182490752158 + }, + { + "epoch": 1.8726033233915635, + "grad_norm": 211.27373804971876, + "learning_rate": 3.6535869541926004e-07, + "loss": 0.1801, + "step": 8790, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.9493333333333334, + "success_rate.epoch.env.logic": 0.8987551867219917, + "success_rate.epoch.env.math": 0.9711703958691911, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8405388471177945, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687617654394993, + "success_rate.epoch.global": 0.9008557457212714, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996191926884996, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.996962794229309, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0030372057706909645 + }, + { + "epoch": 1.8736685129953132, + "grad_norm": 77.10857938654429, + "learning_rate": 3.653248535410621e-07, + "loss": 0.347, + "step": 8795, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8641509433962264, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.949468085106383, + "success_rate.epoch.env.logic": 0.8980099502487562, + "success_rate.epoch.env.math": 0.9712199312714777, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8400625978090767, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687142546352488, + "success_rate.epoch.global": 0.9006105006105006, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9972933070866141, + "tokens_p.mean_below_band": 4.94765117764473e-09, + "tokens_p.mean_in_band": 0.5048828125, + "tokens_rate.above_band": 0.9824191279887482, + "tokens_rate.below_band": 0.0007032348804500703, + "tokens_rate.in_band": 0.016877637130801686 + }, + { + "epoch": 1.8747337025990625, + "grad_norm": 442.0879752917922, + "learning_rate": 3.652910077821492e-07, + "loss": 0.3164, + "step": 8800, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8641509433962264, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.949468085106383, + "success_rate.epoch.env.logic": 0.8974358974358975, + "success_rate.epoch.env.math": 0.9708529789969995, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8401126408010012, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686366606221381, + "success_rate.epoch.global": 0.9004878048780488, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992988782051282, + "tokens_p.mean_in_band": 0.5529296875, + "tokens_rate.above_band": 0.968944099378882, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031055900621118012 + }, + { + "epoch": 1.8757988922028122, + "grad_norm": 81.08565346136402, + "learning_rate": 3.652571581671878e-07, + "loss": 0.3476, + "step": 8805, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8641509433962264, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.949468085106383, + "success_rate.epoch.env.logic": 0.8968646864686468, + "success_rate.epoch.env.math": 0.9708904109589042, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8396875, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8685494860942885, + "success_rate.epoch.global": 0.9002436053593179, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7222222222222222, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0012820512820513, + "tokens_p.mean_in_band": 0.5306332236842105, + "tokens_rate.above_band": 0.9390048154093098, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.060995184590690206 + }, + { + "epoch": 1.8768640818065616, + "grad_norm": 148.91949073523304, + "learning_rate": 3.652233047208473e-07, + "loss": 0.2632, + "step": 8810, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9534883720930233, + "success_rate.epoch.env.ded": 0.949468085106383, + "success_rate.epoch.env.logic": 0.8962962962962963, + "success_rate.epoch.env.math": 0.9709277469003847, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8395254448954106, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8685329044037114, + "success_rate.epoch.global": 0.9001216545012165, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971497252747252, + "tokens_p.mean_in_band": 0.5689808238636364, + "tokens_rate.above_band": 0.9763948497854077, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023605150214592276 + }, + { + "epoch": 1.877929271410311, + "grad_norm": 65.76655117597862, + "learning_rate": 3.6518944746779984e-07, + "loss": 0.2495, + "step": 8815, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9496021220159151, + "success_rate.epoch.env.logic": 0.8963815789473685, + "success_rate.epoch.env.math": 0.9709773794280837, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8393636930754834, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686387482874213, + "success_rate.epoch.global": 0.9001215066828676, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333333, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975071225071225, + "tokens_p.mean_in_band": 0.5050223214285714, + "tokens_rate.above_band": 0.9804469273743017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019553072625698324 + }, + { + "epoch": 1.8789944610140605, + "grad_norm": 86.95907042611266, + "learning_rate": 3.651555864327204e-07, + "loss": 0.1606, + "step": 8820, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9496021220159151, + "success_rate.epoch.env.logic": 0.896636587366694, + "success_rate.epoch.env.math": 0.9710144927536232, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8392523364485981, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686551814799649, + "success_rate.epoch.global": 0.9001213592233009, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9885752688172043, + "tokens_p.mean_in_band": 0.37109375, + "tokens_rate.above_band": 0.9789473684210527, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021052631578947368 + }, + { + "epoch": 1.88005965061781, + "grad_norm": 62.78270060399613, + "learning_rate": 3.6512172164028663e-07, + "loss": 0.2073, + "step": 8825, + "success_rate.epoch.env.abd": 0.9864130434782609, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9496021220159151, + "success_rate.epoch.env.logic": 0.8967213114754098, + "success_rate.epoch.env.math": 0.9710515112813963, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8394523957685127, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686911858090972, + "success_rate.epoch.global": 0.9002424242424243, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965490797546013, + "tokens_p.mean_in_band": 0.79296875, + "tokens_rate.above_band": 0.9760479041916168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023952095808383235 + }, + { + "epoch": 1.8811248402215595, + "grad_norm": 276.27209638409545, + "learning_rate": 3.6508785311517884e-07, + "loss": 0.4515, + "step": 8830, + "success_rate.epoch.env.abd": 0.986449864498645, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9496021220159151, + "success_rate.epoch.env.logic": 0.8967213114754098, + "success_rate.epoch.env.math": 0.971063829787234, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8386095592799503, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686190315397936, + "success_rate.epoch.global": 0.8998789346246974, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.990234375, + "tokens_p.mean_in_band": 0.5220947265625, + "tokens_rate.above_band": 0.8888888888888888, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1111111111111111 + }, + { + "epoch": 1.882190029825309, + "grad_norm": 108.39685236365882, + "learning_rate": 3.6505398088208035e-07, + "loss": 0.2833, + "step": 8835, + "success_rate.epoch.env.abd": 0.986449864498645, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9496021220159151, + "success_rate.epoch.env.logic": 0.8968903436988543, + "success_rate.epoch.env.math": 0.9711007224819379, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8388596219398823, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686604849559827, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9927325581395349, + "tokens_p.mean_in_band": 0.8736979166666666, + "tokens_rate.above_band": 0.9662921348314607, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033707865168539325 + }, + { + "epoch": 1.8832552194290584, + "grad_norm": 207.93342982331916, + "learning_rate": 3.6502010496567693e-07, + "loss": 0.2613, + "step": 8840, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9497354497354498, + "success_rate.epoch.env.logic": 0.8969746524938675, + "success_rate.epoch.env.math": 0.9711252653927813, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8391089108910891, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687084931709318, + "success_rate.epoch.global": 0.9001207729468599, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997749162479062, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9983277591973244, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016722408026755853 + }, + { + "epoch": 1.8843204090328078, + "grad_norm": 59.02728698427287, + "learning_rate": 3.6498622539065705e-07, + "loss": 0.1715, + "step": 8845, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9498680738786279, + "success_rate.epoch.env.logic": 0.8969746524938675, + "success_rate.epoch.env.math": 0.9711742263671047, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8389987639060569, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687256229876226, + "success_rate.epoch.global": 0.9001206272617611, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993792808219178, + "tokens_p.mean_in_band": 0.5442708333333334, + "tokens_rate.above_band": 0.9918478260869565, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008152173913043478 + }, + { + "epoch": 1.8853855986365573, + "grad_norm": 288.46010219787297, + "learning_rate": 3.6495234218171193e-07, + "loss": 0.3287, + "step": 8850, + "success_rate.epoch.env.abd": 0.9865591397849462, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9498680738786279, + "success_rate.epoch.env.logic": 0.8969746524938675, + "success_rate.epoch.env.math": 0.9711864406779661, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8389882788402221, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687429396001639, + "success_rate.epoch.global": 0.9001204819277109, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975703753351206, + "tokens_p.mean_in_band": 0.5817057291666666, + "tokens_rate.above_band": 0.9688311688311688, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03116883116883117 + }, + { + "epoch": 1.8864507882403068, + "grad_norm": 131.39455046498674, + "learning_rate": 3.6491845536353545e-07, + "loss": 0.375, + "step": 8855, + "success_rate.epoch.env.abd": 0.9865591397849462, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9498680738786279, + "success_rate.epoch.env.logic": 0.8964110929853181, + "success_rate.epoch.env.math": 0.9712473572938689, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8390875462392109, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687062691007404, + "success_rate.epoch.global": 0.9001203369434416, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992722602739726, + "tokens_p.mean_in_band": 0.6607142857142857, + "tokens_rate.above_band": 0.954248366013072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0457516339869281 + }, + { + "epoch": 1.8875159778440562, + "grad_norm": 33.0871024813285, + "learning_rate": 3.64884564960824e-07, + "loss": 0.3077, + "step": 8860, + "success_rate.epoch.env.abd": 0.9865951742627346, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9498680738786279, + "success_rate.epoch.env.logic": 0.8958502847843776, + "success_rate.epoch.env.math": 0.9712837837837838, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8392362180474284, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686753895166478, + "success_rate.epoch.global": 0.9001201923076924, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950181159420289, + "tokens_p.mean_in_band": 0.66064453125, + "tokens_rate.above_band": 0.9452054794520548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0547945205479452 + }, + { + "epoch": 1.8885811674478057, + "grad_norm": 73.15717917151254, + "learning_rate": 3.648506709982767e-07, + "loss": 0.2875, + "step": 8865, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9498680738786279, + "success_rate.epoch.env.logic": 0.8960194963444355, + "success_rate.epoch.env.math": 0.970873786407767, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8391758917589176, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686545149984171, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.994745575221239, + "tokens_p.mean_in_band": 0.6860795454545454, + "tokens_rate.above_band": 0.9112903225806451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08870967741935484 + }, + { + "epoch": 1.8896463570515551, + "grad_norm": 112.30454056775271, + "learning_rate": 3.6481677350059525e-07, + "loss": 0.4443, + "step": 8870, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8960194963444355, + "success_rate.epoch.env.math": 0.9709228824273072, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8394227817009517, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8686934160968668, + "success_rate.epoch.global": 0.9001199040767386, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9803921568627451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0196078431372549 + }, + { + "epoch": 1.8907115466553046, + "grad_norm": 108.87575965681468, + "learning_rate": 3.647828724924839e-07, + "loss": 0.1819, + "step": 8875, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8960194963444355, + "success_rate.epoch.env.math": 0.9709595959595959, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8396197485433916, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687744385701105, + "success_rate.epoch.global": 0.9002395209580838, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980755131964809, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9970760233918129, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0029239766081871343 + }, + { + "epoch": 1.891776736259054, + "grad_norm": 27.242299934578543, + "learning_rate": 3.6474896799864945e-07, + "loss": 0.2229, + "step": 8880, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9501312335958005, + "success_rate.epoch.env.logic": 0.8960194963444355, + "success_rate.epoch.env.math": 0.9710084033613445, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8392529087568892, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8687574568620238, + "success_rate.epoch.global": 0.9001196172248804, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9965128755364807, + "tokens_p.mean_in_band": 0.5091145833333334, + "tokens_rate.above_band": 0.9395161290322581, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06048387096774194 + }, + { + "epoch": 1.8928419258628035, + "grad_norm": 75.14965641933597, + "learning_rate": 3.647150600438012e-07, + "loss": 0.3553, + "step": 8885, + "success_rate.epoch.env.abd": 0.986737400530504, + "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9502617801047121, + "success_rate.epoch.env.logic": 0.8961038961038961, + "success_rate.epoch.env.math": 0.9710205795884083, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8391929073677774, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.868816152897351, + "success_rate.epoch.global": 0.9001194743130227, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9666666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977494855967078, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9858012170385395, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014198782961460446 + }, + { + "epoch": 1.893907115466553, + "grad_norm": 55.776692901584276, + "learning_rate": 3.6468114865265116e-07, + "loss": 0.3356, + "step": 8890, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9502617801047121, + "success_rate.epoch.env.logic": 0.8962722852512156, + "success_rate.epoch.env.math": 0.9710570469798657, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8393402565668907, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688545340855497, + "success_rate.epoch.global": 0.9002386634844869, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9958333333333333, + "tokens_p.mean_in_band": 0.6627604166666666, + "tokens_rate.above_band": 0.9803921568627451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0196078431372549 + }, + { + "epoch": 1.8949723050703025, + "grad_norm": 296.52137807605504, + "learning_rate": 3.646472338499136e-07, + "loss": 0.1957, + "step": 8895, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8656716417910447, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9502617801047121, + "success_rate.epoch.env.logic": 0.8963562753036437, + "success_rate.epoch.env.math": 0.971081307627829, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8396341463414634, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8689368288982919, + "success_rate.epoch.global": 0.900357568533969, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977078239608802, + "tokens_p.mean_in_band": 0.78828125, + "tokens_rate.above_band": 0.9879227053140096, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012077294685990338 + }, + { + "epoch": 1.8960374946740521, + "grad_norm": 100.64244478297722, + "learning_rate": 3.6461331566030537e-07, + "loss": 0.2256, + "step": 8900, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8656716417910447, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9503916449086162, + "success_rate.epoch.env.logic": 0.8964401294498382, + "success_rate.epoch.env.math": 0.9711297071129708, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8398294762484775, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8689784151112242, + "success_rate.epoch.global": 0.9004761904761904, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970529359430605, + "tokens_p.mean_in_band": 0.779296875, + "tokens_rate.above_band": 0.9964539007092199, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0035460992907801418 + }, + { + "epoch": 1.8971026842778014, + "grad_norm": 0.0, + "learning_rate": 3.6457939410854587e-07, + "loss": 0.1274, + "step": 8905, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9505208333333334, + "success_rate.epoch.env.logic": 0.8964401294498382, + "success_rate.epoch.env.math": 0.9711417816813049, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8398176291793313, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8690806370020904, + "success_rate.epoch.global": 0.9004756242568371, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978402679830748, + "tokens_p.mean_in_band": 0.3984375, + "tokens_rate.above_band": 0.9957865168539326, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004213483146067416 + }, + { + "epoch": 1.898167873881551, + "grad_norm": 87.1933198476706, + "learning_rate": 3.6454546921935686e-07, + "loss": 0.2482, + "step": 8910, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9505208333333334, + "success_rate.epoch.env.logic": 0.8964401294498382, + "success_rate.epoch.env.math": 0.9707846410684474, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8397572078907436, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8690426768292315, + "success_rate.epoch.global": 0.9003562945368171, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.993421052631579, + "tokens_p.mean_in_band": 0.471435546875, + "tokens_rate.above_band": 0.9344262295081968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06557377049180328 + }, + { + "epoch": 1.8992330634853003, + "grad_norm": 67.01756454499545, + "learning_rate": 3.645115410174625e-07, + "loss": 0.2604, + "step": 8915, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8671586715867159, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9505208333333334, + "success_rate.epoch.env.logic": 0.8964401294498382, + "success_rate.epoch.env.math": 0.9708211754897874, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.8399029714978775, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8689021145780077, + "success_rate.epoch.global": 0.900355871886121, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984793187347932, + "tokens_p.mean_in_band": 0.7333333333333333, + "tokens_rate.above_band": 0.9647887323943662, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035211267605633804 + }, + { + "epoch": 1.90029825308905, + "grad_norm": 124.8867859723598, + "learning_rate": 3.6447760952758945e-07, + "loss": 0.4629, + "step": 8920, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.948051948051948, + "success_rate.epoch.env.logic": 0.8965238480194018, + "success_rate.epoch.env.math": 0.9708454810495627, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.839794064203513, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8687515049521056, + "success_rate.epoch.global": 0.9002369668246446, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9906776511397423, + "tokens_p.mean_in_band": 0.5905602229899497, + "tokens_rate.above_band": 0.8352649006622517, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16473509933774835 + }, + { + "epoch": 1.9013634426927992, + "grad_norm": 534.2548537255205, + "learning_rate": 3.6444367477446683e-07, + "loss": 0.2376, + "step": 8925, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9699248120300752, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.948051948051948, + "success_rate.epoch.env.logic": 0.8966908797417272, + "success_rate.epoch.env.math": 0.970869746150645, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.8396854204476709, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.868779731806712, + "success_rate.epoch.global": 0.9002366863905326, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980630165289256, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9603174603174603, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03968253968253968 + }, + { + "epoch": 1.902428632296549, + "grad_norm": 138.79055943098803, + "learning_rate": 3.6440973678282596e-07, + "loss": 0.2636, + "step": 8930, + "success_rate.epoch.env.abd": 0.9868766404199475, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.948051948051948, + "success_rate.epoch.env.logic": 0.8968573730862208, + "success_rate.epoch.env.math": 0.9708939708939709, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.839782345828295, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685442195725792, + "success_rate.epoch.global": 0.9002364066193853, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982461734693877, + "tokens_p.mean_in_band": 0.6927083333333334, + "tokens_rate.above_band": 0.9423076923076923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.057692307692307696 + }, + { + "epoch": 1.9034938219002981, + "grad_norm": 123.68342906320828, + "learning_rate": 3.643757955774006e-07, + "loss": 0.1959, + "step": 8935, + "success_rate.epoch.env.abd": 0.9868766404199475, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9481865284974094, + "success_rate.epoch.env.logic": 0.8970233306516492, + "success_rate.epoch.env.math": 0.9709302325581395, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8396739130434783, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685649801989649, + "success_rate.epoch.global": 0.9002361275088547, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950564971751412, + "tokens_p.mean_in_band": 0.7360026041666666, + "tokens_rate.above_band": 0.9365079365079365, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06349206349206349 + }, + { + "epoch": 1.9045590115040478, + "grad_norm": 218.0056230192956, + "learning_rate": 3.643418511829268e-07, + "loss": 0.3123, + "step": 8940, + "success_rate.epoch.env.abd": 0.9869109947643979, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9457364341085271, + "success_rate.epoch.env.logic": 0.8971887550200803, + "success_rate.epoch.env.math": 0.970954356846473, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8395173453996984, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8683584538612865, + "success_rate.epoch.global": 0.9001179245283019, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9955074186113512, + "tokens_p.mean_below_band": 1.2759119272232056e-07, + "tokens_p.mean_in_band": 0.5483903556034483, + "tokens_rate.above_band": 0.8816357632715266, + "tokens_rate.below_band": 0.000508001016002032, + "tokens_rate.in_band": 0.11785623571247142 + }, + { + "epoch": 1.905624201107797, + "grad_norm": 12.2928777285083, + "learning_rate": 3.643079036241432e-07, + "loss": 0.6015, + "step": 8945, + "success_rate.epoch.env.abd": 0.9869451697127938, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9458762886597938, + "success_rate.epoch.env.logic": 0.8972712680577849, + "success_rate.epoch.env.math": 0.9710024855012428, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8396624472573839, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8683993423203611, + "success_rate.epoch.global": 0.9002355712603063, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0012953367875648, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9994821336095288, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0005178663904712584 + }, + { + "epoch": 1.9066893907115467, + "grad_norm": 38.41411912337251, + "learning_rate": 3.6427395292579024e-07, + "loss": 0.1982, + "step": 8950, + "success_rate.epoch.env.abd": 0.9869791666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9460154241645244, + "success_rate.epoch.env.logic": 0.8973536487570168, + "success_rate.epoch.env.math": 0.9710504549214226, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8395061728395061, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8684127248803737, + "success_rate.epoch.global": 0.900235294117647, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980005924170616, + "tokens_p.mean_in_band": 0.62890625, + "tokens_rate.above_band": 0.9813953488372092, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018604651162790697 + }, + { + "epoch": 1.907754580315296, + "grad_norm": 51.110429199823585, + "learning_rate": 3.6423999911261116e-07, + "loss": 0.3135, + "step": 8955, + "success_rate.epoch.env.abd": 0.9870466321243523, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9460154241645244, + "success_rate.epoch.env.logic": 0.8976, + "success_rate.epoch.env.math": 0.9710863279636514, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8396027685826061, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8684532962881917, + "success_rate.epoch.global": 0.9003525264394829, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995361328125, + "tokens_p.mean_in_band": 0.5546875, + "tokens_rate.above_band": 0.9922480620155039, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007751937984496124 + }, + { + "epoch": 1.9088197699190457, + "grad_norm": 99.64006925845497, + "learning_rate": 3.642060422093512e-07, + "loss": 0.1851, + "step": 8960, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9460154241645244, + "success_rate.epoch.env.logic": 0.8976, + "success_rate.epoch.env.math": 0.9711101939744119, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8397956116621581, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685586072121495, + "success_rate.epoch.global": 0.9004694835680751, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984038978494624, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.909884959522795, + "grad_norm": 42.651445610609706, + "learning_rate": 3.6417208224075794e-07, + "loss": 0.1813, + "step": 8965, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9555555555555556, + "success_rate.epoch.env.ded": 0.9462915601023018, + "success_rate.epoch.env.logic": 0.8976818545163869, + "success_rate.epoch.env.math": 0.9707216494845361, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8399879915941159, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686170705258323, + "success_rate.epoch.global": 0.9004689331770223, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984406822810591, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9939271255060729, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006072874493927126 + }, + { + "epoch": 1.9109501491265446, + "grad_norm": 68.41679309492326, + "learning_rate": 3.641381192315811e-07, + "loss": 0.5337, + "step": 8970, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9462915601023018, + "success_rate.epoch.env.logic": 0.8977635782747604, + "success_rate.epoch.env.math": 0.9707457766790276, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8399760263709919, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8687134404626158, + "success_rate.epoch.global": 0.9004683840749415, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9923780487804879, + "tokens_p.mean_in_band": 0.54765625, + "tokens_rate.above_band": 0.8913043478260869, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10869565217391304 + }, + { + "epoch": 1.9120153387302938, + "grad_norm": 102.54793159479081, + "learning_rate": 3.6410415320657266e-07, + "loss": 0.3892, + "step": 8975, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9462915601023018, + "success_rate.epoch.env.logic": 0.897047086991221, + "success_rate.epoch.env.math": 0.9703947368421053, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8398683029033224, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686065991364221, + "success_rate.epoch.global": 0.9002339181286549, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.5166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9908002336448598, + "tokens_p.mean_in_band": 0.623046875, + "tokens_rate.above_band": 0.9224137931034483, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07758620689655173 + }, + { + "epoch": 1.9130805283340435, + "grad_norm": 79.87936609934741, + "learning_rate": 3.640701841904869e-07, + "loss": 0.2685, + "step": 8980, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8971291866028708, + "success_rate.epoch.env.math": 0.9704433497536946, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8397608370702541, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686211680470076, + "success_rate.epoch.global": 0.900233644859813, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966482649842271, + "tokens_p.mean_in_band": 0.7274305555555556, + "tokens_rate.above_band": 0.9723926380368099, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027607361963190184 + }, + { + "epoch": 1.914145717937793, + "grad_norm": 65.72260002128823, + "learning_rate": 3.640362122080802e-07, + "loss": 0.301, + "step": 8985, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9704797047970479, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.896414342629482, + "success_rate.epoch.env.math": 0.970467596390484, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8394509101760669, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685401507920886, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9923245614035088, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9144385026737968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0855614973262032 + }, + { + "epoch": 1.9152109075415424, + "grad_norm": 80.78397466931366, + "learning_rate": 3.6400223728411094e-07, + "loss": 0.1647, + "step": 8990, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9704797047970479, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8966613672496025, + "success_rate.epoch.env.math": 0.9705159705159705, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8395466746197435, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686191447551852, + "success_rate.epoch.global": 0.9001165501165501, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966902709359606, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.916276097145292, + "grad_norm": 63.13953004143602, + "learning_rate": 3.6396825944334e-07, + "loss": 0.2674, + "step": 8995, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9704797047970479, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8968253968253969, + "success_rate.epoch.env.math": 0.9705641864268193, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8397378611855824, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686558203963198, + "success_rate.epoch.global": 0.9002328288707799, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9934413580246914, + "tokens_p.mean_in_band": 0.809375, + "tokens_rate.above_band": 0.9418604651162791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05813953488372093 + }, + { + "epoch": 1.9173412867490414, + "grad_norm": 41.61706506620451, + "learning_rate": 3.6393427871053005e-07, + "loss": 0.3223, + "step": 9000, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8968253968253969, + "success_rate.epoch.env.math": 0.9706242350061199, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8398332837153915, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686926418245666, + "success_rate.epoch.global": 0.9003488372093024, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989329268292683, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9951456310679612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0048543689320388345 + }, + { + "epoch": 1.9184064763527908, + "grad_norm": 81.30143203275551, + "learning_rate": 3.6390029511044604e-07, + "loss": 0.4031, + "step": 9005, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9440203562340967, + "success_rate.epoch.env.logic": 0.8962787015043547, + "success_rate.epoch.env.math": 0.9706242350061199, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8397740784780023, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685027285179573, + "success_rate.epoch.global": 0.9001161440185831, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.6166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9950268058690744, + "tokens_p.mean_in_band": 0.7016447368421053, + "tokens_rate.above_band": 0.9031600407747197, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09683995922528033 + }, + { + "epoch": 1.9194716659565403, + "grad_norm": 169.86596290553987, + "learning_rate": 3.63866308667855e-07, + "loss": 0.2962, + "step": 9010, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9441624365482234, + "success_rate.epoch.env.logic": 0.8964426877470356, + "success_rate.epoch.env.math": 0.9706481858948227, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8397150489759573, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685273637855452, + "success_rate.epoch.global": 0.9001160092807424, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974759615384615, + "tokens_p.mean_in_band": 0.5205078125, + "tokens_rate.above_band": 0.9701492537313433, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029850746268656716 + }, + { + "epoch": 1.9205368555602897, + "grad_norm": 142.77595837046383, + "learning_rate": 3.6383231940752596e-07, + "loss": 0.4036, + "step": 9015, + "success_rate.epoch.env.abd": 0.987146529562982, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9441624365482234, + "success_rate.epoch.env.logic": 0.8966876971608833, + "success_rate.epoch.env.math": 0.9706720977596741, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8396086569819152, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8685451507660709, + "success_rate.epoch.global": 0.9001158748551564, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954268292682927, + "tokens_p.mean_in_band": 0.6497395833333334, + "tokens_rate.above_band": 0.9447004608294931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.055299539170506916 + }, + { + "epoch": 1.9216020451640392, + "grad_norm": 90.08525616667941, + "learning_rate": 3.637983273542301e-07, + "loss": 0.1618, + "step": 9020, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9441624365482234, + "success_rate.epoch.env.logic": 0.8968503937007875, + "success_rate.epoch.env.math": 0.9706720977596741, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8397986378442405, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686025638812709, + "success_rate.epoch.global": 0.9002314814814815, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988095238095238, + "tokens_p.mean_in_band": 0.8583984375, + "tokens_rate.above_band": 0.9924385633270322, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007561436672967864 + }, + { + "epoch": 1.9226672347677887, + "grad_norm": 168.30605562541513, + "learning_rate": 3.637643325327406e-07, + "loss": 0.2117, + "step": 9025, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9441624365482234, + "success_rate.epoch.env.logic": 0.8969315499606609, + "success_rate.epoch.env.math": 0.9707317073170731, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8396923986986099, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686057026696019, + "success_rate.epoch.global": 0.9002312138728323, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9909722222222223, + "tokens_p.mean_in_band": 0.5520833333333334, + "tokens_rate.above_band": 0.8571428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14285714285714285 + }, + { + "epoch": 1.9237324243715381, + "grad_norm": 90.51657523408925, + "learning_rate": 3.6373033496783253e-07, + "loss": 0.2778, + "step": 9030, + "success_rate.epoch.env.abd": 0.9872448979591837, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9443037974683545, + "success_rate.epoch.env.logic": 0.8969315499606609, + "success_rate.epoch.env.math": 0.9707436001625356, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8399763802775317, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686915358868077, + "success_rate.epoch.global": 0.9003464203233257, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980491329479769, + "tokens_p.mean_in_band": 0.73193359375, + "tokens_rate.above_band": 0.9908361970217641, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009163802978235968 + }, + { + "epoch": 1.9247976139752876, + "grad_norm": 50.94793959134441, + "learning_rate": 3.636963346842832e-07, + "loss": 0.2667, + "step": 9035, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9443037974683545, + "success_rate.epoch.env.logic": 0.8970125786163522, + "success_rate.epoch.env.math": 0.9703613479496549, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.839622641509434, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8683607088098267, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.62, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.995045731707317, + "tokens_p.mean_in_band": 0.5623337765957447, + "tokens_rate.above_band": 0.7772511848341233, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.22274881516587677 + }, + { + "epoch": 1.925862803579037, + "grad_norm": 55.74473206296719, + "learning_rate": 3.6366233170687165e-07, + "loss": 0.2714, + "step": 9040, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8700361010830325, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9574468085106383, + "success_rate.epoch.env.ded": 0.9443037974683545, + "success_rate.epoch.env.logic": 0.8971742543171115, + "success_rate.epoch.env.math": 0.9703733766233766, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8395643214601118, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8684235878881374, + "success_rate.epoch.global": 0.9, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971942724458205, + "tokens_p.mean_in_band": 0.3984375, + "tokens_rate.above_band": 0.9877675840978594, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012232415902140673 + }, + { + "epoch": 1.9269279931827865, + "grad_norm": 133.49173403920906, + "learning_rate": 3.636283260603791e-07, + "loss": 0.3041, + "step": 9045, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8971742543171115, + "success_rate.epoch.env.math": 0.9703853955375253, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8398001175778954, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686241904047892, + "success_rate.epoch.global": 0.9001150747986191, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995659722222222, + "tokens_p.mean_in_band": 0.658203125, + "tokens_rate.above_band": 0.9969230769230769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003076923076923077 + }, + { + "epoch": 1.927993182786536, + "grad_norm": 612.7860374754193, + "learning_rate": 3.635943177695886e-07, + "loss": 0.4929, + "step": 9050, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8714285714285714, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8974158183241974, + "success_rate.epoch.env.math": 0.9704453441295546, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8398471936526594, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686977738745135, + "success_rate.epoch.global": 0.9002298850574713, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979108635097493, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.9290583723902854, + "grad_norm": 224.17483075811546, + "learning_rate": 3.635603068592851e-07, + "loss": 0.2035, + "step": 9055, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8718861209964412, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8974960876369327, + "success_rate.epoch.env.math": 0.9704811969268096, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8400821596244131, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8687712863881146, + "success_rate.epoch.global": 0.9003444316877153, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965441176470589, + "tokens_p.mean_in_band": 0.6953125, + "tokens_rate.above_band": 0.9976525821596244, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002347417840375587 + }, + { + "epoch": 1.930123561994035, + "grad_norm": 126.74713334968651, + "learning_rate": 3.635262933542556e-07, + "loss": 0.2898, + "step": 9060, + "success_rate.epoch.env.abd": 0.9873096446700508, + "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9445843828715366, + "success_rate.epoch.env.logic": 0.89765625, + "success_rate.epoch.env.math": 0.9705169628432956, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.839882697947214, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688279228013333, + "success_rate.epoch.global": 0.9003440366972477, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987012987012988, + "tokens_p.mean_in_band": 0.439453125, + "tokens_rate.above_band": 0.9987029831387808, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012970168612191958 + }, + { + "epoch": 1.9311887515977844, + "grad_norm": 62.90172133206027, + "learning_rate": 3.634922772792888e-07, + "loss": 0.1731, + "step": 9065, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9447236180904522, + "success_rate.epoch.env.logic": 0.8978159126365055, + "success_rate.epoch.env.math": 0.9705407586763519, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8397773872290568, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688506055697122, + "success_rate.epoch.global": 0.9003436426116839, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988517060367454, + "tokens_p.mean_in_band": 0.5642361111111112, + "tokens_rate.above_band": 0.9769230769230769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023076923076923078 + }, + { + "epoch": 1.9322539412015338, + "grad_norm": 127.52265247480662, + "learning_rate": 3.6345825865917547e-07, + "loss": 0.2649, + "step": 9070, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, + "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9447236180904522, + "success_rate.epoch.env.logic": 0.8979750778816199, + "success_rate.epoch.env.math": 0.9705763804917372, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8399180567749488, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.86856242276276, + "success_rate.epoch.global": 0.9003318457489415, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9972426470588235, + "tokens_p.mean_in_band": 0.6611328125, + "tokens_rate.above_band": 0.9855072463768116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014492753623188406 + }, + { + "epoch": 1.9333191308052835, + "grad_norm": 285.3429191357568, + "learning_rate": 3.6342423751870807e-07, + "loss": 0.243, + "step": 9075, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9447236180904522, + "success_rate.epoch.env.logic": 0.8980544747081712, + "success_rate.epoch.env.math": 0.9705763804917372, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8402453271028038, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686510259506636, + "success_rate.epoch.global": 0.9004457652303121, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980596405228758, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9902912621359223, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009708737864077669 + }, + { + "epoch": 1.9343843204090327, + "grad_norm": 77.8749109391679, + "learning_rate": 3.633902138826809e-07, + "loss": 0.2204, + "step": 9080, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9447236180904522, + "success_rate.epoch.env.logic": 0.8982919254658385, + "success_rate.epoch.env.math": 0.9702093397745571, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8400933761307265, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8686529295331351, + "success_rate.epoch.global": 0.900331088023747, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961538461538462, + "tokens_p.mean_in_band": 0.6150390625, + "tokens_rate.above_band": 0.8666666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13333333333333333 + }, + { + "epoch": 1.9354495100127824, + "grad_norm": 398.3227759738249, + "learning_rate": 3.633561877758903e-07, + "loss": 0.3421, + "step": 9085, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9448621553884712, + "success_rate.epoch.env.logic": 0.8985282726568552, + "success_rate.epoch.env.math": 0.9702452754322477, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8402332361516035, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8687029913665536, + "success_rate.epoch.global": 0.9004447485460144, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986361480075902, + "tokens_p.mean_in_band": 0.8372395833333334, + "tokens_rate.above_band": 0.9943396226415094, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005660377358490566 + }, + { + "epoch": 1.9365146996165317, + "grad_norm": 279.4423630613026, + "learning_rate": 3.6332215922313415e-07, + "loss": 0.1831, + "step": 9090, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9448621553884712, + "success_rate.epoch.env.logic": 0.8986068111455109, + "success_rate.epoch.env.math": 0.970281124497992, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.840221187427241, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.868712294896557, + "success_rate.epoch.global": 0.9004442419409956, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9905024509803921, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9622641509433962, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03773584905660377 + }, + { + "epoch": 1.9375798892202813, + "grad_norm": 50.96224276554615, + "learning_rate": 3.6328812824921224e-07, + "loss": 0.2398, + "step": 9095, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9448621553884712, + "success_rate.epoch.env.logic": 0.8987635239567233, + "success_rate.epoch.env.math": 0.9699278267842822, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8403605699331201, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8687843983274148, + "success_rate.epoch.global": 0.9004437364887928, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996822033898305, + "tokens_p.mean_in_band": 0.4479166666666667, + "tokens_rate.above_band": 0.9833333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016666666666666666 + }, + { + "epoch": 1.9386450788240306, + "grad_norm": 109.92298152517856, + "learning_rate": 3.6325409487892607e-07, + "loss": 0.2635, + "step": 9100, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9448621553884712, + "success_rate.epoch.env.logic": 0.8987635239567233, + "success_rate.epoch.env.math": 0.9699759807846277, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8405460354342144, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688569049537626, + "success_rate.epoch.global": 0.9005568814638027, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967026378896883, + "tokens_p.mean_in_band": 0.765625, + "tokens_rate.above_band": 0.9952267303102625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00477326968973747 + }, + { + "epoch": 1.9397102684277803, + "grad_norm": 124.21391618067524, + "learning_rate": 3.6322005913707894e-07, + "loss": 0.1727, + "step": 9105, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.945, + "success_rate.epoch.env.logic": 0.8989197530864198, + "success_rate.epoch.env.math": 0.9699879951980792, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8404409631563678, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688809763434574, + "success_rate.epoch.global": 0.9005562492904984, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973104508196722, + "tokens_p.mean_in_band": 0.57421875, + "tokens_rate.above_band": 0.991869918699187, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008130081300813009 + }, + { + "epoch": 1.9407754580315295, + "grad_norm": 62.23115194154413, + "learning_rate": 3.6318602104847583e-07, + "loss": 0.2065, + "step": 9110, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9451371571072319, + "success_rate.epoch.env.logic": 0.8984615384615384, + "success_rate.epoch.env.math": 0.9700119952019193, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8402898550724638, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688402340163925, + "success_rate.epoch.global": 0.9004422270098651, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976777059773829, + "tokens_p.mean_in_band": 0.6293402777777778, + "tokens_rate.above_band": 0.9717425431711146, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0282574568288854 + }, + { + "epoch": 1.9418406476352792, + "grad_norm": 80.02891353760823, + "learning_rate": 3.631519806379235e-07, + "loss": 0.2089, + "step": 9115, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9451371571072319, + "success_rate.epoch.env.logic": 0.8987730061349694, + "success_rate.epoch.env.math": 0.9700479233226837, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8404286128004633, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8688844297911374, + "success_rate.epoch.global": 0.9005549892400045, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9913306451612903, + "tokens_p.mean_in_band": 0.7369791666666666, + "tokens_rate.above_band": 0.9281437125748503, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0718562874251497 + }, + { + "epoch": 1.9429058372390284, + "grad_norm": 72.53454885443212, + "learning_rate": 3.631179379302303e-07, + "loss": 0.248, + "step": 9120, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9454094292803971, + "success_rate.epoch.env.logic": 0.8988505747126436, + "success_rate.epoch.env.math": 0.9700837654567211, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8405671296296297, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8689725183021048, + "success_rate.epoch.global": 0.9006674963231135, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993259803921568, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9980430528375733, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019569471624266144 + }, + { + "epoch": 1.9439710268427781, + "grad_norm": 43.46356659659513, + "learning_rate": 3.630838929502064e-07, + "loss": 0.1965, + "step": 9125, + "success_rate.epoch.env.abd": 0.9874371859296482, + "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9454094292803971, + "success_rate.epoch.env.logic": 0.8990053557765876, + "success_rate.epoch.env.math": 0.9701433121019108, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8403701561596298, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8689769727209294, + "success_rate.epoch.global": 0.9006667420047463, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967105263157895, + "tokens_p.mean_in_band": 0.4713541666666667, + "tokens_rate.above_band": 0.8636363636363636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13636363636363635 + }, + { + "epoch": 1.9450362164465274, + "grad_norm": 120.39441897126666, + "learning_rate": 3.6304984572266345e-07, + "loss": 0.1859, + "step": 9130, + "success_rate.epoch.env.abd": 0.9874371859296482, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9454094292803971, + "success_rate.epoch.env.logic": 0.8990825688073395, + "success_rate.epoch.env.math": 0.9701670644391408, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8406466512702079, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8690514385462129, + "success_rate.epoch.global": 0.9007788689468337, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947630494505495, + "tokens_p.mean_in_band": 0.8138020833333334, + "tokens_rate.above_band": 0.9918256130790191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008174386920980926 + }, + { + "epoch": 1.946101406050277, + "grad_norm": 0.0, + "learning_rate": 3.630157962724148e-07, + "loss": 0.3496, + "step": 9135, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9455445544554455, + "success_rate.epoch.env.logic": 0.8991596638655462, + "success_rate.epoch.env.math": 0.9702144559173947, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8407845399480819, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8690904372861159, + "success_rate.epoch.global": 0.9008907430375465, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0002088903743316, + "tokens_p.mean_in_band": 0.876953125, + "tokens_rate.above_band": 0.9973333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026666666666666666 + }, + { + "epoch": 1.9471665956540263, + "grad_norm": 276.0311235604553, + "learning_rate": 3.629817446242756e-07, + "loss": 0.3399, + "step": 9140, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.9678571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.945679012345679, + "success_rate.epoch.env.logic": 0.8993135011441648, + "success_rate.epoch.env.math": 0.9702499008330028, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8405880657249928, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8691511321318132, + "success_rate.epoch.global": 0.9008897398355671, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994762569832403, + "tokens_p.mean_in_band": 0.6276041666666666, + "tokens_rate.above_band": 0.9916897506925207, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008310249307479225 + }, + { + "epoch": 1.948231785257776, + "grad_norm": 216.7601198118111, + "learning_rate": 3.629476908030623e-07, + "loss": 0.3421, + "step": 9145, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.9678571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.8987052551408987, + "success_rate.epoch.env.math": 0.9702852614896988, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8407258064516129, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8691358402209328, + "success_rate.epoch.global": 0.9008887388907638, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979811946902655, + "tokens_p.mean_in_band": 0.65, + "tokens_rate.above_band": 0.9868995633187773, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013100436681222707 + }, + { + "epoch": 1.9492969748615252, + "grad_norm": 118.96363195648838, + "learning_rate": 3.629136348335931e-07, + "loss": 0.2794, + "step": 9150, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, + "success_rate.epoch.env.agentgym:sciworld": 0.9679715302491103, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.898936170212766, + "success_rate.epoch.env.math": 0.9702970297029703, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8405296488198043, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8691960219700569, + "success_rate.epoch.global": 0.9008877401955276, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998989218328841, + "tokens_p.mean_in_band": 0.576171875, + "tokens_rate.above_band": 0.9840848806366048, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015915119363395226 + }, + { + "epoch": 1.9503621644652749, + "grad_norm": 83.31949908768537, + "learning_rate": 3.628795767406878e-07, + "loss": 0.209, + "step": 9155, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.898936170212766, + "success_rate.epoch.env.math": 0.970344009489917, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8407588387467663, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.869231453410764, + "success_rate.epoch.global": 0.9009989897856101, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964015151515152, + "tokens_p.mean_in_band": 0.8268229166666666, + "tokens_rate.above_band": 0.9821428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017857142857142856 + }, + { + "epoch": 1.9514273540690241, + "grad_norm": 199.44421113391073, + "learning_rate": 3.6284551654916765e-07, + "loss": 0.1286, + "step": 9160, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9830508474576272, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.946078431372549, + "success_rate.epoch.env.logic": 0.8990129081245254, + "success_rate.epoch.env.math": 0.9703791469194313, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8408960367604825, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8693323011943324, + "success_rate.epoch.global": 0.9011099899091827, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981997784342689, + "tokens_p.mean_in_band": 0.8313802083333334, + "tokens_rate.above_band": 0.9955882352941177, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004411764705882353 + }, + { + "epoch": 1.9524925436727738, + "grad_norm": 76.79437987446578, + "learning_rate": 3.628114542838555e-07, + "loss": 0.3887, + "step": 9165, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, + "success_rate.epoch.env.agentgym:sciworld": 0.9681978798586572, + "success_rate.epoch.env.agentgym:textcraft": 0.9833333333333333, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9463414634146341, + "success_rate.epoch.env.logic": 0.8990129081245254, + "success_rate.epoch.env.math": 0.9703908409001184, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8409873708381171, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8694436523205368, + "success_rate.epoch.global": 0.9012096774193549, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972426470588235, + "tokens_p.mean_in_band": 0.8834635416666666, + "tokens_rate.above_band": 0.9941520467836257, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005847953216374269 + }, + { + "epoch": 1.9535577332765233, + "grad_norm": 332.6171389137624, + "learning_rate": 3.627773899695756e-07, + "loss": 0.2785, + "step": 9170, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9463414634146341, + "success_rate.epoch.env.logic": 0.8990895295902883, + "success_rate.epoch.env.math": 0.9704258675078864, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8408373960424433, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8694852950563998, + "success_rate.epoch.global": 0.9012083240098456, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991472712680578, + "tokens_p.mean_in_band": 0.5528846153846154, + "tokens_rate.above_band": 0.9795597484276729, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020440251572327043 + }, + { + "epoch": 1.9546229228802727, + "grad_norm": 121.59459115619276, + "learning_rate": 3.6274332363115396e-07, + "loss": 0.2806, + "step": 9175, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, + "success_rate.epoch.env.agentgym:sciworld": 0.9685314685314685, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8991660348749052, + "success_rate.epoch.env.math": 0.9704608113430484, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8409742120343839, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8695668769043091, + "success_rate.epoch.global": 0.901318730442557, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0002700950734658, + "tokens_p.mean_in_band": 0.75390625, + "tokens_rate.above_band": 0.9991364421416234, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0008635578583765112 + }, + { + "epoch": 1.9556881124840222, + "grad_norm": 37.136455214458834, + "learning_rate": 3.627092552934177e-07, + "loss": 0.1691, + "step": 9180, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8992424242424243, + "success_rate.epoch.env.math": 0.9705188679245284, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8410197651102835, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693286766902474, + "success_rate.epoch.global": 0.9013172583165885, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973030018761726, + "tokens_p.mean_in_band": 0.7259114583333334, + "tokens_rate.above_band": 0.9944029850746269, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005597014925373134 + }, + { + "epoch": 1.9567533020877717, + "grad_norm": 60.58211295407395, + "learning_rate": 3.6267518498119573e-07, + "loss": 0.3777, + "step": 9185, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8987915407854985, + "success_rate.epoch.env.math": 0.9705535924617197, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8411107930146007, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693089494604721, + "success_rate.epoch.global": 0.9013157894736842, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978829160530192, + "tokens_p.mean_in_band": 0.390625, + "tokens_rate.above_band": 0.9713876967095851, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02861230329041488 + }, + { + "epoch": 1.9578184916915211, + "grad_norm": 146.93966578858831, + "learning_rate": 3.626411127193181e-07, + "loss": 0.2278, + "step": 9190, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8989441930618401, + "success_rate.epoch.env.math": 0.9705882352941176, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8412925364598227, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693823566538803, + "success_rate.epoch.global": 0.9014257072844731, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993528106508875, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9970501474926253, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0029498525073746312 + }, + { + "epoch": 1.9588836812952706, + "grad_norm": 85.65704990242106, + "learning_rate": 3.626070385326165e-07, + "loss": 0.2422, + "step": 9195, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8990963855421686, + "success_rate.epoch.env.math": 0.9706227967097533, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8412335808109651, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693939746763445, + "success_rate.epoch.global": 0.9014241210502892, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942528735632183, + "tokens_p.mean_in_band": 0.666015625, + "tokens_rate.above_band": 0.9157894736842105, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08421052631578947 + }, + { + "epoch": 1.95994887089902, + "grad_norm": 93.35941200463283, + "learning_rate": 3.62572962445924e-07, + "loss": 0.3115, + "step": 9200, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8991723100075244, + "success_rate.epoch.env.math": 0.9706572769953051, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8409350057012542, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693768682800896, + "success_rate.epoch.global": 0.9013114025338964, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9883177570093458, + "tokens_p.mean_in_band": 0.5505756578947368, + "tokens_rate.above_band": 0.8492063492063492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15079365079365079 + }, + { + "epoch": 1.9610140605027695, + "grad_norm": 744.4963052442677, + "learning_rate": 3.625388844840749e-07, + "loss": 0.2262, + "step": 9205, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9464720194647201, + "success_rate.epoch.env.logic": 0.8992481203007519, + "success_rate.epoch.env.math": 0.9706802189210321, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8409220261809903, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693944281393787, + "success_rate.epoch.global": 0.901309946714032, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9921370967741936, + "tokens_p.mean_in_band": 0.5830078125, + "tokens_rate.above_band": 0.9748427672955975, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025157232704402517 + }, + { + "epoch": 1.962079250106519, + "grad_norm": 119.62004429252833, + "learning_rate": 3.6250480467190505e-07, + "loss": 0.257, + "step": 9210, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9466019417475728, + "success_rate.epoch.env.logic": 0.8993993993993994, + "success_rate.epoch.env.math": 0.970714564623194, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8408186469584992, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.869413716126758, + "success_rate.epoch.global": 0.9013084941228654, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982876712328768, + "tokens_p.mean_in_band": 0.6011284722222222, + "tokens_rate.above_band": 0.9700996677740864, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029900332225913623 + }, + { + "epoch": 1.9631444397102684, + "grad_norm": 245.02400293712606, + "learning_rate": 3.624707230342516e-07, + "loss": 0.248, + "step": 9215, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9467312348668281, + "success_rate.epoch.env.logic": 0.8994748687171793, + "success_rate.epoch.env.math": 0.9707374170893485, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8408059023836549, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8694332497929712, + "success_rate.epoch.global": 0.9013070447496677, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998584142394822, + "tokens_p.mean_in_band": 0.7509765625, + "tokens_rate.above_band": 0.987220447284345, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012779552715654952 + }, + { + "epoch": 1.9642096293140179, + "grad_norm": 211.04879472712864, + "learning_rate": 3.6243663959595295e-07, + "loss": 0.214, + "step": 9220, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9467312348668281, + "success_rate.epoch.env.logic": 0.8995502248875562, + "success_rate.epoch.env.math": 0.9708057609964967, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8406577828182591, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8694328480213467, + "success_rate.epoch.global": 0.9013055985837575, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9948422330097088, + "tokens_p.mean_in_band": 0.4211647727272727, + "tokens_rate.above_band": 0.9035087719298246, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09649122807017543 + }, + { + "epoch": 1.9652748189177673, + "grad_norm": 118.7322997757117, + "learning_rate": 3.6240255438184877e-07, + "loss": 0.3001, + "step": 9225, + "success_rate.epoch.env.abd": 0.9875930521091811, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9468599033816425, + "success_rate.epoch.env.logic": 0.8997005988023952, + "success_rate.epoch.env.math": 0.9708171206225681, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8408833522083805, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8694825602486468, + "success_rate.epoch.global": 0.9014146772767463, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969929963459196, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.992744860943168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007255139056831923 + }, + { + "epoch": 1.9663400085215168, + "grad_norm": 103.92445388398006, + "learning_rate": 3.623684674167803e-07, + "loss": 0.1225, + "step": 9230, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9468599033816425, + "success_rate.epoch.env.logic": 0.8997756170531039, + "success_rate.epoch.env.math": 0.9708511465215701, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8407805429864253, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8694956141488182, + "success_rate.epoch.global": 0.9014131154780305, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938979289940828, + "tokens_p.mean_below_band": 1.6555645743210334e-12, + "tokens_p.mean_in_band": 0.8265625, + "tokens_rate.above_band": 0.9657142857142857, + "tokens_rate.below_band": 0.005714285714285714, + "tokens_rate.in_band": 0.02857142857142857 + }, + { + "epoch": 1.9674051981252663, + "grad_norm": 146.4281118127438, + "learning_rate": 3.6233437872558985e-07, + "loss": 0.2767, + "step": 9235, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9468599033816425, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9708850931677019, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8406779661016949, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8695097734859363, + "success_rate.epoch.global": 0.9014115571239524, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978530534351145, + "tokens_p.mean_in_band": 0.5546875, + "tokens_rate.above_band": 0.916083916083916, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08391608391608392 + }, + { + "epoch": 1.9684703877290157, + "grad_norm": 118.09468061866858, + "learning_rate": 3.623002883331209e-07, + "loss": 0.1381, + "step": 9240, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9468599033816425, + "success_rate.epoch.env.logic": 0.9000745712155108, + "success_rate.epoch.env.math": 0.9709414955443626, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8407679277244494, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8695490504602074, + "success_rate.epoch.global": 0.9015201586252478, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.000330250990753, + "tokens_p.mean_in_band": 0.8229166666666666, + "tokens_rate.above_band": 0.9921363040629095, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007863695937090432 + }, + { + "epoch": 1.9695355773327652, + "grad_norm": 173.38182405899335, + "learning_rate": 3.6226619626421837e-07, + "loss": 0.2257, + "step": 9245, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.8737201365187713, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9468599033816425, + "success_rate.epoch.env.logic": 0.8994787788533135, + "success_rate.epoch.env.math": 0.9709752321981424, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8408128704487722, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693265514501611, + "success_rate.epoch.global": 0.9014084507042254, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9996476582827407, + "tokens_p.mean_in_band": 0.488031914893617, + "tokens_rate.above_band": 0.9608333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03916666666666667 + }, + { + "epoch": 1.9706007669365146, + "grad_norm": 133.41869435186206, + "learning_rate": 3.622321025437282e-07, + "loss": 0.358, + "step": 9250, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.8741496598639455, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.946987951807229, + "success_rate.epoch.env.logic": 0.8994787788533135, + "success_rate.epoch.env.math": 0.9710200927357032, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8409475465313029, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8693963393594584, + "success_rate.epoch.global": 0.9015168168828314, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968549250535332, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9915074309978769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008492569002123142 + }, + { + "epoch": 1.971665956540264, + "grad_norm": 162.71428128857067, + "learning_rate": 3.6219800719649785e-07, + "loss": 0.3732, + "step": 9255, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.946987951807229, + "success_rate.epoch.env.logic": 0.8995535714285714, + "success_rate.epoch.env.math": 0.970679012345679, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8410819949281487, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8691177347660478, + "success_rate.epoch.global": 0.9014053579270971, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.99748322147651, + "tokens_p.mean_in_band": 0.63330078125, + "tokens_rate.above_band": 0.9738562091503268, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026143790849673203 + }, + { + "epoch": 1.9727311461440138, + "grad_norm": 141.1740562317876, + "learning_rate": 3.6216391024737555e-07, + "loss": 0.2552, + "step": 9260, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.946987951807229, + "success_rate.epoch.env.logic": 0.899702823179792, + "success_rate.epoch.env.math": 0.9706903200925568, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8410689170182841, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.869140575848768, + "success_rate.epoch.global": 0.9014038166264532, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990808823529411, + "tokens_p.mean_in_band": 0.583984375, + "tokens_rate.above_band": 0.9883720930232558, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011627906976744186 + }, + { + "epoch": 1.973796335747763, + "grad_norm": 230.68805083661985, + "learning_rate": 3.621298117212111e-07, + "loss": 0.2407, + "step": 9265, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.946987951807229, + "success_rate.epoch.env.logic": 0.8991097922848664, + "success_rate.epoch.env.math": 0.9707354639969196, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8406408094435076, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.869061218994725, + "success_rate.epoch.global": 0.9011831726555652, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9962337133550488, + "tokens_p.mean_below_band": 7.729977369308472e-08, + "tokens_p.mean_in_band": 0.6416015625, + "tokens_rate.above_band": 0.9489953632148377, + "tokens_rate.below_band": 0.0015455950540958269, + "tokens_rate.in_band": 0.04945904173106646 + }, + { + "epoch": 1.9748615253515127, + "grad_norm": 80.44849496360418, + "learning_rate": 3.620957116428551e-07, + "loss": 0.1594, + "step": 9270, + "success_rate.epoch.env.abd": 0.9877149877149877, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9471153846153846, + "success_rate.epoch.env.logic": 0.899184581171238, + "success_rate.epoch.env.math": 0.9707692307692307, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8408197641774284, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8691016918903347, + "success_rate.epoch.global": 0.9012913110089735, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985518292682927, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9975669099756691, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0024330900243309003 + }, + { + "epoch": 1.975926714955262, + "grad_norm": 97.39905145173371, + "learning_rate": 3.6206161003715956e-07, + "loss": 0.1979, + "step": 9275, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.947242206235012, + "success_rate.epoch.env.logic": 0.8994082840236687, + "success_rate.epoch.env.math": 0.9708029197080292, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8406285072951739, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8691219706951375, + "success_rate.epoch.global": 0.9012898994315698, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967364091559371, + "tokens_p.mean_in_band": 0.6460336538461539, + "tokens_rate.above_band": 0.9817415730337079, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018258426966292134 + }, + { + "epoch": 1.9769919045590116, + "grad_norm": 77.5166022700115, + "learning_rate": 3.620275069289775e-07, + "loss": 0.322, + "step": 9280, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.947242206235012, + "success_rate.epoch.env.logic": 0.8995568685376661, + "success_rate.epoch.env.math": 0.9708141321044547, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8405715886803026, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.869206535795315, + "success_rate.epoch.global": 0.9012884909368858, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964583333333333, + "tokens_p.mean_in_band": 0.5484375, + "tokens_rate.above_band": 0.9836065573770492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01639344262295082 + }, + { + "epoch": 1.9780570941627609, + "grad_norm": 125.99951134193954, + "learning_rate": 3.619934023431629e-07, + "loss": 0.5176, + "step": 9285, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9449760765550239, + "success_rate.epoch.env.logic": 0.8995568685376661, + "success_rate.epoch.env.math": 0.9708365310821182, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8403243847874721, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8689800871957555, + "success_rate.epoch.global": 0.9010689354275742, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.5714285714285715, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9954551575545381, + "tokens_p.mean_below_band": 8.307397365570068e-07, + "tokens_p.mean_in_band": 0.47935267857142855, + "tokens_rate.above_band": 0.9530287474332649, + "tokens_rate.below_band": 0.0002566735112936345, + "tokens_rate.in_band": 0.04671457905544148 + }, + { + "epoch": 1.9791222837665106, + "grad_norm": 92.51467006729722, + "learning_rate": 3.6195929630457095e-07, + "loss": 0.2159, + "step": 9290, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9449760765550239, + "success_rate.epoch.env.logic": 0.8998527245949927, + "success_rate.epoch.env.math": 0.9708588957055214, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8399441340782123, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8689744481022527, + "success_rate.epoch.global": 0.9009588145565482, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9923611111111111, + "tokens_p.mean_in_band": 0.6057692307692307, + "tokens_rate.above_band": 0.8737864077669902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1262135922330097 + }, + { + "epoch": 1.9801874733702598, + "grad_norm": 108.66737695307832, + "learning_rate": 3.619251888380579e-07, + "loss": 0.2765, + "step": 9295, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9451073985680191, + "success_rate.epoch.env.logic": 0.8998527245949927, + "success_rate.epoch.env.math": 0.9709146574818217, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8400335008375209, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8690779072298429, + "success_rate.epoch.global": 0.901066608619939, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985850192678227, + "tokens_p.mean_in_band": 0.6751302083333334, + "tokens_rate.above_band": 0.9971181556195965, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002881844380403458 + }, + { + "epoch": 1.9812526629740095, + "grad_norm": 146.58759284466217, + "learning_rate": 3.618910799684812e-07, + "loss": 0.2646, + "step": 9300, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9451073985680191, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9709369024856597, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8400222965440357, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8690922996039666, + "success_rate.epoch.global": 0.9010654490106544, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9901315789473685, + "tokens_p.mean_in_band": 0.7326388888888888, + "tokens_rate.above_band": 0.9134615384615384, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08653846153846154 + }, + { + "epoch": 1.9823178525777587, + "grad_norm": 31.659030602726542, + "learning_rate": 3.6185696972069894e-07, + "loss": 0.1954, + "step": 9305, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9591836734693877, + "success_rate.epoch.env.ded": 0.9451073985680191, + "success_rate.epoch.env.logic": 0.9002201027146002, + "success_rate.epoch.env.math": 0.9709923664122138, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.840066870994706, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8691307101670755, + "success_rate.epoch.global": 0.9011728931364031, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969284188034188, + "tokens_p.mean_in_band": 0.712890625, + "tokens_rate.above_band": 0.9669421487603306, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03305785123966942 + }, + { + "epoch": 1.9833830421815084, + "grad_norm": 131.04405700264704, + "learning_rate": 3.618228581195705e-07, + "loss": 0.4417, + "step": 9310, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9697986577181208, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9451073985680191, + "success_rate.epoch.env.logic": 0.9002201027146002, + "success_rate.epoch.env.math": 0.9710365853658537, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8399666017255775, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8692090705521914, + "success_rate.epoch.global": 0.9011716207420265, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945652173913043, + "tokens_p.mean_in_band": 0.4147135416666667, + "tokens_rate.above_band": 0.968421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031578947368421054 + }, + { + "epoch": 1.9844482317852576, + "grad_norm": 137.595617783164, + "learning_rate": 3.617887451899561e-07, + "loss": 0.2663, + "step": 9315, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9697986577181208, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9452380952380952, + "success_rate.epoch.env.logic": 0.9002201027146002, + "success_rate.epoch.env.math": 0.9711026615969581, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8398220244716351, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8692138156110314, + "success_rate.epoch.global": 0.9011703511053316, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968398876404494, + "tokens_p.mean_in_band": 0.45, + "tokens_rate.above_band": 0.9726775956284153, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0273224043715847 + }, + { + "epoch": 1.9855134213890073, + "grad_norm": 88.22679644944432, + "learning_rate": 3.617546309567172e-07, + "loss": 0.351, + "step": 9320, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9697986577181208, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9452380952380952, + "success_rate.epoch.env.logic": 0.9004392386530015, + "success_rate.epoch.env.math": 0.9711355867831372, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8396776882467352, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8692263327005914, + "success_rate.epoch.global": 0.9011690842173631, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.991504854368932, + "tokens_p.mean_in_band": 0.5872395833333334, + "tokens_rate.above_band": 0.9196428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08035714285714286 + }, + { + "epoch": 1.9865786109927566, + "grad_norm": 457.20328575149074, + "learning_rate": 3.6172051544471575e-07, + "loss": 0.241, + "step": 9325, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9452380952380952, + "success_rate.epoch.env.logic": 0.9005120702267739, + "success_rate.epoch.env.math": 0.9711793704967766, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8398112159911161, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.869267376820016, + "success_rate.epoch.global": 0.9012759515570934, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976801310043668, + "tokens_p.mean_in_band": 0.408203125, + "tokens_rate.above_band": 0.9956521739130435, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004347826086956522 + }, + { + "epoch": 1.9876438005965062, + "grad_norm": 456.1231754450967, + "learning_rate": 3.616863986788151e-07, + "loss": 0.4675, + "step": 9330, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9452380952380952, + "success_rate.epoch.env.logic": 0.9006574141709277, + "success_rate.epoch.env.math": 0.9712121212121212, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.839478791239257, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8692533468116196, + "success_rate.epoch.global": 0.9011665586519767, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9941666666666666, + "tokens_p.mean_in_band": 0.51241455078125, + "tokens_rate.above_band": 0.9375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0625 + }, + { + "epoch": 1.9887089902002555, + "grad_norm": 24.228203276889616, + "learning_rate": 3.616522806838791e-07, + "loss": 0.2454, + "step": 9335, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9453681710213777, + "success_rate.epoch.env.logic": 0.9007299270072993, + "success_rate.epoch.env.math": 0.9712230215827338, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8396566048186098, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8693349705547356, + "success_rate.epoch.global": 0.9012731981009927, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977409638554217, + "tokens_p.mean_in_band": 0.6822916666666666, + "tokens_rate.above_band": 0.9940119760479041, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005988023952095809 + }, + { + "epoch": 1.9897741798040052, + "grad_norm": 63.15318199117054, + "learning_rate": 3.6161816148477287e-07, + "loss": 0.1866, + "step": 9340, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9457547169811321, + "success_rate.epoch.env.logic": 0.9008023340627279, + "success_rate.epoch.env.math": 0.9712447975785092, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8398340248962656, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8693948022900643, + "success_rate.epoch.global": 0.9013796076740677, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999234068627451, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.9908393694077546, + "grad_norm": 90.98323883013198, + "learning_rate": 3.6158404110636217e-07, + "loss": 0.3974, + "step": 9345, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9457547169811321, + "success_rate.epoch.env.logic": 0.9008746355685131, + "success_rate.epoch.env.math": 0.9709104646769928, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8400110527770103, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8693960753998474, + "success_rate.epoch.global": 0.9013781223083549, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966793168880456, + "tokens_p.mean_in_band": 0.71796875, + "tokens_rate.above_band": 0.9906015037593985, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009398496240601503 + }, + { + "epoch": 1.991904559011504, + "grad_norm": 89.82657481925854, + "learning_rate": 3.615499195735137e-07, + "loss": 0.213, + "step": 9350, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9607843137254902, + "success_rate.epoch.env.ded": 0.9458823529411765, + "success_rate.epoch.env.logic": 0.9009468317552805, + "success_rate.epoch.env.math": 0.970954356846473, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.84009942004971, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8695305526741156, + "success_rate.epoch.global": 0.901484190148419, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998070987654321, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 1.9929697486152536, + "grad_norm": 55.59520445288831, + "learning_rate": 3.6151579691109497e-07, + "loss": 0.4466, + "step": 9355, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9607843137254902, + "success_rate.epoch.env.ded": 0.9458823529411765, + "success_rate.epoch.env.logic": 0.9012345679012346, + "success_rate.epoch.env.math": 0.970954356846473, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8403639371381307, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8695807575136043, + "success_rate.epoch.global": 0.9015900300816502, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99390625, + "tokens_p.mean_in_band": 0.7368706597222222, + "tokens_rate.above_band": 0.9174311926605505, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08256880733944955 + }, + { + "epoch": 1.994034938219003, + "grad_norm": 46.508304769784004, + "learning_rate": 3.6148167314397433e-07, + "loss": 0.2788, + "step": 9360, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9615384615384616, + "success_rate.epoch.env.ded": 0.9461358313817331, + "success_rate.epoch.env.logic": 0.9013062409288825, + "success_rate.epoch.env.math": 0.9709871891484552, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8404958677685951, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.869693854078479, + "success_rate.epoch.global": 0.9016956428418116, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978419654714475, + "tokens_p.mean_in_band": 0.51171875, + "tokens_rate.above_band": 0.9973509933774835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0026490066225165563 + }, + { + "epoch": 1.9951001278227525, + "grad_norm": 174.8542830108639, + "learning_rate": 3.61447548297021e-07, + "loss": 0.3549, + "step": 9365, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9622641509433962, + "success_rate.epoch.env.ded": 0.9461358313817331, + "success_rate.epoch.env.logic": 0.9013778100072516, + "success_rate.epoch.env.math": 0.9710199473089951, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8404401650618982, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8697642462545833, + "success_rate.epoch.global": 0.9016938250428816, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9914772727272727, + "tokens_p.mean_in_band": 0.49296875, + "tokens_rate.above_band": 0.8897058823529411, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11029411764705882 + }, + { + "epoch": 1.996165317426502, + "grad_norm": 70.97149395509877, + "learning_rate": 3.6141342239510485e-07, + "loss": 0.3877, + "step": 9370, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9622641509433962, + "success_rate.epoch.env.ded": 0.9461358313817331, + "success_rate.epoch.env.logic": 0.9014492753623189, + "success_rate.epoch.env.math": 0.9706987227648385, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8402529557327467, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8697299036275724, + "success_rate.epoch.global": 0.9015849218248019, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9965, + "tokens_p.mean_in_band": 0.3030790441176471, + "tokens_rate.above_band": 0.8802816901408451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11971830985915492 + }, + { + "epoch": 1.9972305070302514, + "grad_norm": 115.82174745572928, + "learning_rate": 3.6137929546309664e-07, + "loss": 0.4441, + "step": 9375, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9622641509433962, + "success_rate.epoch.env.ded": 0.9461358313817331, + "success_rate.epoch.env.logic": 0.9007965242577842, + "success_rate.epoch.env.math": 0.9707536557930259, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8403846153846154, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8696964668804636, + "success_rate.epoch.global": 0.9015832263585793, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969490521327015, + "tokens_p.mean_in_band": 0.41342905405405406, + "tokens_rate.above_band": 0.9661172161172161, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03388278388278388 + }, + { + "epoch": 1.9982956966340009, + "grad_norm": 556.2684394186227, + "learning_rate": 3.613451675258678e-07, + "loss": 0.5335, + "step": 9380, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9622641509433962, + "success_rate.epoch.env.ded": 0.9461358313817331, + "success_rate.epoch.env.logic": 0.9007965242577842, + "success_rate.epoch.env.math": 0.9707865168539326, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8403292181069959, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8697717010295647, + "success_rate.epoch.global": 0.9015815345159222, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985166139240507, + "tokens_p.mean_in_band": 0.6189903846153846, + "tokens_rate.above_band": 0.9798449612403101, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020155038759689922 + }, + { + "epoch": 1.9993608862377503, + "grad_norm": 234.36914750080672, + "learning_rate": 3.613110386082904e-07, + "loss": 0.4179, + "step": 9385, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9838709677419355, + "success_rate.epoch.env.babyai": 0.9622641509433962, + "success_rate.epoch.env.ded": 0.9462616822429907, + "success_rate.epoch.env.logic": 0.9008683068017366, + "success_rate.epoch.env.math": 0.9708083832335329, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.84, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8697857639425375, + "success_rate.epoch.global": 0.9014730999146029, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9946996466431095, + "tokens_p.mean_in_band": 0.5830078125, + "tokens_rate.above_band": 0.9593220338983051, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04067796610169491 + }, + { + "epoch": 2.0004260758415, + "grad_norm": 320.926825579749, + "learning_rate": 3.612769087352377e-07, + "loss": 0.4256, + "step": 9390, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999234693877551, + "tokens_p.mean_in_band": 0.259765625, + "tokens_rate.above_band": 0.9959349593495935, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0040650406504065045 + }, + { + "epoch": 2.0014912654452495, + "grad_norm": 340.63416924544634, + "learning_rate": 3.6124277793158297e-07, + "loss": 0.5056, + "step": 9395, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.6666666666666666, + "success_rate.epoch.env.math": 1.0, + "success_rate.epoch.env.science": 0.5, + "success_rate.epoch.env_macro_mean": 0.8333333333333333, + "success_rate.epoch.global": 0.8, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9990040307461567, + "tokens_p.mean_in_band": 0.60986328125, + "tokens_rate.above_band": 0.9881437569470174, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011856243052982586 + }, + { + "epoch": 2.0025564550489987, + "grad_norm": 243.60823813088356, + "learning_rate": 3.612086462222006e-07, + "loss": 0.6777, + "step": 9400, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.6666666666666666, + "success_rate.epoch.env.math": 0.8, + "success_rate.epoch.env.science": 0.4, + "success_rate.epoch.env_macro_mean": 0.811111111111111, + "success_rate.epoch.global": 0.75, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7708333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9935233160621761, + "tokens_p.mean_below_band": 1.8189894035458565e-09, + "tokens_p.mean_in_band": 0.6473214285714286, + "tokens_rate.above_band": 0.9633943427620633, + "tokens_rate.below_band": 0.0016638935108153079, + "tokens_rate.in_band": 0.03494176372712146 + }, + { + "epoch": 2.0036216446527484, + "grad_norm": 212.36509304422106, + "learning_rate": 3.611745136319656e-07, + "loss": 0.6162, + "step": 9405, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8, + "success_rate.epoch.env.science": 0.375, + "success_rate.epoch.env_macro_mean": 0.7791666666666667, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.5333333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9939516129032258, + "tokens_p.mean_in_band": 0.5398763020833334, + "tokens_rate.above_band": 0.8857142857142857, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11428571428571428 + }, + { + "epoch": 2.0046868342564976, + "grad_norm": 344.2226449012985, + "learning_rate": 3.611403801857535e-07, + "loss": 0.8166, + "step": 9410, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8461538461538461, + "success_rate.epoch.env.science": 0.46153846153846156, + "success_rate.epoch.env_macro_mean": 0.8012820512820514, + "success_rate.epoch.global": 0.725, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986702127659575, + "tokens_p.mean_in_band": 0.59619140625, + "tokens_rate.above_band": 0.9778085991678225, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022191400832177532 + }, + { + "epoch": 2.0057520238602473, + "grad_norm": 137.40345291730455, + "learning_rate": 3.6110624590844047e-07, + "loss": 0.6818, + "step": 9415, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.4, + "success_rate.epoch.env.math": 0.7857142857142857, + "success_rate.epoch.env.science": 0.5263157894736842, + "success_rate.epoch.env_macro_mean": 0.8390037593984963, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.5333333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9978321771611526, + "tokens_p.mean_in_band": 0.5275135869565217, + "tokens_rate.above_band": 0.953204476093591, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04679552390640895 + }, + { + "epoch": 2.0068172134639966, + "grad_norm": 249.94363670988278, + "learning_rate": 3.6107211082490344e-07, + "loss": 0.5498, + "step": 9420, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.4, + "success_rate.epoch.env.math": 0.8125, + "success_rate.epoch.env.science": 0.56, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636111111111111, + "success_rate.epoch.global": 0.7166666666666667, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983766233766234, + "tokens_p.mean_in_band": 0.3703125, + "tokens_rate.above_band": 0.9685534591194969, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.031446540880503145 + }, + { + "epoch": 2.0078824030677462, + "grad_norm": 131.17384422636022, + "learning_rate": 3.610379749600197e-07, + "loss": 0.41, + "step": 9425, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.85, + "success_rate.epoch.env.science": 0.6206896551724138, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8671136653895274, + "success_rate.epoch.global": 0.7428571428571429, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974681712962963, + "tokens_p.mean_in_band": 0.7034696691176471, + "tokens_rate.above_band": 0.927038626609442, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07296137339055794 + }, + { + "epoch": 2.0089475926714955, + "grad_norm": 205.8439565836018, + "learning_rate": 3.610038383386673e-07, + "loss": 0.6034, + "step": 9430, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.8, + "success_rate.epoch.env.science": 0.5454545454545454, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8531986531986532, + "success_rate.epoch.global": 0.7, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.6, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.5333333333333333, + "success_rate.window.global": 0.4, + "tokens_p.mean_above_band": 0.9955255681818181, + "tokens_p.mean_in_band": 0.4512939453125, + "tokens_rate.above_band": 0.953757225433526, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046242774566473986 + }, + { + "epoch": 2.010012782275245, + "grad_norm": 107.20687856202514, + "learning_rate": 3.609697009857247e-07, + "loss": 0.4466, + "step": 9435, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.3333333333333333, + "success_rate.epoch.env.math": 0.8275862068965517, + "success_rate.epoch.env.science": 0.5945945945945946, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8617237927582756, + "success_rate.epoch.global": 0.7333333333333333, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956617647058823, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.9883720930232558, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011627906976744186 + }, + { + "epoch": 2.0110779718789944, + "grad_norm": 142.3629123005929, + "learning_rate": 3.60935562926071e-07, + "loss": 0.5068, + "step": 9440, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.42857142857142855, + "success_rate.epoch.env.math": 0.8484848484848485, + "success_rate.epoch.env.science": 0.5789473684210527, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8856003645477329, + "success_rate.epoch.global": 0.75, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997650375939849, + "tokens_p.mean_in_band": 0.5494791666666666, + "tokens_rate.above_band": 0.9866468842729971, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013353115727002967 + }, + { + "epoch": 2.012143161482744, + "grad_norm": 62.622488581704104, + "learning_rate": 3.609014241845858e-07, + "loss": 0.4964, + "step": 9445, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 1.0, + "success_rate.epoch.env.logic": 0.4444444444444444, + "success_rate.epoch.env.math": 0.8648648648648649, + "success_rate.epoch.env.science": 0.5952380952380952, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8904547404547405, + "success_rate.epoch.global": 0.7545454545454545, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9952713815789473, + "tokens_p.mean_in_band": 0.548046875, + "tokens_rate.above_band": 0.9529780564263323, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.047021943573667714 + }, + { + "epoch": 2.0132083510864933, + "grad_norm": 126.28369202278844, + "learning_rate": 3.6086728478614904e-07, + "loss": 0.6079, + "step": 9450, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9090909090909091, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.8717948717948718, + "success_rate.epoch.env.science": 0.5869565217391305, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8867842302624911, + "success_rate.epoch.global": 0.75, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9984358706986444, + "tokens_p.mean_in_band": 0.33774038461538464, + "tokens_rate.above_band": 0.9866255144032922, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013374485596707819 + }, + { + "epoch": 2.014273540690243, + "grad_norm": 231.68794549351844, + "learning_rate": 3.6083314475564143e-07, + "loss": 0.495, + "step": 9455, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9166666666666666, + "success_rate.epoch.env.logic": 0.45454545454545453, + "success_rate.epoch.env.math": 0.8780487804878049, + "success_rate.epoch.env.science": 0.5882352941176471, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8837496195817574, + "success_rate.epoch.global": 0.7461538461538462, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9988398644667059, + "tokens_p.mean_in_band": 0.4778293918918919, + "tokens_rate.above_band": 0.9786620530565168, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021337946943483274 + }, + { + "epoch": 2.0153387302939922, + "grad_norm": 81.89482890135903, + "learning_rate": 3.6079900411794387e-07, + "loss": 0.4829, + "step": 9460, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.46153846153846156, + "success_rate.epoch.env.math": 0.8863636363636364, + "success_rate.epoch.env.science": 0.6037735849056604, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8880247111379187, + "success_rate.epoch.global": 0.7571428571428571, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988046448087432, + "tokens_p.mean_in_band": 0.4322916666666667, + "tokens_rate.above_band": 0.9838709677419355, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016129032258064516 + }, + { + "epoch": 2.016403919897742, + "grad_norm": 191.7094748697114, + "learning_rate": 3.607648628979379e-07, + "loss": 0.4237, + "step": 9465, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5333333333333333, + "success_rate.epoch.env.math": 0.8958333333333334, + "success_rate.epoch.env.science": 0.6071428571428571, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8964880952380951, + "success_rate.epoch.global": 0.7666666666666667, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959239130434783, + "tokens_p.mean_in_band": 0.5600328947368421, + "tokens_rate.above_band": 0.8789808917197452, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12101910828025478 + }, + { + "epoch": 2.017469109501491, + "grad_norm": 176.1966454416567, + "learning_rate": 3.607307211205053e-07, + "loss": 0.5309, + "step": 9470, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5, + "success_rate.epoch.env.math": 0.9056603773584906, + "success_rate.epoch.env.science": 0.6101694915254238, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8944401297455343, + "success_rate.epoch.global": 0.76875, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0006017329910142, + "tokens_p.mean_in_band": 0.4693287037037037, + "tokens_rate.above_band": 0.966501240694789, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033498759305210915 + }, + { + "epoch": 2.018534299105241, + "grad_norm": 239.3430133453057, + "learning_rate": 3.6069657881052844e-07, + "loss": 0.2041, + "step": 9475, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.5555555555555556, + "success_rate.epoch.env.math": 0.9122807017543859, + "success_rate.epoch.env.science": 0.6129032258064516, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9009310911687823, + "success_rate.epoch.global": 0.7764705882352941, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963803088803089, + "tokens_p.mean_in_band": 0.68359375, + "tokens_rate.above_band": 0.9847908745247148, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015209125475285171 + }, + { + "epoch": 2.01959948870899, + "grad_norm": 172.39458477329816, + "learning_rate": 3.606624359928899e-07, + "loss": 0.361, + "step": 9480, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6190476190476191, + "success_rate.epoch.env.math": 0.9166666666666666, + "success_rate.epoch.env.science": 0.625, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9089285714285713, + "success_rate.epoch.global": 0.7888888888888889, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991159830268741, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.997179125528914, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0028208744710860366 + }, + { + "epoch": 2.0206646783127398, + "grad_norm": 90.98167587573818, + "learning_rate": 3.606282926924728e-07, + "loss": 0.2571, + "step": 9485, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6363636363636364, + "success_rate.epoch.env.math": 0.9206349206349206, + "success_rate.epoch.env.science": 0.6417910447761194, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9127361030346105, + "success_rate.epoch.global": 0.8, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998546511627907, + "tokens_p.mean_in_band": 0.7591145833333334, + "tokens_rate.above_band": 0.9862385321100917, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013761467889908258 + }, + { + "epoch": 2.021729867916489, + "grad_norm": 115.3975355084232, + "learning_rate": 3.6059414893416054e-07, + "loss": 0.4624, + "step": 9490, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6521739130434783, + "success_rate.epoch.env.math": 0.9242424242424242, + "success_rate.epoch.env.science": 0.625, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9129987765857331, + "success_rate.epoch.global": 0.795, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9957545518207283, + "tokens_p.mean_in_band": 0.4736328125, + "tokens_rate.above_band": 0.9571045576407506, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04289544235924933 + }, + { + "epoch": 2.0227950575202387, + "grad_norm": 722.5825883483019, + "learning_rate": 3.605600047428368e-07, + "loss": 0.4416, + "step": 9495, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.6923076923076923, + "success_rate.epoch.env.math": 0.9154929577464789, + "success_rate.epoch.env.science": 0.6351351351351351, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9171507213760736, + "success_rate.epoch.global": 0.8, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9940878378378378, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.961038961038961, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03896103896103896 + }, + { + "epoch": 2.023860247123988, + "grad_norm": 89.78032517627625, + "learning_rate": 3.6052586014338556e-07, + "loss": 0.3562, + "step": 9500, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.7037037037037037, + "success_rate.epoch.env.math": 0.9210526315789473, + "success_rate.epoch.env.science": 0.6363636363636364, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9189691400217717, + "success_rate.epoch.global": 0.8045454545454546, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9922680412371134, + "tokens_p.mean_in_band": 0.6393229166666666, + "tokens_rate.above_band": 0.941747572815534, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05825242718446602 + }, + { + "epoch": 2.0249254367277376, + "grad_norm": 96.66100381812466, + "learning_rate": 3.6049171516069125e-07, + "loss": 0.2178, + "step": 9505, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9285714285714286, + "success_rate.epoch.env.logic": 0.7037037037037037, + "success_rate.epoch.env.math": 0.9240506329113924, + "success_rate.epoch.env.science": 0.6419753086419753, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.91983010738285, + "success_rate.epoch.global": 0.8078602620087336, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9969978165938864, + "tokens_p.mean_in_band": 0.6637834821428571, + "tokens_rate.above_band": 0.9423868312757202, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05761316872427984 + }, + { + "epoch": 2.025990626331487, + "grad_norm": 826.1814779725264, + "learning_rate": 3.604575698196385e-07, + "loss": 0.4907, + "step": 9510, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.7037037037037037, + "success_rate.epoch.env.math": 0.926829268292683, + "success_rate.epoch.env.science": 0.6309523809523809, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9198985352948768, + "success_rate.epoch.global": 0.8075313807531381, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980357142857142, + "tokens_p.mean_in_band": 0.671875, + "tokens_rate.above_band": 0.9641873278236914, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03581267217630854 + }, + { + "epoch": 2.0270558159352365, + "grad_norm": 140.5626460194624, + "learning_rate": 3.604234241451121e-07, + "loss": 0.112, + "step": 9515, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.7333333333333333, + "success_rate.epoch.env.math": 0.9285714285714286, + "success_rate.epoch.env.science": 0.6395348837209303, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9238939645625692, + "success_rate.epoch.global": 0.8152610441767069, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965533088235294, + "tokens_p.mean_in_band": 0.8411458333333334, + "tokens_rate.above_band": 0.9945155393053017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005484460694698354 + }, + { + "epoch": 2.0281210055389858, + "grad_norm": 139.14264902908886, + "learning_rate": 3.6038927816199726e-07, + "loss": 0.4959, + "step": 9520, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.7333333333333333, + "success_rate.epoch.env.math": 0.9294117647058824, + "success_rate.epoch.env.science": 0.6483516483516484, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9255541190835309, + "success_rate.epoch.global": 0.8185328185328186, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994817073170732, + "tokens_p.mean_in_band": 0.67431640625, + "tokens_rate.above_band": 0.9961127308066083, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003887269193391642 + }, + { + "epoch": 2.0291861951427355, + "grad_norm": 189.81289108124403, + "learning_rate": 3.6035513189517925e-07, + "loss": 0.3229, + "step": 9525, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7419354838709677, + "success_rate.epoch.env.math": 0.9325842696629213, + "success_rate.epoch.env.science": 0.6526315789473685, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.927451975353389, + "success_rate.epoch.global": 0.8215613382899628, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977313603662524, + "tokens_p.mean_in_band": 0.60546875, + "tokens_rate.above_band": 0.9960912052117263, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003908794788273616 + }, + { + "epoch": 2.0302513847464847, + "grad_norm": 101.57335611204468, + "learning_rate": 3.6032098536954376e-07, + "loss": 0.3196, + "step": 9530, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7575757575757576, + "success_rate.epoch.env.math": 0.9340659340659341, + "success_rate.epoch.env.science": 0.6435643564356436, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9282574469129967, + "success_rate.epoch.global": 0.8172043010752689, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9915602189781022, + "tokens_p.mean_in_band": 0.5738636363636364, + "tokens_rate.above_band": 0.8616352201257862, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13836477987421383 + }, + { + "epoch": 2.0313165743502344, + "grad_norm": 110.82828022333625, + "learning_rate": 3.6028683860997635e-07, + "loss": 0.3032, + "step": 9535, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7647058823529411, + "success_rate.epoch.env.math": 0.9368421052631579, + "success_rate.epoch.env.science": 0.6509433962264151, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9299859804895145, + "success_rate.epoch.global": 0.8200692041522492, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938186813186813, + "tokens_p.mean_in_band": 0.6361607142857143, + "tokens_rate.above_band": 0.9285714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07142857142857142 + }, + { + "epoch": 2.0323817639539836, + "grad_norm": 83.98913706399934, + "learning_rate": 3.6025269164136306e-07, + "loss": 0.23, + "step": 9540, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7777777777777778, + "success_rate.epoch.env.math": 0.9381443298969072, + "success_rate.epoch.env.science": 0.6422018348623854, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9305492363589701, + "success_rate.epoch.global": 0.8193979933110368, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954044117647058, + "tokens_p.mean_in_band": 0.4375, + "tokens_rate.above_band": 0.9272727272727272, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07272727272727272 + }, + { + "epoch": 2.0334469535577333, + "grad_norm": 90.99730528111844, + "learning_rate": 3.6021854448858993e-07, + "loss": 0.3659, + "step": 9545, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.7837837837837838, + "success_rate.epoch.env.math": 0.94, + "success_rate.epoch.env.science": 0.6403508771929824, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9311503082029396, + "success_rate.epoch.global": 0.8187702265372169, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.994740099009901, + "tokens_p.mean_in_band": 0.5028645833333333, + "tokens_rate.above_band": 0.8706896551724138, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12931034482758622 + }, + { + "epoch": 2.0345121431614825, + "grad_norm": 57.27481021638967, + "learning_rate": 3.601843971765431e-07, + "loss": 0.3535, + "step": 9550, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.7948717948717948, + "success_rate.epoch.env.math": 0.9411764705882353, + "success_rate.epoch.env.science": 0.652542372881356, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9338590638341386, + "success_rate.epoch.global": 0.8244514106583072, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991490166414524, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.0355773327652322, + "grad_norm": 41.035762347023855, + "learning_rate": 3.6015024973010895e-07, + "loss": 0.2693, + "step": 9555, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8048780487804879, + "success_rate.epoch.env.math": 0.9439252336448598, + "success_rate.epoch.env.science": 0.6583333333333333, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.935713661575868, + "success_rate.epoch.global": 0.8297872340425532, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948275862068966, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9731543624161074, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026845637583892617 + }, + { + "epoch": 2.0366425223689815, + "grad_norm": 413.41384942417, + "learning_rate": 3.601161021741739e-07, + "loss": 0.3851, + "step": 9560, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.813953488372093, + "success_rate.epoch.env.math": 0.9363636363636364, + "success_rate.epoch.env.science": 0.672, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.937231712473573, + "success_rate.epoch.global": 0.831858407079646, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992597292724196, + "tokens_p.mean_in_band": 0.6588541666666666, + "tokens_rate.above_band": 0.9800995024875622, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01990049751243781 + }, + { + "epoch": 2.037707711972731, + "grad_norm": 25.091192591618025, + "learning_rate": 3.600819545336244e-07, + "loss": 0.2506, + "step": 9565, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.813953488372093, + "success_rate.epoch.env.math": 0.9380530973451328, + "success_rate.epoch.env.science": 0.6796875, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9384075038098179, + "success_rate.epoch.global": 0.836676217765043, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986092931937173, + "tokens_p.mean_in_band": 0.814453125, + "tokens_rate.above_band": 0.9982578397212544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017421602787456446 + }, + { + "epoch": 2.038772901576481, + "grad_norm": 124.26799191600831, + "learning_rate": 3.6004780683334705e-07, + "loss": 0.3578, + "step": 9570, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8, + "success_rate.epoch.env.math": 0.9391304347826087, + "success_rate.epoch.env.science": 0.6742424242424242, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9367918313570488, + "success_rate.epoch.global": 0.8328690807799443, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9966267820773931, + "tokens_p.mean_in_band": 0.6027901785714286, + "tokens_rate.above_band": 0.9655850540806293, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0344149459193707 + }, + { + "epoch": 2.03983809118023, + "grad_norm": 74.61564211759006, + "learning_rate": 3.600136590982284e-07, + "loss": 0.3258, + "step": 9575, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8163265306122449, + "success_rate.epoch.env.math": 0.9401709401709402, + "success_rate.epoch.env.science": 0.6814814814814815, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9392524406810121, + "success_rate.epoch.global": 0.8373983739837398, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983606557377049, + "tokens_p.mean_in_band": 0.86865234375, + "tokens_rate.above_band": 0.9744408945686901, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025559105431309903 + }, + { + "epoch": 2.0409032807839798, + "grad_norm": 102.4449027559801, + "learning_rate": 3.599795113531551e-07, + "loss": 0.4033, + "step": 9580, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8163265306122449, + "success_rate.epoch.env.math": 0.9411764705882353, + "success_rate.epoch.env.science": 0.6830985915492958, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.939514704729523, + "success_rate.epoch.global": 0.8364116094986808, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.9047619047619048, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9945436507936508, + "tokens_p.mean_in_band": 0.5379971590909091, + "tokens_rate.above_band": 0.9197080291970803, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08029197080291971 + }, + { + "epoch": 2.041968470387729, + "grad_norm": 137.5972515293955, + "learning_rate": 3.5994536362301375e-07, + "loss": 0.3628, + "step": 9585, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.82, + "success_rate.epoch.env.math": 0.9426229508196722, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6712328767123288, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.944400116552496, + "success_rate.epoch.global": 0.8329048843187661, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9957191780821918, + "tokens_p.mean_in_band": 0.51611328125, + "tokens_rate.above_band": 0.9319148936170213, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06808510638297872 + }, + { + "epoch": 2.0430336599914787, + "grad_norm": 48.459671839727775, + "learning_rate": 3.5991121593269107e-07, + "loss": 0.2521, + "step": 9590, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8235294117647058, + "success_rate.epoch.env.math": 0.944, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6754966887417219, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9454134399669876, + "success_rate.epoch.global": 0.8345864661654135, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9929078014184397, + "tokens_p.mean_below_band": 3.583409124985337e-10, + "tokens_p.mean_in_band": 0.7880859375, + "tokens_rate.above_band": 0.94, + "tokens_rate.below_band": 0.006666666666666667, + "tokens_rate.in_band": 0.05333333333333334 + }, + { + "epoch": 2.044098849595228, + "grad_norm": 125.57216426470644, + "learning_rate": 3.5987706830707355e-07, + "loss": 0.3249, + "step": 9595, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8301886792452831, + "success_rate.epoch.env.math": 0.937984496124031, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6838709677419355, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9462332620219714, + "success_rate.epoch.global": 0.8361858190709046, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9912790697674418, + "tokens_p.mean_in_band": 0.760546875, + "tokens_rate.above_band": 0.8958333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10416666666666667 + }, + { + "epoch": 2.0451640391989776, + "grad_norm": 80.00764951612555, + "learning_rate": 3.5984292077104777e-07, + "loss": 0.2917, + "step": 9600, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.8148148148148148, + "success_rate.epoch.env.math": 0.9384615384615385, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6770186335403726, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9444207563772781, + "success_rate.epoch.global": 0.8305489260143198, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9995105421686747, + "tokens_p.mean_in_band": 0.5015869140625, + "tokens_rate.above_band": 0.962877030162413, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.037122969837587005 + }, + { + "epoch": 2.046229228802727, + "grad_norm": 89.76733159137095, + "learning_rate": 3.598087733495002e-07, + "loss": 0.3909, + "step": 9605, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.8103448275862069, + "success_rate.epoch.env.math": 0.9393939393939394, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6809815950920245, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.944610942006561, + "success_rate.epoch.global": 0.8321678321678322, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970238095238095, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9910112359550561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008988764044943821 + }, + { + "epoch": 2.0472944184064765, + "grad_norm": 377.234369819399, + "learning_rate": 3.597746260673172e-07, + "loss": 0.2693, + "step": 9610, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.8032786885245902, + "success_rate.epoch.env.math": 0.9398496240601504, + "success_rate.epoch.env.sat": 1.0, + "success_rate.epoch.env.science": 0.6848484848484848, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9445013871792441, + "success_rate.epoch.global": 0.8337129840546698, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9523809523809523, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979771627344223, + "tokens_p.mean_in_band": 0.42367788461538464, + "tokens_rate.above_band": 0.984514592019059, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015485407980941036 + }, + { + "epoch": 2.0483596080102258, + "grad_norm": 362.7960061936099, + "learning_rate": 3.5974047894938513e-07, + "loss": 0.3669, + "step": 9615, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8125, + "success_rate.epoch.env.math": 0.9407407407407408, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6826347305389222, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.899894403112966, + "success_rate.epoch.global": 0.8329621380846325, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980849582172702, + "tokens_p.mean_in_band": 0.5412946428571429, + "tokens_rate.above_band": 0.9447368421052632, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05526315789473684 + }, + { + "epoch": 2.0494247976139754, + "grad_norm": 155.86362774108335, + "learning_rate": 3.5970633202059017e-07, + "loss": 0.313, + "step": 9620, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8125, + "success_rate.epoch.env.math": 0.9428571428571428, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6882352941176471, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9005959454488867, + "success_rate.epoch.global": 0.8366013071895425, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99625, + "tokens_p.mean_in_band": 0.7765625, + "tokens_rate.above_band": 0.967741935483871, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03225806451612903 + }, + { + "epoch": 2.0504899872177247, + "grad_norm": 86.06508562645304, + "learning_rate": 3.5967218530581826e-07, + "loss": 0.3441, + "step": 9625, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8153846153846154, + "success_rate.epoch.env.math": 0.9440559440559441, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6988636363636364, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9019333780697416, + "success_rate.epoch.global": 0.8400852878464818, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9925, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.0515551768214744, + "grad_norm": 144.8050227408927, + "learning_rate": 3.596380388299554e-07, + "loss": 0.2577, + "step": 9630, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8181818181818182, + "success_rate.epoch.env.math": 0.9455782312925171, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.7055555555555556, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9029344152720775, + "success_rate.epoch.global": 0.8434237995824635, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997564935064935, + "tokens_p.mean_in_band": 0.82421875, + "tokens_rate.above_band": 0.9956896551724138, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004310344827586207 + }, + { + "epoch": 2.0526203664252236, + "grad_norm": 68.7718047074938, + "learning_rate": 3.5960389261788724e-07, + "loss": 0.3167, + "step": 9635, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8088235294117647, + "success_rate.epoch.env.math": 0.9466666666666667, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.6994535519125683, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9016278828139966, + "success_rate.epoch.global": 0.8404907975460123, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7083333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9959795321637427, + "tokens_p.mean_in_band": 0.503125, + "tokens_rate.above_band": 0.9447513812154696, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.055248618784530384 + }, + { + "epoch": 2.0536855560289733, + "grad_norm": 54.50339168876519, + "learning_rate": 3.595697466944992e-07, + "loss": 0.2692, + "step": 9640, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8088235294117647, + "success_rate.epoch.env.math": 0.9473684210526315, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.7037037037037037, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9020780561028239, + "success_rate.epoch.global": 0.8416833667334669, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915487421383647, + "tokens_p.mean_in_band": 0.7017045454545454, + "tokens_rate.above_band": 0.9352941176470588, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06470588235294118 + }, + { + "epoch": 2.0547507456327225, + "grad_norm": 34.004449514286705, + "learning_rate": 3.5953560108467675e-07, + "loss": 0.2095, + "step": 9645, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.8142857142857143, + "success_rate.epoch.env.math": 0.948051948051948, + "success_rate.epoch.env.sat": 0.5, + "success_rate.epoch.env.science": 0.7098445595854922, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.9031950168078289, + "success_rate.epoch.global": 0.8447937131630648, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990131578947369, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9965034965034965, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0034965034965034965 + }, + { + "epoch": 2.055815935236472, + "grad_norm": 51.50414655273183, + "learning_rate": 3.595014558133049e-07, + "loss": 0.1324, + "step": 9650, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9642857142857143, + "success_rate.epoch.env.logic": 0.8194444444444444, + "success_rate.epoch.env.math": 0.9490445859872612, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7091836734693877, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.888662886501831, + "success_rate.epoch.global": 0.8439306358381503, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9920197469325154, + "tokens_p.mean_in_band": 0.7576462765957447, + "tokens_rate.above_band": 0.932761087267525, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06723891273247497 + }, + { + "epoch": 2.0568811248402215, + "grad_norm": 304.15525353544035, + "learning_rate": 3.594673109052685e-07, + "loss": 0.2903, + "step": 9655, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9642857142857143, + "success_rate.epoch.env.logic": 0.821917808219178, + "success_rate.epoch.env.math": 0.9493670886075949, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7192118226600985, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8898287061005381, + "success_rate.epoch.global": 0.8468809073724007, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969835907335908, + "tokens_p.mean_in_band": 0.7788461538461539, + "tokens_rate.above_band": 0.9522058823529411, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04779411764705882 + }, + { + "epoch": 2.057946314443971, + "grad_norm": 64.29470240299285, + "learning_rate": 3.59433166385452e-07, + "loss": 0.1572, + "step": 9660, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.821917808219178, + "success_rate.epoch.env.math": 0.95, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7285714285714285, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8908490737730227, + "success_rate.epoch.global": 0.849721706864564, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968312937062938, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9930555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006944444444444444 + }, + { + "epoch": 2.0590115040477204, + "grad_norm": 48.02552504760577, + "learning_rate": 3.593990222787398e-07, + "loss": 0.178, + "step": 9665, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9655172413793104, + "success_rate.epoch.env.logic": 0.821917808219178, + "success_rate.epoch.env.math": 0.9515151515151515, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7311320754716981, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8912196009016974, + "success_rate.epoch.global": 0.8524590163934426, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973404255319149, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.06007669365147, + "grad_norm": 58.02687871513042, + "learning_rate": 3.5936487861001584e-07, + "loss": 0.2438, + "step": 9670, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9666666666666667, + "success_rate.epoch.env.logic": 0.821917808219178, + "success_rate.epoch.env.math": 0.9515151515151515, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7385321100917431, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.891996824529643, + "success_rate.epoch.global": 0.8548387096774194, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998015873015873, + "tokens_p.mean_in_band": 0.7240513392857143, + "tokens_rate.above_band": 0.9818181818181818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01818181818181818 + }, + { + "epoch": 2.0611418832552193, + "grad_norm": 55.31759833065296, + "learning_rate": 3.5933073540416383e-07, + "loss": 0.1866, + "step": 9675, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9666666666666667, + "success_rate.epoch.env.logic": 0.8266666666666667, + "success_rate.epoch.env.math": 0.9520958083832335, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7443946188340808, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8930142812621802, + "success_rate.epoch.global": 0.8573943661971831, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_p.mean_in_band": 0.763671875, + "tokens_rate.above_band": 0.9629629629629629, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.037037037037037035 + }, + { + "epoch": 2.062207072858969, + "grad_norm": 104.55919875876411, + "learning_rate": 3.59296592686067e-07, + "loss": 0.2903, + "step": 9680, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.967741935483871, + "success_rate.epoch.env.logic": 0.8266666666666667, + "success_rate.epoch.env.math": 0.9523809523809523, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7400881057268722, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8927464539628814, + "success_rate.epoch.global": 0.856401384083045, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.995547493403694, + "tokens_p.mean_in_band": 0.6388671875, + "tokens_rate.above_band": 0.9869791666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013020833333333334 + }, + { + "epoch": 2.063272262462718, + "grad_norm": 98.18511053990107, + "learning_rate": 3.592624504806084e-07, + "loss": 0.2499, + "step": 9685, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.967741935483871, + "success_rate.epoch.env.logic": 0.8289473684210527, + "success_rate.epoch.env.math": 0.9532163742690059, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7402597402597403, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8930453410697275, + "success_rate.epoch.global": 0.8571428571428571, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972222222222222, + "tokens_p.mean_in_band": 0.6947544642857143, + "tokens_rate.above_band": 0.9625668449197861, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0374331550802139 + }, + { + "epoch": 2.064337452066468, + "grad_norm": 108.30435465743868, + "learning_rate": 3.5922830881267054e-07, + "loss": 0.1795, + "step": 9690, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8311688311688312, + "success_rate.epoch.env.math": 0.953757225433526, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7468354430379747, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8911449848157879, + "success_rate.epoch.global": 0.8578595317725752, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9888572386058981, + "tokens_p.mean_below_band": 8.307397365570068e-07, + "tokens_p.mean_in_band": 0.48441972805343514, + "tokens_rate.above_band": 0.7393458870168483, + "tokens_rate.below_band": 0.0009910802775024777, + "tokens_rate.in_band": 0.25966303270564917 + }, + { + "epoch": 2.065402641670217, + "grad_norm": 156.47234223738994, + "learning_rate": 3.5919416770713567e-07, + "loss": 0.2071, + "step": 9695, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8311688311688312, + "success_rate.epoch.env.math": 0.9540229885057471, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7551020408163265, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8919206539840218, + "success_rate.epoch.global": 0.8601973684210527, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9897388059701493, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.066467831273967, + "grad_norm": 66.73295862149546, + "learning_rate": 3.591600271888857e-07, + "loss": 0.2947, + "step": 9700, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8395061728395061, + "success_rate.epoch.env.math": 0.9542857142857143, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7590361445783133, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8930601240942607, + "success_rate.epoch.global": 0.8622366288492707, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962993421052632, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9934640522875817, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006535947712418301 + }, + { + "epoch": 2.067533020877716, + "grad_norm": 68.04145636664377, + "learning_rate": 3.5912588728280177e-07, + "loss": 0.0881, + "step": 9705, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8395061728395061, + "success_rate.epoch.env.math": 0.9553072625698324, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7598425196850394, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8932262989479738, + "success_rate.epoch.global": 0.8628389154704944, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9929552023121387, + "tokens_p.mean_in_band": 0.6354166666666666, + "tokens_rate.above_band": 0.9505494505494505, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04945054945054945 + }, + { + "epoch": 2.0685982104814657, + "grad_norm": 160.84378379603496, + "learning_rate": 3.5909174801376493e-07, + "loss": 0.2373, + "step": 9710, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8414634146341463, + "success_rate.epoch.env.math": 0.9567567567567568, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7626459143968871, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8937908562837386, + "success_rate.epoch.global": 0.8649921507064364, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99, + "tokens_p.mean_in_band": 0.80859375, + "tokens_rate.above_band": 0.9803921568627451, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0196078431372549 + }, + { + "epoch": 2.069663400085215, + "grad_norm": 126.90666648615168, + "learning_rate": 3.590576094066556e-07, + "loss": 0.2349, + "step": 9715, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8433734939759037, + "success_rate.epoch.env.math": 0.9574468085106383, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7615384615384615, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8939265543053033, + "success_rate.epoch.global": 0.865533230293663, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967072564612326, + "tokens_p.mean_in_band": 0.583984375, + "tokens_rate.above_band": 0.9843444227005871, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015655577299412915 + }, + { + "epoch": 2.0707285896889647, + "grad_norm": 258.67673619265275, + "learning_rate": 3.5902347148635376e-07, + "loss": 0.1615, + "step": 9720, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8452380952380952, + "success_rate.epoch.env.math": 0.9576719576719577, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7565543071161048, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8936634266690447, + "success_rate.epoch.global": 0.863013698630137, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5714285714285714, + "success_rate.window.env_macro_mean": 0.8928571428571428, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9955357142857143, + "tokens_p.mean_in_band": 0.6501953125, + "tokens_rate.above_band": 0.9224806201550387, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07751937984496124 + }, + { + "epoch": 2.071793779292714, + "grad_norm": 51.018949507965054, + "learning_rate": 3.589893342777389e-07, + "loss": 0.27, + "step": 9725, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8409090909090909, + "success_rate.epoch.env.math": 0.9581151832460733, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7555555555555555, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8932193784585504, + "success_rate.epoch.global": 0.8618618618618619, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8055555555555555, + "success_rate.window.global": 0.7777777777777778, + "tokens_p.mean_above_band": 0.9993043664383562, + "tokens_p.mean_in_band": 0.4781901041666667, + "tokens_rate.above_band": 0.9605263157894737, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039473684210526314 + }, + { + "epoch": 2.0728589688964636, + "grad_norm": 34.658069142671394, + "learning_rate": 3.5895519780568993e-07, + "loss": 0.2078, + "step": 9730, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.8461538461538461, + "success_rate.epoch.env.math": 0.9591836734693877, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7555555555555555, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8939654861732784, + "success_rate.epoch.global": 0.863905325443787, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965363300492611, + "tokens_p.mean_in_band": 0.609375, + "tokens_rate.above_band": 0.9975429975429976, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002457002457002457 + }, + { + "epoch": 2.073924158500213, + "grad_norm": 128.17836059182127, + "learning_rate": 3.589210620950853e-07, + "loss": 0.3631, + "step": 9735, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.8478260869565217, + "success_rate.epoch.env.math": 0.9595959595959596, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7536231884057971, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8939793188805047, + "success_rate.epoch.global": 0.8629737609329446, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9879807692307693, + "tokens_p.mean_in_band": 0.59453125, + "tokens_rate.above_band": 0.9122807017543859, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08771929824561403 + }, + { + "epoch": 2.0749893481039625, + "grad_norm": 403.3371051957635, + "learning_rate": 3.588869271708029e-07, + "loss": 0.0885, + "step": 9740, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8494623655913979, + "success_rate.epoch.env.math": 0.9601990049751243, + "success_rate.epoch.env.sat": 0.3333333333333333, + "success_rate.epoch.env.science": 0.7571428571428571, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8946649119664499, + "success_rate.epoch.global": 0.8649425287356322, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996374709976799, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9953810623556582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004618937644341801 + }, + { + "epoch": 2.076054537707712, + "grad_norm": 31.23771291370937, + "learning_rate": 3.5885279305772e-07, + "loss": 0.2103, + "step": 9745, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.851063829787234, + "success_rate.epoch.env.math": 0.9603960396039604, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7614035087719299, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.883094531704669, + "success_rate.epoch.global": 0.8640226628895185, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979020979020979, + "tokens_p.mean_in_band": 0.6794181034482759, + "tokens_rate.above_band": 0.9610215053763441, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.038978494623655914 + }, + { + "epoch": 2.0771197273114614, + "grad_norm": 142.84453153763224, + "learning_rate": 3.588186597807132e-07, + "loss": 0.1762, + "step": 9750, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.8541666666666666, + "success_rate.epoch.env.math": 0.9607843137254902, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7647058823529411, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8837121212121212, + "success_rate.epoch.global": 0.8659217877094972, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965193704600485, + "tokens_p.mean_in_band": 0.75, + "tokens_rate.above_band": 0.9975845410628019, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0024154589371980675 + }, + { + "epoch": 2.078184916915211, + "grad_norm": 212.249322311458, + "learning_rate": 3.587845273646587e-07, + "loss": 0.1883, + "step": 9755, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8556701030927835, + "success_rate.epoch.env.math": 0.9609756097560975, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7663230240549829, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8843102892134825, + "success_rate.epoch.global": 0.8677685950413223, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944381598793364, + "tokens_p.mean_in_band": 0.69140625, + "tokens_rate.above_band": 0.9977426636568849, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002257336343115124 + }, + { + "epoch": 2.0792501065189604, + "grad_norm": 33.88421265661674, + "learning_rate": 3.587503958344319e-07, + "loss": 0.1871, + "step": 9760, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.8556701030927835, + "success_rate.epoch.env.math": 0.9617224880382775, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7679180887372014, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.884659693255837, + "success_rate.epoch.global": 0.8695652173913043, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989762090483619, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.08031529612271, + "grad_norm": 275.80249510225406, + "learning_rate": 3.5871626521490764e-07, + "loss": 0.2488, + "step": 9765, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8556701030927835, + "success_rate.epoch.env.math": 0.9620853080568721, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7676767676767676, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8848000545344594, + "success_rate.epoch.global": 0.8699731903485255, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992320415879017, + "tokens_p.mean_in_band": 0.58359375, + "tokens_rate.above_band": 0.9906367041198502, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009363295880149813 + }, + { + "epoch": 2.0813804857264593, + "grad_norm": 37.01370628622843, + "learning_rate": 3.5868213553096006e-07, + "loss": 0.1954, + "step": 9770, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8484848484848485, + "success_rate.epoch.env.math": 0.9627906976744186, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.770764119601329, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8844916442557479, + "success_rate.epoch.global": 0.8703703703703703, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9940580985915493, + "tokens_p.mean_in_band": 0.2216796875, + "tokens_rate.above_band": 0.993006993006993, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006993006993006993 + }, + { + "epoch": 2.082445675330209, + "grad_norm": 388.79164726042114, + "learning_rate": 3.586480068074627e-07, + "loss": 0.2549, + "step": 9775, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8529411764705882, + "success_rate.epoch.env.math": 0.9634703196347032, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7722772277227723, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8850961040800631, + "success_rate.epoch.global": 0.8720626631853786, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99609375, + "tokens_p.mean_in_band": 0.853515625, + "tokens_rate.above_band": 0.9565217391304348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043478260869565216 + }, + { + "epoch": 2.083510864933958, + "grad_norm": 99.21320677865037, + "learning_rate": 3.586138790692882e-07, + "loss": 0.2763, + "step": 9780, + "success_rate.epoch.env.abd": 1.0, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8529411764705882, + "success_rate.epoch.env.math": 0.9641255605381166, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7719869706840391, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8851292844313977, + "success_rate.epoch.global": 0.8724226804123711, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9999096820809249, + "tokens_p.mean_in_band": 0.642578125, + "tokens_rate.above_band": 0.9942528735632183, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005747126436781609 + }, + { + "epoch": 2.084576054537708, + "grad_norm": 145.39397899274596, + "learning_rate": 3.5857975234130867e-07, + "loss": 0.2596, + "step": 9785, + "success_rate.epoch.env.abd": 0.9767441860465116, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8543689320388349, + "success_rate.epoch.env.math": 0.9646017699115044, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7741935483870968, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8835114895547178, + "success_rate.epoch.global": 0.8727735368956743, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942540322580645, + "tokens_p.mean_below_band": 3.741847144232856e-07, + "tokens_p.mean_in_band": 0.10090948462537895, + "tokens_rate.above_band": 0.21102791014295438, + "tokens_rate.below_band": 0.0030633083730428863, + "tokens_rate.in_band": 0.7859087814840027 + }, + { + "epoch": 2.085641244141457, + "grad_norm": 407.73000929703767, + "learning_rate": 3.5854562664839547e-07, + "loss": 0.2151, + "step": 9790, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8584905660377359, + "success_rate.epoch.env.math": 0.9647577092511013, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7770700636942676, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8811796074218587, + "success_rate.epoch.global": 0.8731155778894473, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9948308270676691, + "tokens_p.mean_in_band": 0.6607142857142857, + "tokens_rate.above_band": 0.9047619047619048, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09523809523809523 + }, + { + "epoch": 2.086706433745207, + "grad_norm": 210.247580059442, + "learning_rate": 3.5851150201541906e-07, + "loss": 0.1942, + "step": 9795, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8598130841121495, + "success_rate.epoch.env.math": 0.9650655021834061, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.778125, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8814237208138999, + "success_rate.epoch.global": 0.8734491315136477, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982244318181818, + "tokens_p.mean_in_band": 0.709765625, + "tokens_rate.above_band": 0.9723756906077348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027624309392265192 + }, + { + "epoch": 2.087771623348956, + "grad_norm": 91.59689440486994, + "learning_rate": 3.5847737846724935e-07, + "loss": 0.2954, + "step": 9800, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8598130841121495, + "success_rate.epoch.env.math": 0.9658119658119658, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7777777777777778, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8814600154872033, + "success_rate.epoch.global": 0.8737745098039216, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978966346153846, + "tokens_p.mean_in_band": 0.609375, + "tokens_rate.above_band": 0.9811320754716981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018867924528301886 + }, + { + "epoch": 2.0888368129527057, + "grad_norm": 53.24308318000804, + "learning_rate": 3.584432560287552e-07, + "loss": 0.2765, + "step": 9805, + "success_rate.epoch.env.abd": 0.9772727272727273, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.8611111111111112, + "success_rate.epoch.env.math": 0.9661016949152542, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7784615384615384, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8816665170132043, + "success_rate.epoch.global": 0.8753026634382567, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983013775130305, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9992559523809523, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.000744047619047619 + }, + { + "epoch": 2.089902002556455, + "grad_norm": 121.81374819834977, + "learning_rate": 3.584091347248049e-07, + "loss": 0.2179, + "step": 9810, + "success_rate.epoch.env.abd": 0.9777777777777777, + "success_rate.epoch.env.agentgym:alfworld": 1.0, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8611111111111112, + "success_rate.epoch.env.math": 0.9665271966527197, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7774390243902439, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8818855717085128, + "success_rate.epoch.global": 0.8755980861244019, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0002246732026143, + "tokens_p.mean_in_band": 0.6015625, + "tokens_rate.above_band": 0.9986945169712794, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013054830287206266 + }, + { + "epoch": 2.0909671921602047, + "grad_norm": 41.60592812384703, + "learning_rate": 3.583750145802658e-07, + "loss": 0.3349, + "step": 9815, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.96, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.8611111111111112, + "success_rate.epoch.env.math": 0.9666666666666667, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7801204819277109, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8785916220747815, + "success_rate.epoch.global": 0.875886524822695, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990433673469388, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9865771812080537, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013422818791946308 + }, + { + "epoch": 2.092032381763954, + "grad_norm": 96.57248052582246, + "learning_rate": 3.583408956200044e-07, + "loss": 0.1948, + "step": 9820, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.96, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8623853211009175, + "success_rate.epoch.env.math": 0.9673469387755103, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.781437125748503, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8790952571490854, + "success_rate.epoch.global": 0.8773364485981309, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997455636743215, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9979166666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0020833333333333333 + }, + { + "epoch": 2.0930975713677036, + "grad_norm": 73.38948293390685, + "learning_rate": 3.5830677786888634e-07, + "loss": 0.4176, + "step": 9825, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.8648648648648649, + "success_rate.epoch.env.math": 0.964, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7797619047619048, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8790039703800219, + "success_rate.epoch.global": 0.8764434180138568, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939563679245284, + "tokens_p.mean_in_band": 0.3995615641276042, + "tokens_rate.above_band": 0.9724770642201835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027522935779816515 + }, + { + "epoch": 2.094162760971453, + "grad_norm": 0.0, + "learning_rate": 3.5827266135177634e-07, + "loss": 0.2741, + "step": 9830, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8648648648648649, + "success_rate.epoch.env.math": 0.9644268774703557, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7807017543859649, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8792243167024624, + "success_rate.epoch.global": 0.8767123287671232, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9951716738197425, + "tokens_p.mean_in_band": 0.7184244791666666, + "tokens_rate.above_band": 0.9748953974895398, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02510460251046025 + }, + { + "epoch": 2.0952279505752025, + "grad_norm": 197.33323881507908, + "learning_rate": 3.582385460935384e-07, + "loss": 0.2579, + "step": 9835, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8648648648648649, + "success_rate.epoch.env.math": 0.96484375, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7809798270893372, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8794169936713278, + "success_rate.epoch.global": 0.8769751693002258, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971217105263158, + "tokens_p.mean_in_band": 0.39453125, + "tokens_rate.above_band": 0.9956331877729258, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004366812227074236 + }, + { + "epoch": 2.0962931401789517, + "grad_norm": 141.31835519757158, + "learning_rate": 3.5820443211903545e-07, + "loss": 0.2315, + "step": 9840, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.8648648648648649, + "success_rate.epoch.env.math": 0.9652509652509652, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7806267806267806, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776137935712403, + "success_rate.epoch.global": 0.8761160714285714, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9924395161290323, + "tokens_p.mean_in_band": 0.6625, + "tokens_rate.above_band": 0.8920863309352518, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1079136690647482 + }, + { + "epoch": 2.0973583297827014, + "grad_norm": 0.0, + "learning_rate": 3.581703194531294e-07, + "loss": 0.3037, + "step": 9845, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.8660714285714286, + "success_rate.epoch.env.math": 0.9653846153846154, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7849162011173184, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8781255785103995, + "success_rate.epoch.global": 0.8774834437086093, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9930555555555556, + "tokens_p.mean_in_band": 0.8560267857142857, + "tokens_rate.above_band": 0.9625668449197861, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0374331550802139 + }, + { + "epoch": 2.0984235193864507, + "grad_norm": 78.01096507401617, + "learning_rate": 3.581362081206814e-07, + "loss": 0.2301, + "step": 9850, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8672566371681416, + "success_rate.epoch.env.math": 0.9657794676806084, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7845303867403315, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8783658986811304, + "success_rate.epoch.global": 0.8777292576419214, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976924587588374, + "tokens_p.mean_in_band": 0.7083333333333334, + "tokens_rate.above_band": 0.9906614785992218, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00933852140077821 + }, + { + "epoch": 2.0994887089902003, + "grad_norm": 103.19034151426482, + "learning_rate": 3.581020981465515e-07, + "loss": 0.3427, + "step": 9855, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8672566371681416, + "success_rate.epoch.env.math": 0.9661654135338346, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7859078590785907, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8785262094258107, + "success_rate.epoch.global": 0.8779697624190065, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9921016483516484, + "tokens_p.mean_in_band": 0.7278645833333334, + "tokens_rate.above_band": 0.91, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09 + }, + { + "epoch": 2.1005538985939496, + "grad_norm": 308.68351450125414, + "learning_rate": 3.5806798955559886e-07, + "loss": 0.2511, + "step": 9860, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9347826086956522, + "success_rate.epoch.env.logic": 0.8695652173913043, + "success_rate.epoch.env.math": 0.9662921348314607, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.786096256684492, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8759508747143844, + "success_rate.epoch.global": 0.8771367521367521, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994179799426934, + "tokens_p.mean_in_band": 0.49441964285714285, + "tokens_rate.above_band": 0.9900709219858156, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009929078014184398 + }, + { + "epoch": 2.1016190881976993, + "grad_norm": 314.3046116266596, + "learning_rate": 3.5803388237268156e-07, + "loss": 0.2412, + "step": 9865, + "success_rate.epoch.env.abd": 0.9787234042553191, + "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8695652173913043, + "success_rate.epoch.env.math": 0.9666666666666667, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7857142857142857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8761972340025068, + "success_rate.epoch.global": 0.8773784355179705, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996099695585997, + "tokens_p.mean_in_band": 0.54453125, + "tokens_rate.above_band": 0.9924471299093656, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0075528700906344415 + }, + { + "epoch": 2.1026842778014485, + "grad_norm": 50.924108583305674, + "learning_rate": 3.5799977662265666e-07, + "loss": 0.258, + "step": 9870, + "success_rate.epoch.env.abd": 0.9791666666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8571428571428571, + "success_rate.epoch.env.math": 0.967032967032967, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7842105263157895, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8750048197416619, + "success_rate.epoch.global": 0.8755230125523012, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 1.0000480030721965, + "tokens_p.mean_in_band": 0.5639105902777778, + "tokens_rate.above_band": 0.9730941704035875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026905829596412557 + }, + { + "epoch": 2.103749467405198, + "grad_norm": 84.85979172482334, + "learning_rate": 3.5796567233038016e-07, + "loss": 0.1677, + "step": 9875, + "success_rate.epoch.env.abd": 0.9791666666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.8583333333333333, + "success_rate.epoch.env.math": 0.9676258992805755, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7859007832898173, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8755161109277091, + "success_rate.epoch.global": 0.8768115942028986, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977609034267912, + "tokens_p.mean_in_band": 0.68359375, + "tokens_rate.above_band": 0.9968944099378882, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003105590062111801 + }, + { + "epoch": 2.1048146570089474, + "grad_norm": 129.0968070491486, + "learning_rate": 3.5793156952070705e-07, + "loss": 0.1709, + "step": 9880, + "success_rate.epoch.env.abd": 0.98, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.860655737704918, + "success_rate.epoch.env.math": 0.9678571428571429, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7875647668393783, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8759752895487135, + "success_rate.epoch.global": 0.8780737704918032, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978197674418605, + "tokens_p.mean_in_band": 0.8916015625, + "tokens_rate.above_band": 0.9772727272727273, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022727272727272728 + }, + { + "epoch": 2.105879846612697, + "grad_norm": 78.38916540916124, + "learning_rate": 3.5789746821849127e-07, + "loss": 0.293, + "step": 9885, + "success_rate.epoch.env.abd": 0.98, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.864, + "success_rate.epoch.env.math": 0.9680851063829787, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7846153846153846, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8761478671669867, + "success_rate.epoch.global": 0.8772819472616633, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9959112149532711, + "tokens_p.mean_in_band": 0.44921875, + "tokens_rate.above_band": 0.9304347826086956, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06956521739130435 + }, + { + "epoch": 2.1069450362164464, + "grad_norm": 64.62336526775415, + "learning_rate": 3.5786336844858546e-07, + "loss": 0.2072, + "step": 9890, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.8650793650793651, + "success_rate.epoch.env.math": 0.9683098591549296, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7848101265822784, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8763540572203902, + "success_rate.epoch.global": 0.8775100401606426, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968913612565445, + "tokens_p.mean_in_band": 0.5032552083333334, + "tokens_rate.above_band": 0.9845360824742269, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015463917525773196 + }, + { + "epoch": 2.108010225820196, + "grad_norm": 0.0, + "learning_rate": 3.5782927023584136e-07, + "loss": 0.2612, + "step": 9895, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9387755102040817, + "success_rate.epoch.env.logic": 0.859375, + "success_rate.epoch.env.math": 0.9685314685314685, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7880299251870324, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8761483338478382, + "success_rate.epoch.global": 0.8777335984095428, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9913563829787234, + "tokens_p.mean_in_band": 0.6927083333333334, + "tokens_rate.above_band": 0.912621359223301, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08737864077669903 + }, + { + "epoch": 2.1090754154239453, + "grad_norm": 88.72607914915754, + "learning_rate": 3.5779517360510954e-07, + "loss": 0.4011, + "step": 9900, + "success_rate.epoch.env.abd": 0.9807692307692307, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9423076923076923, + "success_rate.epoch.env.logic": 0.8604651162790697, + "success_rate.epoch.env.math": 0.96875, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7866004962779156, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8766417456636887, + "success_rate.epoch.global": 0.8779527559055118, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988386824324325, + "tokens_p.mean_in_band": 0.64990234375, + "tokens_rate.above_band": 0.9946236559139785, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005376344086021506 + }, + { + "epoch": 2.110140605027695, + "grad_norm": 160.4988556227826, + "learning_rate": 3.577610785812394e-07, + "loss": 0.2467, + "step": 9905, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9375, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.8625954198473282, + "success_rate.epoch.env.math": 0.9689655172413794, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7881773399014779, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.877225587597545, + "success_rate.epoch.global": 0.8791423001949318, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997301479468599, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.111205794631444, + "grad_norm": 450.5338541198487, + "learning_rate": 3.57726985189079e-07, + "loss": 0.2826, + "step": 9910, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9393939393939394, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8646616541353384, + "success_rate.epoch.env.math": 0.9691780821917808, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7892156862745098, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8778796977784094, + "success_rate.epoch.global": 0.8803088803088803, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997320732657833, + "tokens_p.mean_in_band": 0.671875, + "tokens_rate.above_band": 0.9992211838006231, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.000778816199376947 + }, + { + "epoch": 2.112270984235194, + "grad_norm": 23.190480883870144, + "learning_rate": 3.576928934534756e-07, + "loss": 0.2372, + "step": 9915, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8656716417910447, + "success_rate.epoch.env.math": 0.9693877551020408, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7912621359223301, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8784914535672268, + "success_rate.epoch.global": 0.8814531548757171, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973997028231798, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9985163204747775, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001483679525222552 + }, + { + "epoch": 2.113336173838943, + "grad_norm": 69.2132976853282, + "learning_rate": 3.5765880339927475e-07, + "loss": 0.3353, + "step": 9920, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9464285714285714, + "success_rate.epoch.env.logic": 0.8666666666666667, + "success_rate.epoch.env.math": 0.9693877551020408, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7933491686460807, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8787716406217151, + "success_rate.epoch.global": 0.8816287878787878, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8888888888888888, + "success_rate.window.env_macro_mean": 0.9444444444444444, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933035714285714, + "tokens_p.mean_in_band": 0.4966517857142857, + "tokens_rate.above_band": 0.9333333333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06666666666666667 + }, + { + "epoch": 2.114401363442693, + "grad_norm": 46.075415051842846, + "learning_rate": 3.5762471505132125e-07, + "loss": 0.2824, + "step": 9925, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.8676470588235294, + "success_rate.epoch.env.math": 0.9693877551020408, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7868852459016393, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8784209520201665, + "success_rate.epoch.global": 0.8789868667917449, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9966744087837838, + "tokens_p.mean_in_band": 0.4152644230769231, + "tokens_rate.above_band": 0.9579288025889967, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.042071197411003236 + }, + { + "epoch": 2.115466553046442, + "grad_norm": 76.85179740366831, + "learning_rate": 3.575906284344583e-07, + "loss": 0.2347, + "step": 9930, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.8686131386861314, + "success_rate.epoch.env.math": 0.9697986577181208, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7878787878787878, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8789429472350193, + "success_rate.epoch.global": 0.8801115241635687, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995449029126213, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.1165317426501917, + "grad_norm": 452.0678086516589, + "learning_rate": 3.57556543573528e-07, + "loss": 0.3612, + "step": 9935, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8714285714285714, + "success_rate.epoch.env.math": 0.9701986754966887, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.7878787878787878, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8793123025212128, + "success_rate.epoch.global": 0.8812154696132597, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997874149659864, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9988674971687429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0011325028312570782 + }, + { + "epoch": 2.1175969322539414, + "grad_norm": 158.39559236936503, + "learning_rate": 3.5752246049337125e-07, + "loss": 0.2274, + "step": 9940, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.8741258741258742, + "success_rate.epoch.env.math": 0.9702970297029703, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.789838337182448, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776096077687033, + "success_rate.epoch.global": 0.8813868613138686, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965753424657534, + "tokens_p.mean_in_band": 0.6015625, + "tokens_rate.above_band": 0.9647577092511013, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03524229074889868 + }, + { + "epoch": 2.1186621218576907, + "grad_norm": 78.70995162916155, + "learning_rate": 3.5748837921882747e-07, + "loss": 0.2829, + "step": 9945, + "success_rate.epoch.env.abd": 0.9824561403508771, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8758620689655172, + "success_rate.epoch.env.math": 0.9707792207792207, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7903225806451613, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779582972885009, + "success_rate.epoch.global": 0.8824593128390597, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973821989528796, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9982578397212544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017421602787456446 + }, + { + "epoch": 2.1197273114614403, + "grad_norm": 115.78725251556781, + "learning_rate": 3.5745429977473487e-07, + "loss": 0.3258, + "step": 9950, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8775510204081632, + "success_rate.epoch.env.math": 0.9709677419354839, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7881548974943052, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779594127236674, + "success_rate.epoch.global": 0.8817204301075269, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.994140625, + "tokens_p.mean_in_band": 0.466796875, + "tokens_rate.above_band": 0.975609756097561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024390243902439025 + }, + { + "epoch": 2.1207925010651896, + "grad_norm": 63.8070756453753, + "learning_rate": 3.574202221859303e-07, + "loss": 0.2211, + "step": 9955, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.9459459459459459, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8733333333333333, + "success_rate.epoch.env.math": 0.9710610932475884, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7891156462585034, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8778083140421196, + "success_rate.epoch.global": 0.8818827708703375, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973290598290598, + "tokens_p.mean_in_band": 0.7256433823529411, + "tokens_rate.above_band": 0.9856781802864364, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014321819713563605 + }, + { + "epoch": 2.1218576906689393, + "grad_norm": 195.56920867765484, + "learning_rate": 3.573861464772492e-07, + "loss": 0.3186, + "step": 9960, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8741721854304636, + "success_rate.epoch.env.math": 0.9714285714285714, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7882882882882883, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779986480587157, + "success_rate.epoch.global": 0.8820422535211268, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966104497354498, + "tokens_p.mean_in_band": 0.73203125, + "tokens_rate.above_band": 0.9869451697127938, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013054830287206266 + }, + { + "epoch": 2.1229228802726885, + "grad_norm": 119.32577809880027, + "learning_rate": 3.573520726735258e-07, + "loss": 0.265, + "step": 9965, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.875, + "success_rate.epoch.env.math": 0.9715189873417721, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7884187082405345, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.878242344783807, + "success_rate.epoch.global": 0.8821989528795812, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9666666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983742774566474, + "tokens_p.mean_in_band": 0.6294642857142857, + "tokens_rate.above_band": 0.9801699716713881, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019830028328611898 + }, + { + "epoch": 2.123988069876438, + "grad_norm": 36.09824604465382, + "learning_rate": 3.573180007995928e-07, + "loss": 0.0905, + "step": 9970, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8774193548387097, + "success_rate.epoch.env.math": 0.9719626168224299, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7888888888888889, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8785453597808729, + "success_rate.epoch.global": 0.8832179930795848, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993206521739131, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9928057553956835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007194244604316547 + }, + { + "epoch": 2.1250532594801874, + "grad_norm": 87.4942570720097, + "learning_rate": 3.5728393088028163e-07, + "loss": 0.3072, + "step": 9975, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 1.0, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.8782051282051282, + "success_rate.epoch.env.math": 0.9691358024691358, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7912087912087912, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.878570710811148, + "success_rate.epoch.global": 0.8833619210977701, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0002414459161149, + "tokens_p.mean_in_band": 0.32734375, + "tokens_rate.above_band": 0.9945115257958288, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005488474204171241 + }, + { + "epoch": 2.126118449083937, + "grad_norm": 120.08776235586902, + "learning_rate": 3.57249862940422e-07, + "loss": 0.2397, + "step": 9980, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.879746835443038, + "success_rate.epoch.env.math": 0.9694189602446484, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7925764192139738, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8764077968416466, + "success_rate.epoch.global": 0.8835034013605442, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978777066929134, + "tokens_p.mean_below_band": 5.20230969414115e-10, + "tokens_p.mean_in_band": 0.8080357142857143, + "tokens_rate.above_band": 0.9921875, + "tokens_rate.below_band": 0.0009765625, + "tokens_rate.in_band": 0.0068359375 + }, + { + "epoch": 2.1271836386876863, + "grad_norm": 28.93575935653819, + "learning_rate": 3.5721579700484256e-07, + "loss": 0.252, + "step": 9985, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8819875776397516, + "success_rate.epoch.env.math": 0.9697885196374623, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7934782608695652, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8767969052416708, + "success_rate.epoch.global": 0.8844856661045531, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.128248828291436, + "grad_norm": 102.13109463315611, + "learning_rate": 3.571817330983703e-07, + "loss": 0.3414, + "step": 9990, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8827160493827161, + "success_rate.epoch.env.math": 0.9700598802395209, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7939914163090128, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8769344495857134, + "success_rate.epoch.global": 0.8846153846153846, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9953703703703703, + "tokens_p.mean_in_band": 0.5611979166666666, + "tokens_rate.above_band": 0.972972972972973, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02702702702702703 + }, + { + "epoch": 2.1293140178951853, + "grad_norm": 123.16740415066614, + "learning_rate": 3.5714767124583063e-07, + "loss": 0.2379, + "step": 9995, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8841463414634146, + "success_rate.epoch.env.math": 0.9703264094955489, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7914893617021277, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8768860851105824, + "success_rate.epoch.global": 0.8839137645107794, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9970079787234043, + "tokens_p.mean_in_band": 0.5575284090909091, + "tokens_rate.above_band": 0.8103448275862069, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1896551724137931 + }, + { + "epoch": 2.130379207498935, + "grad_norm": 256.25816493493846, + "learning_rate": 3.5711361147204767e-07, + "loss": 0.2942, + "step": 10000, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8848484848484849, + "success_rate.epoch.env.math": 0.9705014749262537, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7932489451476793, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772407457133379, + "success_rate.epoch.global": 0.8848684210526315, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996654175588865, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.131444397102684, + "grad_norm": 77.31242397726325, + "learning_rate": 3.570795538018439e-07, + "loss": 0.3695, + "step": 10005, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8855421686746988, + "success_rate.epoch.env.math": 0.9710144927536232, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7941176470588235, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8774519656622236, + "success_rate.epoch.global": 0.8858075040783034, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953271028037384, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.132509586706434, + "grad_norm": 88.03783538102049, + "learning_rate": 3.570454982600404e-07, + "loss": 0.2477, + "step": 10010, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8869047619047619, + "success_rate.epoch.env.math": 0.9712643678160919, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7958333333333333, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777545251683182, + "success_rate.epoch.global": 0.8867313915857605, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978373702422145, + "tokens_p.mean_in_band": 0.79296875, + "tokens_rate.above_band": 0.996551724137931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0034482758620689655 + }, + { + "epoch": 2.133574776310183, + "grad_norm": 44.934618978991544, + "learning_rate": 3.5701144487145644e-07, + "loss": 0.2371, + "step": 10015, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.95, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.888235294117647, + "success_rate.epoch.env.math": 0.9713467048710601, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.7942386831275721, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8778545497450586, + "success_rate.epoch.global": 0.8860353130016051, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974609375, + "tokens_p.mean_in_band": 0.6341145833333334, + "tokens_rate.above_band": 0.9467455621301775, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05325443786982249 + }, + { + "epoch": 2.134639965913933, + "grad_norm": 35.167712395122784, + "learning_rate": 3.569773936609101e-07, + "loss": 0.2102, + "step": 10020, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.8888888888888888, + "success_rate.epoch.env.math": 0.9715099715099715, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7955010224948875, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8767022010311888, + "success_rate.epoch.global": 0.8861464968152867, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969457687723481, + "tokens_p.mean_in_band": 0.5036892361111112, + "tokens_rate.above_band": 0.9789964994165694, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021003500583430573 + }, + { + "epoch": 2.135705155517682, + "grad_norm": 100.36409304806315, + "learning_rate": 3.569433446532175e-07, + "loss": 0.319, + "step": 10025, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9538461538461539, + "success_rate.epoch.env.logic": 0.8895348837209303, + "success_rate.epoch.env.math": 0.971830985915493, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7955465587044535, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8767942506172914, + "success_rate.epoch.global": 0.8862559241706162, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994375, + "tokens_p.mean_in_band": 0.6436941964285714, + "tokens_rate.above_band": 0.9345794392523364, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06542056074766354 + }, + { + "epoch": 2.1367703451214317, + "grad_norm": 190.31628250345724, + "learning_rate": 3.569092978731933e-07, + "loss": 0.2563, + "step": 10030, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.8926553672316384, + "success_rate.epoch.env.math": 0.971830985915493, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7943548387096774, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8770550186928022, + "success_rate.epoch.global": 0.8863636363636364, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998180494905385, + "tokens_p.mean_in_band": 0.466796875, + "tokens_rate.above_band": 0.9970972423802612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002902757619738752 + }, + { + "epoch": 2.137835534725181, + "grad_norm": 147.1014082681251, + "learning_rate": 3.5687525334565063e-07, + "loss": 0.2763, + "step": 10035, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.8926553672316384, + "success_rate.epoch.env.math": 0.9722222222222222, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.794, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8771200026608578, + "success_rate.epoch.global": 0.8864696734059098, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9921343537414966, + "tokens_p.mean_in_band": 0.5927734375, + "tokens_rate.above_band": 0.9865771812080537, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013422818791946308 + }, + { + "epoch": 2.1389007243289306, + "grad_norm": 168.98905659793235, + "learning_rate": 3.568412110954009e-07, + "loss": 0.2934, + "step": 10040, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.8932584269662921, + "success_rate.epoch.env.math": 0.9723756906077348, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7944664031620553, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772910393541653, + "success_rate.epoch.global": 0.8865740740740741, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957298136645962, + "tokens_p.mean_in_band": 0.62421875, + "tokens_rate.above_band": 0.9698795180722891, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030120481927710843 + }, + { + "epoch": 2.13996591393268, + "grad_norm": 68.63278084097718, + "learning_rate": 3.5680717114725375e-07, + "loss": 0.2753, + "step": 10045, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.8950276243093923, + "success_rate.epoch.env.math": 0.9726775956284153, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7956777996070727, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8775894483367833, + "success_rate.epoch.global": 0.8874425727411945, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9871794871794872, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01282051282051282 + }, + { + "epoch": 2.1410311035364296, + "grad_norm": 125.97507517372232, + "learning_rate": 3.567731335260174e-07, + "loss": 0.2266, + "step": 10050, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.8956043956043956, + "success_rate.epoch.env.math": 0.9728260869565217, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.797270955165692, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8779265897114867, + "success_rate.epoch.global": 0.8882978723404256, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985431235431236, + "tokens_p.mean_in_band": 0.69921875, + "tokens_rate.above_band": 0.9839449541284404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016055045871559634 + }, + { + "epoch": 2.142096293140179, + "grad_norm": 50.06901825201844, + "learning_rate": 3.567390982564981e-07, + "loss": 0.3495, + "step": 10055, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9571428571428572, + "success_rate.epoch.env.logic": 0.8956043956043956, + "success_rate.epoch.env.math": 0.9703504043126685, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7953667953667953, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776060784208297, + "success_rate.epoch.global": 0.8868778280542986, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9971698113207547, + "tokens_p.mean_below_band": 2.2118911147117615e-08, + "tokens_p.mean_in_band": 0.523162841796875, + "tokens_rate.above_band": 0.9397163120567376, + "tokens_rate.below_band": 0.0035460992907801418, + "tokens_rate.in_band": 0.05673758865248227 + }, + { + "epoch": 2.1431614827439285, + "grad_norm": 113.59391973124605, + "learning_rate": 3.5670506536350055e-07, + "loss": 0.2315, + "step": 10060, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8961748633879781, + "success_rate.epoch.env.math": 0.9707446808510638, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7946257197696737, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8776812866440331, + "success_rate.epoch.global": 0.8869760479041916, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977156432748538, + "tokens_p.mean_in_band": 0.7194010416666666, + "tokens_rate.above_band": 0.9827586206896551, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017241379310344827 + }, + { + "epoch": 2.1442266723476777, + "grad_norm": 491.9637640602059, + "learning_rate": 3.5667103487182774e-07, + "loss": 0.3562, + "step": 10065, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8918918918918919, + "success_rate.epoch.env.math": 0.9709762532981531, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7934990439770554, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8772957688921162, + "success_rate.epoch.global": 0.8863298662704309, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.000812567713976, + "tokens_p.mean_in_band": 0.4486607142857143, + "tokens_rate.above_band": 0.9777542372881356, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022245762711864406 + }, + { + "epoch": 2.1452918619514274, + "grad_norm": 163.9797965599951, + "learning_rate": 3.5663700680628075e-07, + "loss": 0.306, + "step": 10070, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8924731182795699, + "success_rate.epoch.env.math": 0.9712793733681462, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7950664136622391, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8775386058852294, + "success_rate.epoch.global": 0.8871681415929203, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976851851851852, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.1463570515551766, + "grad_norm": 49.16048371659576, + "learning_rate": 3.5660298119165906e-07, + "loss": 0.1745, + "step": 10075, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.893048128342246, + "success_rate.epoch.env.math": 0.9717223650385605, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.7958412098298677, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8777629296791726, + "success_rate.epoch.global": 0.8879941434846267, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965796019900498, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9901477832512315, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009852216748768473 + }, + { + "epoch": 2.1474222411589263, + "grad_norm": 76.5793998030311, + "learning_rate": 3.565689580527602e-07, + "loss": 0.2279, + "step": 10080, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8936170212765957, + "success_rate.epoch.env.math": 0.9720101781170484, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.795880149812734, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8778443520424184, + "success_rate.epoch.global": 0.8880813953488372, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957107843137255, + "tokens_p.mean_in_band": 0.708984375, + "tokens_rate.above_band": 0.9745222929936306, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025477707006369428 + }, + { + "epoch": 2.1484874307626756, + "grad_norm": 162.09354830423786, + "learning_rate": 3.565349374143801e-07, + "loss": 0.345, + "step": 10085, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8947368421052632, + "success_rate.epoch.env.math": 0.9720101781170484, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7981481481481482, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8870100947802416, + "success_rate.epoch.global": 0.8888888888888888, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996895032051282, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9889064976228209, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011093502377179081 + }, + { + "epoch": 2.1495526203664252, + "grad_norm": 78.51081522612341, + "learning_rate": 3.5650091930131275e-07, + "loss": 0.4265, + "step": 10090, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8958333333333334, + "success_rate.epoch.env.math": 0.9722222222222222, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7977941176470589, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8871551430050763, + "success_rate.epoch.global": 0.8889684813753582, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983874555160143, + "tokens_p.mean_in_band": 0.66796875, + "tokens_rate.above_band": 0.9808027923211169, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019197207678883072 + }, + { + "epoch": 2.1506178099701745, + "grad_norm": 109.23803165405054, + "learning_rate": 3.564669037383502e-07, + "loss": 0.241, + "step": 10095, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8963730569948186, + "success_rate.epoch.env.math": 0.972568578553616, + "success_rate.epoch.env.sat": 0.2222222222222222, + "success_rate.epoch.env.science": 0.7992700729927007, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8873698734903964, + "success_rate.epoch.global": 0.8897581792318634, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959239130434783, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9484536082474226, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05154639175257732 + }, + { + "epoch": 2.151682999573924, + "grad_norm": 97.71526801625657, + "learning_rate": 3.564328907502829e-07, + "loss": 0.3166, + "step": 10100, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.8974358974358975, + "success_rate.epoch.env.math": 0.9727047146401985, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.7992766726943942, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8854592693092265, + "success_rate.epoch.global": 0.8891242937853108, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9917091836734694, + "tokens_p.mean_in_band": 0.5336538461538461, + "tokens_rate.above_band": 0.8497109826589595, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15028901734104047 + }, + { + "epoch": 2.1527481891776734, + "grad_norm": 11.688365148775938, + "learning_rate": 3.5639888036189906e-07, + "loss": 0.2732, + "step": 10105, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.898989898989899, + "success_rate.epoch.env.math": 0.9728395061728395, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.8010752688172043, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.885776304691904, + "success_rate.epoch.global": 0.8899018232819075, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944196428571429, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9790209790209791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02097902097902098 + }, + { + "epoch": 2.153813378781423, + "grad_norm": 424.5865547018612, + "learning_rate": 3.5636487259798545e-07, + "loss": 0.3279, + "step": 10110, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9577464788732394, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9731051344743277, + "success_rate.epoch.env.sat": 0.2, + "success_rate.epoch.env.science": 0.8021390374331551, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8860078080796328, + "success_rate.epoch.global": 0.8906685236768802, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9921875, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.1548785683851728, + "grad_norm": 94.85832224596984, + "learning_rate": 3.563308674833265e-07, + "loss": 0.3266, + "step": 10115, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:sciworld": 0.975, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.900497512437811, + "success_rate.epoch.env.math": 0.9731051344743277, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8024691358024691, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8845841777884936, + "success_rate.epoch.global": 0.8900414937759336, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979440789473685, + "tokens_p.mean_in_band": 0.623095703125, + "tokens_rate.above_band": 0.9785407725321889, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02145922746781116 + }, + { + "epoch": 2.155943757988922, + "grad_norm": 93.90254543878595, + "learning_rate": 3.5629686504270506e-07, + "loss": 0.1606, + "step": 10120, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9583333333333334, + "success_rate.epoch.env.logic": 0.9014778325123153, + "success_rate.epoch.env.math": 0.973170731707317, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8038528896672504, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8849748793308326, + "success_rate.epoch.global": 0.8907967032967034, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980502599653379, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.1570089475926717, + "grad_norm": 624.0064901894963, + "learning_rate": 3.562628653009017e-07, + "loss": 0.4398, + "step": 10125, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.958904109589041, + "success_rate.epoch.env.logic": 0.8975609756097561, + "success_rate.epoch.env.math": 0.9732360097323601, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8034782608695652, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.882845945033624, + "success_rate.epoch.global": 0.8894952251023193, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.997885687732342, + "tokens_p.mean_in_band": 0.5966796875, + "tokens_rate.above_band": 0.9853479853479854, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014652014652014652 + }, + { + "epoch": 2.158074137196421, + "grad_norm": 38.12365135895893, + "learning_rate": 3.5622886828269516e-07, + "loss": 0.142, + "step": 10130, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.958904109589041, + "success_rate.epoch.env.logic": 0.8985507246376812, + "success_rate.epoch.env.math": 0.9733656174334141, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.803448275862069, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8829449788264859, + "success_rate.epoch.global": 0.8895663956639567, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975341426403642, + "tokens_p.mean_in_band": 0.7297585227272727, + "tokens_rate.above_band": 0.9835820895522388, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016417910447761194 + }, + { + "epoch": 2.1591393268001706, + "grad_norm": 220.53278906205063, + "learning_rate": 3.561948740128625e-07, + "loss": 0.3503, + "step": 10135, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.96, + "success_rate.epoch.env.logic": 0.8995215311004785, + "success_rate.epoch.env.math": 0.9734939759036144, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8047945205479452, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8832669151928344, + "success_rate.epoch.global": 0.8903095558546433, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0001228380503144, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9945269741985927, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00547302580140735 + }, + { + "epoch": 2.16020451640392, + "grad_norm": 112.18080955828253, + "learning_rate": 3.561608825161782e-07, + "loss": 0.2632, + "step": 10140, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9605263157894737, + "success_rate.epoch.env.logic": 0.9014084507042254, + "success_rate.epoch.env.math": 0.973621103117506, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8037542662116041, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8815951985197795, + "success_rate.epoch.global": 0.8897058823529411, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986103192702395, + "tokens_p.mean_in_band": 0.5463169642857143, + "tokens_rate.above_band": 0.9920814479638009, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007918552036199095 + }, + { + "epoch": 2.1612697060076695, + "grad_norm": 144.60601412089292, + "learning_rate": 3.561268938174151e-07, + "loss": 0.241, + "step": 10145, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.9027777777777778, + "success_rate.epoch.env.math": 0.9737470167064439, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8047538200339559, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8819140112683095, + "success_rate.epoch.global": 0.8904382470119522, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993169398907104, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.997275204359673, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0027247956403269754 + }, + { + "epoch": 2.162334895611419, + "grad_norm": 102.5551948515419, + "learning_rate": 3.56092907941344e-07, + "loss": 0.2562, + "step": 10150, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9741176470588235, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8057432432432432, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.882078382380628, + "success_rate.epoch.global": 0.8911609498680739, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9898174157303371, + "tokens_p.mean_in_band": 0.748046875, + "tokens_rate.above_band": 0.9175257731958762, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08247422680412371 + }, + { + "epoch": 2.1634000852151685, + "grad_norm": 98.02656807287734, + "learning_rate": 3.560589249127335e-07, + "loss": 0.1623, + "step": 10155, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9615384615384616, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9744186046511628, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8070469798657718, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8822770564086082, + "success_rate.epoch.global": 0.8918741808650066, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967177242888403, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9978165938864629, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002183406113537118 + }, + { + "epoch": 2.1644652748189177, + "grad_norm": 102.3557858066811, + "learning_rate": 3.5602494475635026e-07, + "loss": 0.1352, + "step": 10160, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.9041095890410958, + "success_rate.epoch.env.math": 0.9747126436781609, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8076923076923077, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8824870568049946, + "success_rate.epoch.global": 0.892578125, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967296511627907, + "tokens_p.mean_in_band": 0.8510044642857143, + "tokens_rate.above_band": 0.98005698005698, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019943019943019943 + }, + { + "epoch": 2.1655304644226674, + "grad_norm": 145.79749909634245, + "learning_rate": 3.559909674969587e-07, + "loss": 0.2773, + "step": 10165, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.9049773755656109, + "success_rate.epoch.env.math": 0.9749430523917996, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8086522462562395, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8827244971375513, + "success_rate.epoch.global": 0.8932729624838293, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974291590493601, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9963570127504554, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0036429872495446266 + }, + { + "epoch": 2.1665956540264166, + "grad_norm": 153.5381911864979, + "learning_rate": 3.5595699315932114e-07, + "loss": 0.3574, + "step": 10170, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.9054054054054054, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.18181818181818182, + "success_rate.epoch.env.science": 0.8108552631578947, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8829866437287343, + "success_rate.epoch.global": 0.8939588688946015, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961222627737226, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9927536231884058, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007246376811594203 + }, + { + "epoch": 2.1676608436301663, + "grad_norm": 148.2431649183385, + "learning_rate": 3.5592302176819803e-07, + "loss": 0.2617, + "step": 10175, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9620253164556962, + "success_rate.epoch.env.logic": 0.90625, + "success_rate.epoch.env.math": 0.9752252252252253, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8117839607201309, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8817909166587833, + "success_rate.epoch.global": 0.8939974457215837, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967672413793104, + "tokens_p.mean_in_band": 0.6964285714285714, + "tokens_rate.above_band": 0.9119496855345912, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0880503144654088 + }, + { + "epoch": 2.1687260332339156, + "grad_norm": 243.756584530022, + "learning_rate": 3.558890533483473e-07, + "loss": 0.2382, + "step": 10180, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9625, + "success_rate.epoch.env.logic": 0.9070796460176991, + "success_rate.epoch.env.math": 0.9752808988764045, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8133116883116883, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8820707338912055, + "success_rate.epoch.global": 0.8946700507614214, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982763788968825, + "tokens_p.mean_in_band": 0.806640625, + "tokens_rate.above_band": 0.9904988123515439, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009501187648456057 + }, + { + "epoch": 2.1697912228376652, + "grad_norm": 109.96009841477199, + "learning_rate": 3.558550879245249e-07, + "loss": 0.2608, + "step": 10185, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9074889867841409, + "success_rate.epoch.env.math": 0.9754464285714286, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8129032258064516, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8821447782132661, + "success_rate.epoch.global": 0.894703656998739, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995181718061674, + "tokens_p.mean_in_band": 0.7135416666666666, + "tokens_rate.above_band": 0.9869565217391304, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013043478260869565 + }, + { + "epoch": 2.1708564124414145, + "grad_norm": 93.62406945200333, + "learning_rate": 3.558211255214847e-07, + "loss": 0.2817, + "step": 10190, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9082969432314411, + "success_rate.epoch.env.math": 0.9755555555555555, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8138041733547352, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8824876201046551, + "success_rate.epoch.global": 0.8953634085213033, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984800583657587, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9961240310077519, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003875968992248062 + }, + { + "epoch": 2.171921602045164, + "grad_norm": 82.59491410077452, + "learning_rate": 3.5578716616397814e-07, + "loss": 0.1769, + "step": 10195, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.908695652173913, + "success_rate.epoch.env.math": 0.975609756097561, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8142857142857143, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8826206193448072, + "success_rate.epoch.global": 0.8953922789539228, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9934701492537313, + "tokens_p.mean_in_band": 0.634375, + "tokens_rate.above_band": 0.9305555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06944444444444445 + }, + { + "epoch": 2.1729867916489134, + "grad_norm": 70.72759597959033, + "learning_rate": 3.5575320987675463e-07, + "loss": 0.1978, + "step": 10200, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9094827586206896, + "success_rate.epoch.env.math": 0.9757709251101322, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.815748031496063, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8828397641329613, + "success_rate.epoch.global": 0.8960396039603961, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970858134920635, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.174051981252663, + "grad_norm": 57.463233844692645, + "learning_rate": 3.5571925668456124e-07, + "loss": 0.1043, + "step": 10205, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9102564102564102, + "success_rate.epoch.env.math": 0.975929978118162, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8171875, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8830554162372966, + "success_rate.epoch.global": 0.8966789667896679, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9903846153846154, + "tokens_p.mean_in_band": 0.8020833333333334, + "tokens_rate.above_band": 0.975, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025 + }, + { + "epoch": 2.1751171708564123, + "grad_norm": 113.58682220156203, + "learning_rate": 3.556853066121428e-07, + "loss": 0.2097, + "step": 10210, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9629629629629629, + "success_rate.epoch.env.logic": 0.9071729957805907, + "success_rate.epoch.env.math": 0.9761388286334056, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8177570093457944, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8828618147810883, + "success_rate.epoch.global": 0.8966992665036675, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9951923076923077, + "tokens_p.mean_in_band": 0.7373046875, + "tokens_rate.above_band": 0.9381443298969072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.061855670103092786 + }, + { + "epoch": 2.176182360460162, + "grad_norm": 145.70704886424622, + "learning_rate": 3.5565135968424194e-07, + "loss": 0.3638, + "step": 10215, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9634146341463414, + "success_rate.epoch.env.logic": 0.9083333333333333, + "success_rate.epoch.env.math": 0.9762419006479481, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8186046511627907, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8832493968574309, + "success_rate.epoch.global": 0.8973268529769137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0018138801261829, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9993694829760403, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0006305170239596469 + }, + { + "epoch": 2.1772475500639112, + "grad_norm": 91.29467156649099, + "learning_rate": 3.556174159255989e-07, + "loss": 0.124, + "step": 10220, + "success_rate.epoch.env.abd": 0.9871794871794872, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9634146341463414, + "success_rate.epoch.env.logic": 0.9087136929460581, + "success_rate.epoch.env.math": 0.9764957264957265, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8188854489164087, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8834091619152038, + "success_rate.epoch.global": 0.8979468599033816, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988132911392406, + "tokens_p.mean_in_band": 0.783203125, + "tokens_rate.above_band": 0.9875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0125 + }, + { + "epoch": 2.178312739667661, + "grad_norm": 124.59608843676052, + "learning_rate": 3.5558347536095157e-07, + "loss": 0.1955, + "step": 10225, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.963855421686747, + "success_rate.epoch.env.logic": 0.9094650205761317, + "success_rate.epoch.env.math": 0.9765957446808511, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.82, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8836427045889862, + "success_rate.epoch.global": 0.8985594237695078, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974933155080213, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.9929203539823008, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007079646017699115 + }, + { + "epoch": 2.17937792927141, + "grad_norm": 91.12728799105118, + "learning_rate": 3.555495380150357e-07, + "loss": 0.2989, + "step": 10230, + "success_rate.epoch.env.abd": 0.9875, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9098360655737705, + "success_rate.epoch.env.math": 0.9767441860465116, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8211009174311926, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8827612648014224, + "success_rate.epoch.global": 0.8985680190930787, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9869237588652482, + "tokens_p.mean_in_band": 0.6725, + "tokens_rate.above_band": 0.818577648766328, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.18142235123367198 + }, + { + "epoch": 2.18044311887516, + "grad_norm": 158.24440444924429, + "learning_rate": 3.555156039125846e-07, + "loss": 0.2805, + "step": 10235, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, + "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9102040816326531, + "success_rate.epoch.env.math": 0.9767932489451476, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8212121212121212, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8828233196856133, + "success_rate.epoch.global": 0.8985765124555161, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999375, + "tokens_p.mean_in_band": 0.471875, + "tokens_rate.above_band": 0.9917355371900827, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008264462809917356 + }, + { + "epoch": 2.181508308478909, + "grad_norm": 29.247346444365288, + "learning_rate": 3.5548167307832904e-07, + "loss": 0.4119, + "step": 10240, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9787234042553191, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9112903225806451, + "success_rate.epoch.env.math": 0.9768421052631578, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8220211161387632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8814162625702872, + "success_rate.epoch.global": 0.8985849056603774, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980796089385475, + "tokens_p.mean_in_band": 0.57421875, + "tokens_rate.above_band": 0.9944444444444445, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005555555555555556 + }, + { + "epoch": 2.1825734980826588, + "grad_norm": 41.530817869612875, + "learning_rate": 3.554477455369977e-07, + "loss": 0.2657, + "step": 10245, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9787234042553191, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9116465863453815, + "success_rate.epoch.env.math": 0.9770354906054279, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8228228228228228, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8815528001023046, + "success_rate.epoch.global": 0.8991793669402111, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983552631578947, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.183638687686408, + "grad_norm": 255.64992913836087, + "learning_rate": 3.5541382131331677e-07, + "loss": 0.3722, + "step": 10250, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9123505976095617, + "success_rate.epoch.env.math": 0.9751552795031055, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8236173393124065, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.881558398198922, + "success_rate.epoch.global": 0.8991841491841492, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974112426035503, + "tokens_p.mean_below_band": 9.255018085241318e-09, + "tokens_p.mean_in_band": 0.753515625, + "tokens_rate.above_band": 0.9657142857142857, + "tokens_rate.below_band": 0.005714285714285714, + "tokens_rate.in_band": 0.02857142857142857 + }, + { + "epoch": 2.1847038772901577, + "grad_norm": 183.63775688535384, + "learning_rate": 3.553799004320098e-07, + "loss": 0.2492, + "step": 10255, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9087301587301587, + "success_rate.epoch.env.math": 0.9752066115702479, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8249258160237388, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8814048953941249, + "success_rate.epoch.global": 0.899188876013905, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988540870893812, + "tokens_p.mean_in_band": 0.446875, + "tokens_rate.above_band": 0.9849510910458992, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015048908954100828 + }, + { + "epoch": 2.185769066893907, + "grad_norm": 103.67100506622471, + "learning_rate": 3.553459829177982e-07, + "loss": 0.1638, + "step": 10260, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9754601226993865, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8251851851851851, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.880419489716206, + "success_rate.epoch.global": 0.8991935483870968, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936583129584352, + "tokens_p.mean_in_band": 0.69609375, + "tokens_rate.above_band": 0.964622641509434, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03537735849056604 + }, + { + "epoch": 2.1868342564976566, + "grad_norm": 143.77941569645327, + "learning_rate": 3.553120687954009e-07, + "loss": 0.4796, + "step": 10265, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.905511811023622, + "success_rate.epoch.env.math": 0.9755102040816327, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.825, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8803086896122264, + "success_rate.epoch.global": 0.898567335243553, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.7777777777777778, + "tokens_p.mean_above_band": 0.996179706601467, + "tokens_p.mean_in_band": 0.6774553571428571, + "tokens_rate.above_band": 0.9831730769230769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016826923076923076 + }, + { + "epoch": 2.187899446101406, + "grad_norm": 169.1682516071941, + "learning_rate": 3.5527815808953417e-07, + "loss": 0.3989, + "step": 10270, + "success_rate.epoch.env.abd": 0.9879518072289156, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9058823529411765, + "success_rate.epoch.env.math": 0.973630831643002, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8233576642335766, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8800697168624271, + "success_rate.epoch.global": 0.8974358974358975, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9935213414634146, + "tokens_p.mean_in_band": 0.64, + "tokens_rate.above_band": 0.8677248677248677, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13227513227513227 + }, + { + "epoch": 2.1889646357051555, + "grad_norm": 64.89640814461663, + "learning_rate": 3.5524425082491184e-07, + "loss": 0.2961, + "step": 10275, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9058823529411765, + "success_rate.epoch.env.math": 0.9738430583501007, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8231884057971014, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8800866622385132, + "success_rate.epoch.global": 0.8974504249291785, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9880681818181818, + "tokens_p.mean_in_band": 0.5217633928571429, + "tokens_rate.above_band": 0.9401709401709402, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05982905982905983 + }, + { + "epoch": 2.1900298253089048, + "grad_norm": 352.2383096109671, + "learning_rate": 3.552103470262453e-07, + "loss": 0.249, + "step": 10280, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.90625, + "success_rate.epoch.env.math": 0.974, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8247126436781609, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8802729192012205, + "success_rate.epoch.global": 0.8980281690140846, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9876179245283019, + "tokens_p.mean_in_band": 0.751953125, + "tokens_rate.above_band": 0.9464285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05357142857142857 + }, + { + "epoch": 2.1910950149126545, + "grad_norm": 151.01735787478043, + "learning_rate": 3.5517644671824345e-07, + "loss": 0.2639, + "step": 10285, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9066147859922179, + "success_rate.epoch.env.math": 0.9742574257425742, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8257142857142857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.880420542271304, + "success_rate.epoch.global": 0.8985994397759104, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9916237113402062, + "tokens_p.mean_in_band": 0.8072916666666666, + "tokens_rate.above_band": 0.97, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03 + }, + { + "epoch": 2.192160204516404, + "grad_norm": 125.98396183994296, + "learning_rate": 3.551425499256126e-07, + "loss": 0.198, + "step": 10290, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.9743589743589743, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.826950354609929, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.880640100381498, + "success_rate.epoch.global": 0.8991643454038997, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991964285714285, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.1932253941201534, + "grad_norm": 98.91486294312406, + "learning_rate": 3.551086566730564e-07, + "loss": 0.362, + "step": 10295, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.9744597249508841, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8260869565217391, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.880570768790927, + "success_rate.epoch.global": 0.8986149584487535, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9900990099009901, + "tokens_p.mean_in_band": 0.511328125, + "tokens_rate.above_band": 0.9099099099099099, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09009009009009009 + }, + { + "epoch": 2.1942905837239026, + "grad_norm": 112.55139212232042, + "learning_rate": 3.5507476698527613e-07, + "loss": 0.2197, + "step": 10300, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9076923076923077, + "success_rate.epoch.env.math": 0.9745098039215686, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.827538247566064, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808361900900558, + "success_rate.epoch.global": 0.8991735537190083, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981352880658436, + "tokens_p.mean_in_band": 0.6744791666666666, + "tokens_rate.above_band": 0.9969230769230769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003076923076923077 + }, + { + "epoch": 2.1953557733276523, + "grad_norm": 40.75117826753435, + "learning_rate": 3.550408808869703e-07, + "loss": 0.2322, + "step": 10305, + "success_rate.epoch.env.abd": 0.9880952380952381, + "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, + "success_rate.epoch.env.agentgym:sciworld": 0.98, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9080459770114943, + "success_rate.epoch.env.math": 0.974757281553398, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8284923928077456, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8809775802893918, + "success_rate.epoch.global": 0.8997260273972603, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960526315789474, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.196420962931402, + "grad_norm": 121.9444776883651, + "learning_rate": 3.550069984028348e-07, + "loss": 0.3273, + "step": 10310, + "success_rate.epoch.env.abd": 0.9882352941176471, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9045801526717557, + "success_rate.epoch.env.math": 0.9748062015503876, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8294360385144429, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8810169188593856, + "success_rate.epoch.global": 0.8997275204359673, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989792713567839, + "tokens_p.mean_in_band": 0.49594907407407407, + "tokens_rate.above_band": 0.9778869778869779, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022113022113022112 + }, + { + "epoch": 2.1974861525351512, + "grad_norm": 47.834997807906134, + "learning_rate": 3.5497311955756303e-07, + "loss": 0.197, + "step": 10315, + "success_rate.epoch.env.abd": 0.9882352941176471, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9049429657794676, + "success_rate.epoch.env.math": 0.9749518304431599, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8308321964529332, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8811900643083832, + "success_rate.epoch.global": 0.9002710027100271, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9951704545454545, + "tokens_p.mean_in_band": 0.80859375, + "tokens_rate.above_band": 0.9821428571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017857142857142856 + }, + { + "epoch": 2.198551342138901, + "grad_norm": 439.5282198262885, + "learning_rate": 3.5493924437584555e-07, + "loss": 0.263, + "step": 10320, + "success_rate.epoch.env.abd": 0.9883720930232558, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9053030303030303, + "success_rate.epoch.env.math": 0.975095785440613, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8310626702997275, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.879378690161132, + "success_rate.epoch.global": 0.8997304582210243, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7857142857142857, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9915659617321249, + "tokens_p.mean_in_band": 0.7450635302197802, + "tokens_rate.above_band": 0.9160516605166051, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08394833948339483 + }, + { + "epoch": 2.19961653174265, + "grad_norm": 761.9837880416337, + "learning_rate": 3.549053728823704e-07, + "loss": 0.2322, + "step": 10325, + "success_rate.epoch.env.abd": 0.9883720930232558, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9056603773584906, + "success_rate.epoch.env.math": 0.9752851711026616, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8319783197831978, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8795459134436289, + "success_rate.epoch.global": 0.9002680965147453, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9952083333333334, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.9933774834437086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006622516556291391 + }, + { + "epoch": 2.2006817213464, + "grad_norm": 90.05011135362957, + "learning_rate": 3.548715051018229e-07, + "loss": 0.2618, + "step": 10330, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9060150375939849, + "success_rate.epoch.env.math": 0.9753787878787878, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8331090174966352, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8797134812016438, + "success_rate.epoch.global": 0.9008, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956395348837209, + "tokens_p.mean_in_band": 0.6315104166666666, + "tokens_rate.above_band": 0.9772727272727273, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022727272727272728 + }, + { + "epoch": 2.201746910950149, + "grad_norm": 116.87056608849511, + "learning_rate": 3.548376410588856e-07, + "loss": 0.3251, + "step": 10335, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.946236559139785, + "success_rate.epoch.env.logic": 0.9063670411985019, + "success_rate.epoch.env.math": 0.975517890772128, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8326639892904953, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8797506560192971, + "success_rate.epoch.global": 0.9007957559681697, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982541899441341, + "tokens_p.mean_in_band": 0.7131696428571429, + "tokens_rate.above_band": 0.9808219178082191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019178082191780823 + }, + { + "epoch": 2.2028121005538988, + "grad_norm": 102.9445188138565, + "learning_rate": 3.548037807782385e-07, + "loss": 0.1565, + "step": 10340, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9468085106382979, + "success_rate.epoch.env.logic": 0.9067164179104478, + "success_rate.epoch.env.math": 0.9757462686567164, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8331108144192256, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8799275594041663, + "success_rate.epoch.global": 0.9013192612137203, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980837264150944, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9953051643192489, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004694835680751174 + }, + { + "epoch": 2.203877290157648, + "grad_norm": 203.637264163332, + "learning_rate": 3.547699242845585e-07, + "loss": 0.3841, + "step": 10345, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9468085106382979, + "success_rate.epoch.env.logic": 0.9067164179104478, + "success_rate.epoch.env.math": 0.9759704251386322, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8342175066312998, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8800485456490744, + "success_rate.epoch.global": 0.9018372703412073, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969907407407408, + "tokens_p.mean_in_band": 0.873046875, + "tokens_rate.above_band": 0.9712230215827338, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02877697841726619 + }, + { + "epoch": 2.2049424797613977, + "grad_norm": 121.72233733345384, + "learning_rate": 3.547360716025202e-07, + "loss": 0.4605, + "step": 10350, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9080882352941176, + "success_rate.epoch.env.math": 0.9760589318600368, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8309114927344782, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8799316566147641, + "success_rate.epoch.global": 0.9007832898172323, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9978070175438597, + "tokens_p.mean_in_band": 0.5127650669642857, + "tokens_rate.above_band": 0.9606741573033708, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03932584269662921 + }, + { + "epoch": 2.206007669365147, + "grad_norm": 475.99020721070815, + "learning_rate": 3.54702222756795e-07, + "loss": 0.307, + "step": 10355, + "success_rate.epoch.env.abd": 0.9886363636363636, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9479166666666666, + "success_rate.epoch.env.logic": 0.9080882352941176, + "success_rate.epoch.env.math": 0.9761904761904762, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8318002628120894, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8801048621019265, + "success_rate.epoch.global": 0.9012987012987013, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983407079646017, + "tokens_p.mean_in_band": 0.8466796875, + "tokens_rate.above_band": 0.9964726631393298, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003527336860670194 + }, + { + "epoch": 2.2070728589688966, + "grad_norm": 65.40048558684627, + "learning_rate": 3.546683777720518e-07, + "loss": 0.2953, + "step": 10360, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9479166666666666, + "success_rate.epoch.env.logic": 0.9087591240875912, + "success_rate.epoch.env.math": 0.97632058287796, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8326797385620915, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8802692396054188, + "success_rate.epoch.global": 0.9018087855297158, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995049504950495, + "tokens_p.mean_in_band": 0.78515625, + "tokens_rate.above_band": 0.9805825242718447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019417475728155338 + }, + { + "epoch": 2.208138048572646, + "grad_norm": 348.89387099802366, + "learning_rate": 3.546345366729566e-07, + "loss": 0.5394, + "step": 10365, + "success_rate.epoch.env.abd": 0.9887640449438202, + "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9484536082474226, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9764065335753176, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8326848249027238, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8803564908437884, + "success_rate.epoch.global": 0.9017994858611825, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956597222222222, + "tokens_p.mean_in_band": 0.7035590277777778, + "tokens_rate.above_band": 0.9411764705882353, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.058823529411764705 + }, + { + "epoch": 2.2092032381763955, + "grad_norm": 59.11365061195948, + "learning_rate": 3.546006994841725e-07, + "loss": 0.1907, + "step": 10370, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9094202898550725, + "success_rate.epoch.env.math": 0.9765342960288809, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8320413436692506, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8805636465738764, + "success_rate.epoch.global": 0.9017902813299232, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988154332129964, + "tokens_p.mean_below_band": 1.525040715932846e-08, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9964028776978417, + "tokens_rate.below_band": 0.0017985611510791368, + "tokens_rate.in_band": 0.0017985611510791368 + }, + { + "epoch": 2.2102684277801448, + "grad_norm": 80.78888263581783, + "learning_rate": 3.5456686623035987e-07, + "loss": 0.4095, + "step": 10375, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9494949494949495, + "success_rate.epoch.env.logic": 0.9097472924187726, + "success_rate.epoch.env.math": 0.9765765765765766, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8322663252240717, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8806645213306431, + "success_rate.epoch.global": 0.9017811704834605, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9942239336492891, + "tokens_p.mean_in_band": 0.63232421875, + "tokens_rate.above_band": 0.9634703196347032, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0365296803652968 + }, + { + "epoch": 2.2113336173838944, + "grad_norm": 2.2558260457402066, + "learning_rate": 3.54533036936176e-07, + "loss": 0.2063, + "step": 10380, + "success_rate.epoch.env.abd": 0.9888888888888889, + "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, + "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.910394265232975, + "success_rate.epoch.env.math": 0.9767025089605734, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8326947637292464, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8800175090394302, + "success_rate.epoch.global": 0.9017721518987342, + "success_rate.window.env.ded": 0.6666666666666666, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9933997844827587, + "tokens_p.mean_in_band": 0.58, + "tokens_rate.above_band": 0.8743718592964824, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12562814070351758 + }, + { + "epoch": 2.2123988069876437, + "grad_norm": 0.0, + "learning_rate": 3.5449921162627557e-07, + "loss": 0.1028, + "step": 10385, + "success_rate.epoch.env.abd": 0.9891304347826086, + "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9411764705882353, + "success_rate.epoch.env.logic": 0.9113475177304965, + "success_rate.epoch.env.math": 0.9767857142857143, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8331210191082803, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8802019576230748, + "success_rate.epoch.global": 0.9022670025188917, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984032846715328, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9989583333333333, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0010416666666666667 + }, + { + "epoch": 2.2134639965913934, + "grad_norm": 53.84760187355933, + "learning_rate": 3.544653903253102e-07, + "loss": 0.213, + "step": 10390, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.941747572815534, + "success_rate.epoch.env.logic": 0.9113475177304965, + "success_rate.epoch.env.math": 0.9768683274021353, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8331226295828066, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8802721578834017, + "success_rate.epoch.global": 0.9022556390977443, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976415094339622, + "tokens_p.mean_in_band": 0.3755580357142857, + "tokens_rate.above_band": 0.9098712446351931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09012875536480687 + }, + { + "epoch": 2.2145291861951426, + "grad_norm": 146.0855247999162, + "learning_rate": 3.5443157305792855e-07, + "loss": 0.5253, + "step": 10395, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.941747572815534, + "success_rate.epoch.env.logic": 0.9122807017543859, + "success_rate.epoch.env.math": 0.9769911504424779, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8337531486146096, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8805842993511246, + "success_rate.epoch.global": 0.9027431421446384, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967592592592592, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9975369458128078, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0024630541871921183 + }, + { + "epoch": 2.2155943757988923, + "grad_norm": 113.27216231718302, + "learning_rate": 3.543977598487764e-07, + "loss": 0.1942, + "step": 10400, + "success_rate.epoch.env.abd": 0.989247311827957, + "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, + "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9423076923076923, + "success_rate.epoch.env.logic": 0.9122807017543859, + "success_rate.epoch.env.math": 0.9771929824561404, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8345864661654135, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8807293238108177, + "success_rate.epoch.global": 0.9032258064516129, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9924202127659575, + "tokens_p.mean_in_band": 0.7137276785714286, + "tokens_rate.above_band": 0.9710743801652892, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028925619834710745 + }, + { + "epoch": 2.2166595654026415, + "grad_norm": 68.29868150681403, + "learning_rate": 3.543639507224967e-07, + "loss": 0.2539, + "step": 10405, + "success_rate.epoch.env.abd": 0.9893617021276596, + "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9125874125874126, + "success_rate.epoch.env.math": 0.9773123909249564, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.83375, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808298566064136, + "success_rate.epoch.global": 0.9032098765432098, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980458311063602, + "tokens_p.mean_in_band": 0.66015625, + "tokens_rate.above_band": 0.9978666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0021333333333333334 + }, + { + "epoch": 2.217724755006391, + "grad_norm": 69.45311921574765, + "learning_rate": 3.5433014570372913e-07, + "loss": 0.208, + "step": 10410, + "success_rate.epoch.env.abd": 0.9893617021276596, + "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9131944444444444, + "success_rate.epoch.env.math": 0.9773913043478261, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8337468982630273, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808919332921344, + "success_rate.epoch.global": 0.9031941031941032, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966397849462365, + "tokens_p.mean_in_band": 0.76875, + "tokens_rate.above_band": 0.9823943661971831, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017605633802816902 + }, + { + "epoch": 2.2187899446101405, + "grad_norm": 163.18710301265094, + "learning_rate": 3.5429634481711073e-07, + "loss": 0.2559, + "step": 10415, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9134948096885813, + "success_rate.epoch.env.math": 0.9775474956822107, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8320987654320988, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8807937883676309, + "success_rate.epoch.global": 0.9026894865525672, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9931469298245614, + "tokens_p.mean_in_band": 0.5030048076923077, + "tokens_rate.above_band": 0.8976377952755905, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10236220472440945 + }, + { + "epoch": 2.21985513421389, + "grad_norm": 102.65183870658844, + "learning_rate": 3.5426254808727513e-07, + "loss": 0.1708, + "step": 10420, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.911864406779661, + "success_rate.epoch.env.math": 0.9775862068965517, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8314883148831488, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8805935936182191, + "success_rate.epoch.global": 0.9021897810218978, + "success_rate.window.env.logic": 0.8333333333333334, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975436681222707, + "tokens_p.mean_in_band": 0.6029094827586207, + "tokens_rate.above_band": 0.9594972067039106, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.040502793296089384 + }, + { + "epoch": 2.2209203238176394, + "grad_norm": 29.948138320034516, + "learning_rate": 3.542287555388533e-07, + "loss": 0.2419, + "step": 10425, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.912751677852349, + "success_rate.epoch.env.math": 0.9777777777777777, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8316953316953317, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8808635354772734, + "success_rate.epoch.global": 0.9026634382566586, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986595174262735, + "tokens_p.mean_in_band": 0.68359375, + "tokens_rate.above_band": 0.9946666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005333333333333333 + }, + { + "epoch": 2.221985513421389, + "grad_norm": 43.04740596362637, + "learning_rate": 3.54194967196473e-07, + "loss": 0.2223, + "step": 10430, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.912751677852349, + "success_rate.epoch.env.math": 0.9779286926994907, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8329268292682926, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8809892093404256, + "success_rate.epoch.global": 0.9031325301204819, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9923295454545454, + "tokens_p.mean_in_band": 0.7208806818181818, + "tokens_rate.above_band": 0.9090909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09090909090909091 + }, + { + "epoch": 2.2230507030251383, + "grad_norm": 175.9974499026715, + "learning_rate": 3.541611830847588e-07, + "loss": 0.182, + "step": 10435, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9130434782608695, + "success_rate.epoch.env.math": 0.9779661016949153, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8343409915356711, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8811751958867986, + "success_rate.epoch.global": 0.9035971223021583, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954545454545455, + "tokens_p.mean_in_band": 0.703125, + "tokens_rate.above_band": 0.9763313609467456, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023668639053254437 + }, + { + "epoch": 2.224115892628888, + "grad_norm": 175.09294908698772, + "learning_rate": 3.5412740322833246e-07, + "loss": 0.2044, + "step": 10440, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9141914191419142, + "success_rate.epoch.env.math": 0.9780775716694773, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8349397590361446, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8813441211918968, + "success_rate.epoch.global": 0.9040572792362769, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982798165137615, + "tokens_p.mean_in_band": 0.80859375, + "tokens_rate.above_band": 0.9732142857142857, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026785714285714284 + }, + { + "epoch": 2.225181082232637, + "grad_norm": 139.77944157282403, + "learning_rate": 3.540936276518125e-07, + "loss": 0.1099, + "step": 10445, + "success_rate.epoch.env.abd": 0.9894736842105263, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9144736842105263, + "success_rate.epoch.env.math": 0.9781879194630873, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8349282296650717, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8813787651456376, + "success_rate.epoch.global": 0.9040380047505938, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9916460396039604, + "tokens_p.mean_in_band": 0.7787642045454546, + "tokens_rate.above_band": 0.9017857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09821428571428571 + }, + { + "epoch": 2.226246271836387, + "grad_norm": 95.48067858615772, + "learning_rate": 3.5405985637981417e-07, + "loss": 0.4742, + "step": 10450, + "success_rate.epoch.env.abd": 0.9896907216494846, + "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9433962264150944, + "success_rate.epoch.env.logic": 0.9147540983606557, + "success_rate.epoch.env.math": 0.9782608695652174, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.835909631391201, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8815198381835782, + "success_rate.epoch.global": 0.9044917257683215, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964028776978417, + "tokens_p.mean_in_band": 0.7825520833333334, + "tokens_rate.above_band": 0.9788732394366197, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02112676056338028 + }, + { + "epoch": 2.227311461440136, + "grad_norm": 235.28559288404165, + "learning_rate": 3.540260894369499e-07, + "loss": 0.3454, + "step": 10455, + "success_rate.epoch.env.abd": 0.9896907216494846, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9147540983606557, + "success_rate.epoch.env.math": 0.9782971619365609, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8358913813459268, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8801203391008546, + "success_rate.epoch.global": 0.904, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974760892667375, + "tokens_p.mean_in_band": 0.400390625, + "tokens_rate.above_band": 0.9957671957671957, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004232804232804233 + }, + { + "epoch": 2.228376651043886, + "grad_norm": 160.6381780763142, + "learning_rate": 3.5399232684782866e-07, + "loss": 0.23, + "step": 10460, + "success_rate.epoch.env.abd": 0.9897959183673469, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9153094462540716, + "success_rate.epoch.env.math": 0.9783333333333334, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8364705882352941, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8803108884810116, + "success_rate.epoch.global": 0.9044496487119438, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973723723723724, + "tokens_p.mean_in_band": 0.8645833333333334, + "tokens_rate.above_band": 0.9910714285714286, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008928571428571428 + }, + { + "epoch": 2.2294418406476355, + "grad_norm": 59.72525531913584, + "learning_rate": 3.5395856863705647e-07, + "loss": 0.1808, + "step": 10465, + "success_rate.epoch.env.abd": 0.9897959183673469, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9158576051779935, + "success_rate.epoch.env.math": 0.978405315614618, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8364485981308412, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8803652658538074, + "success_rate.epoch.global": 0.9044289044289044, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9911764705882353, + "tokens_p.mean_below_band": 5.617039278149605e-09, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.9826589595375722, + "tokens_rate.below_band": 0.005780346820809248, + "tokens_rate.in_band": 0.011560693641618497 + }, + { + "epoch": 2.2305070302513847, + "grad_norm": 82.93498714340033, + "learning_rate": 3.5392481482923607e-07, + "loss": 0.1552, + "step": 10470, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9439252336448598, + "success_rate.epoch.env.logic": 0.9161290322580645, + "success_rate.epoch.env.math": 0.9785123966942149, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8372093023255814, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8804782007525392, + "success_rate.epoch.global": 0.9048723897911833, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990717821782178, + "tokens_p.mean_in_band": 0.673828125, + "tokens_rate.above_band": 0.9921414538310412, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007858546168958742 + }, + { + "epoch": 2.231572219855134, + "grad_norm": 26.753714481629693, + "learning_rate": 3.538910654489669e-07, + "loss": 0.1306, + "step": 10475, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9785123966942149, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.838150289017341, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.880683096051175, + "success_rate.epoch.global": 0.9053117782909931, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982352941176471, + "tokens_p.mean_in_band": 0.8177083333333334, + "tokens_rate.above_band": 0.9929906542056075, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007009345794392523 + }, + { + "epoch": 2.2326374094588837, + "grad_norm": 2825.53851744974, + "learning_rate": 3.5385732052084536e-07, + "loss": 0.5081, + "step": 10480, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9454545454545454, + "success_rate.epoch.env.logic": 0.9171974522292994, + "success_rate.epoch.env.math": 0.9785478547854786, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8390804597701149, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8809109610890634, + "success_rate.epoch.global": 0.9057471264367816, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994201030927835, + "tokens_p.mean_in_band": 0.845703125, + "tokens_rate.above_band": 0.9918200408997955, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0081799591002045 + }, + { + "epoch": 2.2337025990626334, + "grad_norm": 49.641437949288076, + "learning_rate": 3.538235800694645e-07, + "loss": 0.1971, + "step": 10485, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9174603174603174, + "success_rate.epoch.env.math": 0.9770491803278688, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8396334478808706, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8809161061116616, + "success_rate.epoch.global": 0.905720823798627, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.993629476584022, + "tokens_p.mean_in_band": 0.56988525390625, + "tokens_rate.above_band": 0.989100817438692, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010899182561307902 + }, + { + "epoch": 2.2347677886663826, + "grad_norm": 51.797906689952775, + "learning_rate": 3.537898441194141e-07, + "loss": 0.2071, + "step": 10490, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9182389937106918, + "success_rate.epoch.env.math": 0.9771986970684039, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8401826484018264, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8810504146127404, + "success_rate.epoch.global": 0.9061503416856492, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9901685393258427, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.2358329782701323, + "grad_norm": 122.66354994853877, + "learning_rate": 3.5375611269528063e-07, + "loss": 0.3037, + "step": 10495, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8771929824561403, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.91875, + "success_rate.epoch.env.math": 0.9773095623987034, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8397727272727272, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8796456682572408, + "success_rate.epoch.global": 0.9056689342403628, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.99755859375, + "tokens_p.mean_in_band": 0.6898871527777778, + "tokens_rate.above_band": 0.9726443768996961, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02735562310030395 + }, + { + "epoch": 2.2368981678738815, + "grad_norm": 147.29677746440314, + "learning_rate": 3.5372238582164736e-07, + "loss": 0.6009, + "step": 10500, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8771929824561403, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9380530973451328, + "success_rate.epoch.env.logic": 0.9195046439628483, + "success_rate.epoch.env.math": 0.9773828756058158, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8391845979614949, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8789499390988695, + "success_rate.epoch.global": 0.9051918735891648, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9930827460193161, + "tokens_p.mean_in_band": 0.5300830696202532, + "tokens_rate.above_band": 0.9065309985802177, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0934690014197823 + }, + { + "epoch": 2.237963357477631, + "grad_norm": 145.82931575421242, + "learning_rate": 3.5368866352309426e-07, + "loss": 0.2903, + "step": 10505, + "success_rate.epoch.env.abd": 0.98989898989899, + "success_rate.epoch.env.agentgym:alfworld": 0.8793103448275862, + "success_rate.epoch.env.agentgym:sciworld": 0.984375, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9385964912280702, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9774557165861514, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8400900900900901, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8790227674686086, + "success_rate.epoch.global": 0.9051685393258427, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99902950310559, + "tokens_p.mean_in_band": 0.7232142857142857, + "tokens_rate.above_band": 0.9857142857142858, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014285714285714285 + }, + { + "epoch": 2.2390285470813804, + "grad_norm": 45.73652035213183, + "learning_rate": 3.5365494582419777e-07, + "loss": 0.1962, + "step": 10510, + "success_rate.epoch.env.abd": 0.9900990099009901, + "success_rate.epoch.env.agentgym:alfworld": 0.8793103448275862, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.916923076923077, + "success_rate.epoch.env.math": 0.9775280898876404, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8404494382022472, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8791739019996979, + "success_rate.epoch.global": 0.905592841163311, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992647058823529, + "tokens_p.mean_in_band": 0.7395833333333334, + "tokens_rate.above_band": 0.9912536443148688, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008746355685131196 + }, + { + "epoch": 2.24009373668513, + "grad_norm": 152.56967823307238, + "learning_rate": 3.536212327495312e-07, + "loss": 0.2413, + "step": 10515, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8793103448275862, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 1.0, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9174311926605505, + "success_rate.epoch.env.math": 0.9776, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8409854423292273, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8784270358804194, + "success_rate.epoch.global": 0.9055679287305123, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9934640522875817, + "tokens_p.mean_in_band": 0.6307444852941176, + "tokens_rate.above_band": 0.9, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1 + }, + { + "epoch": 2.2411589262888794, + "grad_norm": 57.488395828120865, + "learning_rate": 3.5358752432366436e-07, + "loss": 0.401, + "step": 10520, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8813559322033898, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9179331306990881, + "success_rate.epoch.env.math": 0.9777424483306836, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.841340782122905, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8741584280203014, + "success_rate.epoch.global": 0.9055432372505543, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967938813349815, + "tokens_p.mean_in_band": 0.6931423611111112, + "tokens_rate.above_band": 0.9677033492822966, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03229665071770335 + }, + { + "epoch": 2.242224115892629, + "grad_norm": 101.06287248325886, + "learning_rate": 3.5355382057116366e-07, + "loss": 0.3001, + "step": 10525, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8813559322033898, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9179331306990881, + "success_rate.epoch.env.math": 0.9779527559055118, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8398220244716351, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.874039478013352, + "success_rate.epoch.global": 0.9050772626931567, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9938271604938271, + "tokens_p.mean_in_band": 0.5947916666666667, + "tokens_rate.above_band": 0.84375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.15625 + }, + { + "epoch": 2.2432893054963783, + "grad_norm": 113.62139919590547, + "learning_rate": 3.535201215165923e-07, + "loss": 0.2201, + "step": 10530, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9391304347826087, + "success_rate.epoch.env.logic": 0.9181818181818182, + "success_rate.epoch.env.math": 0.978021978021978, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8396017699115044, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8734705437256863, + "success_rate.epoch.global": 0.9046153846153846, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9962053571428572, + "tokens_p.mean_in_band": 0.595108695652174, + "tokens_rate.above_band": 0.9480812641083521, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05191873589164785 + }, + { + "epoch": 2.244354495100128, + "grad_norm": 36.16810147761408, + "learning_rate": 3.5348642718450975e-07, + "loss": 0.3298, + "step": 10535, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9396551724137931, + "success_rate.epoch.env.logic": 0.918429003021148, + "success_rate.epoch.env.math": 0.9781931464174455, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8399558498896247, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8737623381938783, + "success_rate.epoch.global": 0.9050328227571116, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967320261437909, + "tokens_p.mean_in_band": 0.837890625, + "tokens_rate.above_band": 0.9956616052060737, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004338394793926247 + }, + { + "epoch": 2.245419684703877, + "grad_norm": 400.61767649684, + "learning_rate": 3.534527375994723e-07, + "loss": 0.2668, + "step": 10540, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9401709401709402, + "success_rate.epoch.env.logic": 0.9159159159159159, + "success_rate.epoch.env.math": 0.9782945736434109, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8393839383938394, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8737062535517238, + "success_rate.epoch.global": 0.9045751633986928, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984098451327433, + "tokens_p.mean_in_band": 0.5321180555555556, + "tokens_rate.above_band": 0.9436325678496869, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05636743215031315 + }, + { + "epoch": 2.246484874307627, + "grad_norm": 344.50962458125224, + "learning_rate": 3.5341905278603255e-07, + "loss": 0.3096, + "step": 10545, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9401709401709402, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9768518518518519, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8399122807017544, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8738542977301281, + "success_rate.epoch.global": 0.9045553145336226, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973072562358276, + "tokens_p.mean_in_band": 0.584375, + "tokens_rate.above_band": 0.9887892376681614, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011210762331838564 + }, + { + "epoch": 2.247550063911376, + "grad_norm": 79.51576849188532, + "learning_rate": 3.533853727687399e-07, + "loss": 0.2618, + "step": 10550, + "success_rate.epoch.env.abd": 0.9902912621359223, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.940677966101695, + "success_rate.epoch.env.logic": 0.9142011834319527, + "success_rate.epoch.env.math": 0.9769585253456221, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8404371584699454, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8730652207102673, + "success_rate.epoch.global": 0.9041036717062635, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9995993589743589, + "tokens_p.mean_in_band": 0.5754743303571429, + "tokens_rate.above_band": 0.9653465346534653, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034653465346534656 + }, + { + "epoch": 2.248615253515126, + "grad_norm": 269.0632414404321, + "learning_rate": 3.5335169757214004e-07, + "loss": 0.2153, + "step": 10555, + "success_rate.epoch.env.abd": 0.9903846153846154, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.940677966101695, + "success_rate.epoch.env.logic": 0.9147058823529411, + "success_rate.epoch.env.math": 0.9770992366412213, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.840958605664488, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8731797853157058, + "success_rate.epoch.global": 0.9045161290322581, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949596774193549, + "tokens_p.mean_in_band": 0.8095703125, + "tokens_rate.above_band": 0.96875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03125 + }, + { + "epoch": 2.249680443118875, + "grad_norm": 247.01504608607294, + "learning_rate": 3.533180272207752e-07, + "loss": 0.2698, + "step": 10560, + "success_rate.epoch.env.abd": 0.9903846153846154, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9127906976744186, + "success_rate.epoch.env.math": 0.9772382397572079, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8400435255712732, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8722165018144559, + "success_rate.epoch.global": 0.9036402569593148, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.4375, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.983567950889077, + "tokens_p.mean_below_band": 3.2782554626464844e-07, + "tokens_p.mean_in_band": 0.4833713503649635, + "tokens_rate.above_band": 0.6322269807280514, + "tokens_rate.below_band": 0.0010706638115631692, + "tokens_rate.in_band": 0.36670235546038543 + }, + { + "epoch": 2.2507456327226247, + "grad_norm": 228.30694282313985, + "learning_rate": 3.5328436173918415e-07, + "loss": 0.302, + "step": 10565, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9132947976878613, + "success_rate.epoch.env.math": 0.9773755656108597, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8405639913232104, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8723304533336019, + "success_rate.epoch.global": 0.9040511727078892, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979910714285715, + "tokens_p.mean_in_band": 0.7712053571428571, + "tokens_rate.above_band": 0.9523809523809523, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.047619047619047616 + }, + { + "epoch": 2.251810822326374, + "grad_norm": 195.40166225314178, + "learning_rate": 3.53250701151902e-07, + "loss": 0.2517, + "step": 10570, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9137931034482759, + "success_rate.epoch.env.math": 0.9774096385542169, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8405172413793104, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8723746014026812, + "success_rate.epoch.global": 0.9039932030586236, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9929288321167883, + "tokens_p.mean_in_band": 0.71630859375, + "tokens_rate.above_band": 0.9448275862068966, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05517241379310345 + }, + { + "epoch": 2.2528760119301237, + "grad_norm": 279.78370992556984, + "learning_rate": 3.5321704548346026e-07, + "loss": 0.34, + "step": 10575, + "success_rate.epoch.env.abd": 0.9904761904761905, + "success_rate.epoch.env.agentgym:alfworld": 0.890625, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9327731092436975, + "success_rate.epoch.env.logic": 0.9140401146131805, + "success_rate.epoch.env.math": 0.9775449101796407, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8401287553648069, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8725318657560392, + "success_rate.epoch.global": 0.9039763113367174, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952590811965812, + "tokens_p.mean_in_band": 0.4995888157894737, + "tokens_rate.above_band": 0.9609856262833676, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039014373716632446 + }, + { + "epoch": 2.253941201533873, + "grad_norm": 175.15692383435746, + "learning_rate": 3.531833947583871e-07, + "loss": 0.1896, + "step": 10580, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.890625, + "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9150141643059491, + "success_rate.epoch.env.math": 0.9776119402985075, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8404710920770878, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8727167282899674, + "success_rate.epoch.global": 0.9043807919123842, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9998421717171717, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.2550063911376226, + "grad_norm": 50.35143934821158, + "learning_rate": 3.5314974900120686e-07, + "loss": 0.1891, + "step": 10585, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.8923076923076924, + "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9152542372881356, + "success_rate.epoch.env.math": 0.9776785714285714, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8413205537806177, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8729959972314739, + "success_rate.epoch.global": 0.9047818791946308, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974941037735849, + "tokens_p.mean_in_band": 0.796875, + "tokens_rate.above_band": 0.9883449883449883, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011655011655011656 + }, + { + "epoch": 2.256071580741372, + "grad_norm": 152.1123489151362, + "learning_rate": 3.531161082364403e-07, + "loss": 0.2123, + "step": 10590, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.8939393939393939, + "success_rate.epoch.env.agentgym:sciworld": 0.9852941176470589, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9129213483146067, + "success_rate.epoch.env.math": 0.9777777777777777, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8416578108395324, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8730124434919165, + "success_rate.epoch.global": 0.9047619047619048, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981496710526315, + "tokens_p.mean_in_band": 0.49107142857142855, + "tokens_rate.above_band": 0.9848812095032398, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01511879049676026 + }, + { + "epoch": 2.2571367703451215, + "grad_norm": 115.21188902946518, + "learning_rate": 3.5308247248860455e-07, + "loss": 0.2604, + "step": 10595, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.8955223880597015, + "success_rate.epoch.env.agentgym:sciworld": 0.9852941176470589, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9131652661064426, + "success_rate.epoch.env.math": 0.9779735682819384, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8419936373276776, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8732268552105027, + "success_rate.epoch.global": 0.9051580698835274, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969536163522013, + "tokens_p.mean_in_band": 0.712109375, + "tokens_rate.above_band": 0.9845201238390093, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015479876160990712 + }, + { + "epoch": 2.2582019599488707, + "grad_norm": 354.0929266790693, + "learning_rate": 3.530488417822132e-07, + "loss": 0.3058, + "step": 10600, + "success_rate.epoch.env.abd": 0.9905660377358491, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.9852941176470589, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9136490250696379, + "success_rate.epoch.env.math": 0.9781021897810219, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8424947145877378, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8734677545915982, + "success_rate.epoch.global": 0.9055509527754764, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985972568578554, + "tokens_p.mean_in_band": 0.703125, + "tokens_rate.above_band": 0.9950372208436724, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004962779156327543 + }, + { + "epoch": 2.2592671495526204, + "grad_norm": 155.10747268681467, + "learning_rate": 3.53015216141776e-07, + "loss": 0.3618, + "step": 10605, + "success_rate.epoch.env.abd": 0.9907407407407407, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9141274238227147, + "success_rate.epoch.env.math": 0.9781976744186046, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8417721518987342, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721719726655174, + "success_rate.epoch.global": 0.9051155115511551, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.997032122905028, + "tokens_p.mean_in_band": 0.7093098958333334, + "tokens_rate.above_band": 0.9675675675675676, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032432432432432434 + }, + { + "epoch": 2.2603323391563697, + "grad_norm": 2187.0544159385286, + "learning_rate": 3.5298159559179904e-07, + "loss": 0.2654, + "step": 10610, + "success_rate.epoch.env.abd": 0.9908256880733946, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.9141274238227147, + "success_rate.epoch.env.math": 0.978386167146974, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8421052631578947, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8722647572105295, + "success_rate.epoch.global": 0.905505341002465, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960488505747126, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.2613975287601193, + "grad_norm": 59.58398357067685, + "learning_rate": 3.529479801567848e-07, + "loss": 0.3142, + "step": 10615, + "success_rate.epoch.env.abd": 0.990990990990991, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9333333333333333, + "success_rate.epoch.env.logic": 0.914364640883978, + "success_rate.epoch.env.math": 0.9784791965566715, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8405036726128017, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721642080144807, + "success_rate.epoch.global": 0.9050736497545008, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9978448275862069, + "tokens_p.mean_in_band": 0.44375, + "tokens_rate.above_band": 0.9530516431924883, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046948356807511735 + }, + { + "epoch": 2.2624627183638686, + "grad_norm": 24.329539692316768, + "learning_rate": 3.52914369861232e-07, + "loss": 0.2094, + "step": 10620, + "success_rate.epoch.env.abd": 0.9910714285714286, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9344262295081968, + "success_rate.epoch.env.logic": 0.9146005509641874, + "success_rate.epoch.env.math": 0.9785100286532952, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8411703239289446, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8723923118085897, + "success_rate.epoch.global": 0.9054604726976365, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982416879795396, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.2635279079676183, + "grad_norm": 55.45856846103631, + "learning_rate": 3.5288076472963544e-07, + "loss": 0.193, + "step": 10625, + "success_rate.epoch.env.abd": 0.9910714285714286, + "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.936, + "success_rate.epoch.env.logic": 0.9148351648351648, + "success_rate.epoch.env.math": 0.9785714285714285, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.841831425598335, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8726223923495265, + "success_rate.epoch.global": 0.9058441558441559, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972713097713097, + "tokens_p.mean_in_band": 0.7317708333333334, + "tokens_rate.above_band": 0.993801652892562, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006198347107438017 + }, + { + "epoch": 2.2645930975713675, + "grad_norm": 230.45932447351632, + "learning_rate": 3.528471647864864e-07, + "loss": 0.2204, + "step": 10630, + "success_rate.epoch.env.abd": 0.9912280701754386, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9365079365079365, + "success_rate.epoch.env.logic": 0.915068493150685, + "success_rate.epoch.env.math": 0.9786628733997155, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8411214953271028, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8727834216306892, + "success_rate.epoch.global": 0.9058205335489087, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972524650780608, + "tokens_p.mean_in_band": 0.7430555555555556, + "tokens_rate.above_band": 0.9926590538336052, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00734094616639478 + }, + { + "epoch": 2.265658287175117, + "grad_norm": 62.55307292288869, + "learning_rate": 3.528135700562723e-07, + "loss": 0.2409, + "step": 10635, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9365079365079365, + "success_rate.epoch.env.logic": 0.9155313351498637, + "success_rate.epoch.env.math": 0.9787835926449788, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8416149068322981, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8728882625761698, + "success_rate.epoch.global": 0.9061996779388084, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9912014563106796, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9903846153846154, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009615384615384616 + }, + { + "epoch": 2.266723476778867, + "grad_norm": 124.68884782592995, + "learning_rate": 3.5277998056347664e-07, + "loss": 0.217, + "step": 10640, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9159891598915989, + "success_rate.epoch.env.math": 0.9788434414668548, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8412371134020619, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.872981994753253, + "success_rate.epoch.global": 0.9061748195669607, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9943794964028777, + "tokens_p.mean_in_band": 0.62841796875, + "tokens_rate.above_band": 0.972027972027972, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027972027972027972 + }, + { + "epoch": 2.267788666382616, + "grad_norm": 198.79627418355625, + "learning_rate": 3.527463963325793e-07, + "loss": 0.2911, + "step": 10645, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9139784946236559, + "success_rate.epoch.env.math": 0.9789029535864979, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8406988694758479, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8728239359965472, + "success_rate.epoch.global": 0.9057507987220448, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.994637573964497, + "tokens_p.mean_in_band": 0.552734375, + "tokens_rate.above_band": 0.9548022598870056, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04519774011299435 + }, + { + "epoch": 2.2688538559863654, + "grad_norm": 208.3595787489902, + "learning_rate": 3.527128173880563e-07, + "loss": 0.1807, + "step": 10650, + "success_rate.epoch.env.abd": 0.991304347826087, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9146666666666666, + "success_rate.epoch.env.math": 0.9790209790209791, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8411885245901639, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8729417407776207, + "success_rate.epoch.global": 0.9061256961018298, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957561728395061, + "tokens_p.mean_in_band": 0.765625, + "tokens_rate.above_band": 0.9418604651162791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05813953488372093 + }, + { + "epoch": 2.269919045590115, + "grad_norm": 180.83061547128702, + "learning_rate": 3.526792437543794e-07, + "loss": 0.2654, + "step": 10655, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9148936170212766, + "success_rate.epoch.env.math": 0.979050279329609, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8411405295315683, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8730002479125066, + "success_rate.epoch.global": 0.9061014263074485, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9666666666666668, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970868644067796, + "tokens_p.mean_in_band": 0.7020596590909091, + "tokens_rate.above_band": 0.9554655870445344, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.044534412955465584 + }, + { + "epoch": 2.2709842351938647, + "grad_norm": 73.78093962309599, + "learning_rate": 3.526456754560172e-07, + "loss": 0.2106, + "step": 10660, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937007874015748, + "success_rate.epoch.env.logic": 0.9126984126984127, + "success_rate.epoch.env.math": 0.979050279329609, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8412537917087968, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8728428783713564, + "success_rate.epoch.global": 0.9056827150749802, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.7857142857142857, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957107843137255, + "tokens_p.mean_in_band": 0.5017361111111112, + "tokens_rate.above_band": 0.9659090909090909, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03409090909090909 + }, + { + "epoch": 2.272049424797614, + "grad_norm": 297.9625919760371, + "learning_rate": 3.526121125174338e-07, + "loss": 0.2347, + "step": 10665, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9375, + "success_rate.epoch.env.logic": 0.9126984126984127, + "success_rate.epoch.env.math": 0.9792243767313019, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8415741675075681, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717830010711165, + "success_rate.epoch.global": 0.9056603773584906, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992861675126904, + "tokens_p.mean_in_band": 0.7742745535714286, + "tokens_rate.above_band": 0.9911949685534591, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00880503144654088 + }, + { + "epoch": 2.273114614401363, + "grad_norm": 176.22258092123795, + "learning_rate": 3.5257855496308974e-07, + "loss": 0.1842, + "step": 10670, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, + "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9129287598944591, + "success_rate.epoch.env.math": 0.9793388429752066, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8420523138832998, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719018607019996, + "success_rate.epoch.global": 0.9060297572435395, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980221518987342, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9916317991631799, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008368200836820083 + }, + { + "epoch": 2.274179804005113, + "grad_norm": 85.51227995699642, + "learning_rate": 3.525450028174415e-07, + "loss": 0.2446, + "step": 10675, + "success_rate.epoch.env.abd": 0.9913793103448276, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9131578947368421, + "success_rate.epoch.env.math": 0.9794238683127572, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8418418418418419, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707443379033705, + "success_rate.epoch.global": 0.905616224648986, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9966643258426966, + "tokens_p.mean_in_band": 0.6833767361111112, + "tokens_rate.above_band": 0.9081632653061225, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09183673469387756 + }, + { + "epoch": 2.2752449936088626, + "grad_norm": 22.846849457120204, + "learning_rate": 3.525114561049416e-07, + "loss": 0.306, + "step": 10680, + "success_rate.epoch.env.abd": 0.9914529914529915, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9131578947368421, + "success_rate.epoch.env.math": 0.9794801641586868, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8427860696517413, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708874020182336, + "success_rate.epoch.global": 0.905982905982906, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961180124223602, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9877300613496932, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012269938650306749 + }, + { + "epoch": 2.276310183212612, + "grad_norm": 25.147806274700965, + "learning_rate": 3.5247791485003874e-07, + "loss": 0.2724, + "step": 10685, + "success_rate.epoch.env.abd": 0.9915254237288136, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9131578947368421, + "success_rate.epoch.env.math": 0.9782608695652174, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8424182358771061, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707497023734806, + "success_rate.epoch.global": 0.9055727554179567, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9886098130841121, + "tokens_p.mean_in_band": 0.6296142578125, + "tokens_rate.above_band": 0.9145299145299145, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08547008547008547 + }, + { + "epoch": 2.2773753728163615, + "grad_norm": 116.74880959657163, + "learning_rate": 3.524443790771774e-07, + "loss": 0.2487, + "step": 10690, + "success_rate.epoch.env.abd": 0.9916666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9140625, + "success_rate.epoch.env.math": 0.9782903663500678, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.841897233201581, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708000971289658, + "success_rate.epoch.global": 0.9055512721665382, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969031531531531, + "tokens_p.mean_in_band": 0.6125, + "tokens_rate.above_band": 0.9568965517241379, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04310344827586207 + }, + { + "epoch": 2.2784405624201107, + "grad_norm": 557.7708945851317, + "learning_rate": 3.524108488107984e-07, + "loss": 0.1765, + "step": 10695, + "success_rate.epoch.env.abd": 0.9916666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9145077720207254, + "success_rate.epoch.env.math": 0.977027027027027, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8415354330708661, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707370959911661, + "success_rate.epoch.global": 0.9051459293394777, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8541666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9949564873417721, + "tokens_p.mean_in_band": 0.50421142578125, + "tokens_rate.above_band": 0.9294117647058824, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07058823529411765 + }, + { + "epoch": 2.2795057520238604, + "grad_norm": 37.39371352089454, + "learning_rate": 3.523773240753382e-07, + "loss": 0.2124, + "step": 10700, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.937984496124031, + "success_rate.epoch.env.logic": 0.9123711340206185, + "success_rate.epoch.env.math": 0.9771197846567967, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8411764705882353, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869773601887578, + "success_rate.epoch.global": 0.9043611323641928, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.5625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9855769230769231, + "tokens_p.mean_in_band": 0.7723137842465754, + "tokens_rate.above_band": 0.7517006802721088, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.24829931972789115 + }, + { + "epoch": 2.2805709416276096, + "grad_norm": 290.3568710654841, + "learning_rate": 3.5234380489522936e-07, + "loss": 0.2414, + "step": 10705, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.9125964010282777, + "success_rate.epoch.env.math": 0.9771197846567967, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.841796875, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702068818830092, + "success_rate.epoch.global": 0.9047256097560976, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984342379958246, + "tokens_p.mean_in_band": 0.7799479166666666, + "tokens_rate.above_band": 0.9937759336099585, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006224066390041493 + }, + { + "epoch": 2.2816361312313593, + "grad_norm": 209.5976528695828, + "learning_rate": 3.5231029129490056e-07, + "loss": 0.1927, + "step": 10710, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.9102564102564102, + "success_rate.epoch.env.math": 0.9772117962466488, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8425655976676385, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700724040180657, + "success_rate.epoch.global": 0.904707668944571, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961189516129032, + "tokens_p.mean_in_band": 0.4803059895833333, + "tokens_rate.above_band": 0.9627329192546584, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.037267080745341616 + }, + { + "epoch": 2.2827013208351086, + "grad_norm": 90.31931724893175, + "learning_rate": 3.522767832987762e-07, + "loss": 0.2834, + "step": 10715, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9384615384615385, + "success_rate.epoch.env.logic": 0.907928388746803, + "success_rate.epoch.env.math": 0.9772423025435074, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8416988416988417, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697847430015616, + "success_rate.epoch.global": 0.9039334341906202, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.6785714285714286, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9965277777777778, + "tokens_p.mean_in_band": 0.591796875, + "tokens_rate.above_band": 0.9402985074626866, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05970149253731343 + }, + { + "epoch": 2.2837665104388583, + "grad_norm": 81.28878968548122, + "learning_rate": 3.5224328093127664e-07, + "loss": 0.1832, + "step": 10720, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9393939393939394, + "success_rate.epoch.env.logic": 0.9056122448979592, + "success_rate.epoch.env.math": 0.9773936170212766, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8420038535645472, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697004324040191, + "success_rate.epoch.global": 0.9039186134137152, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985672242874845, + "tokens_p.mean_in_band": 0.525, + "tokens_rate.above_band": 0.9584323040380047, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04156769596199525 + }, + { + "epoch": 2.2848317000426075, + "grad_norm": 78.74146652947383, + "learning_rate": 3.5220978421681827e-07, + "loss": 0.1419, + "step": 10725, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.905852417302799, + "success_rate.epoch.env.math": 0.9762219286657859, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8423076923076923, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8685915861315654, + "success_rate.epoch.global": 0.9035285285285285, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9993388575458392, + "tokens_p.mean_below_band": 7.566995918750763e-10, + "tokens_p.mean_in_band": 0.6609375, + "tokens_rate.above_band": 0.9916083916083916, + "tokens_rate.below_band": 0.0013986013986013986, + "tokens_rate.in_band": 0.006993006993006993 + }, + { + "epoch": 2.285896889646357, + "grad_norm": 12.344528293430853, + "learning_rate": 3.521762931798131e-07, + "loss": 0.1707, + "step": 10730, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.95, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9065656565656566, + "success_rate.epoch.env.math": 0.9762845849802372, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.842911877394636, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688554180115176, + "success_rate.epoch.global": 0.9038893044128646, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9938979289940828, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9825581395348837, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01744186046511628 + }, + { + "epoch": 2.2869620792501064, + "grad_norm": 148.8198533793192, + "learning_rate": 3.521428078446693e-07, + "loss": 0.1666, + "step": 10735, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.907035175879397, + "success_rate.epoch.env.math": 0.9763469119579501, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8435114503816794, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691866454663366, + "success_rate.epoch.global": 0.90424739195231, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0010911312849162, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.9972144846796658, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002785515320334262 + }, + { + "epoch": 2.288027268853856, + "grad_norm": 188.99532954003726, + "learning_rate": 3.521093282357906e-07, + "loss": 0.2501, + "step": 10740, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.905, + "success_rate.epoch.env.math": 0.9764089121887287, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8444022770398482, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8690882501035685, + "success_rate.epoch.global": 0.9042316258351893, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9948961318051576, + "tokens_p.mean_in_band": 0.46890625, + "tokens_rate.above_band": 0.9654218533886584, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.034578146611341634 + }, + { + "epoch": 2.2890924584576053, + "grad_norm": 189.05306246799432, + "learning_rate": 3.520758543775769e-07, + "loss": 0.1519, + "step": 10745, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9057071960297767, + "success_rate.epoch.env.math": 0.9765319426336375, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8438978240302744, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691178658731242, + "success_rate.epoch.global": 0.904215976331361, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9934593023255814, + "tokens_p.mean_in_band": 0.4396701388888889, + "tokens_rate.above_band": 0.8269230769230769, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.17307692307692307 + }, + { + "epoch": 2.290157648061355, + "grad_norm": 199.9336574714201, + "learning_rate": 3.520423862944235e-07, + "loss": 0.2702, + "step": 10750, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.905940594059406, + "success_rate.epoch.env.math": 0.9766233766233766, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8447789275634995, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692274963779053, + "success_rate.epoch.global": 0.9045689019896831, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9937015503875969, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.2912228376651043, + "grad_norm": 252.97441239856076, + "learning_rate": 3.520089240107218e-07, + "loss": 0.1689, + "step": 10755, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.905940594059406, + "success_rate.epoch.env.math": 0.9767141009055628, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8448598130841122, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869243097269069, + "success_rate.epoch.global": 0.9045521292217328, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9923349056603774, + "tokens_p.mean_below_band": 4.6798959374427795e-08, + "tokens_p.mean_in_band": 0.798828125, + "tokens_rate.above_band": 0.9724770642201835, + "tokens_rate.below_band": 0.009174311926605505, + "tokens_rate.in_band": 0.01834862385321101 + }, + { + "epoch": 2.292288027268854, + "grad_norm": 37.7307893222106, + "learning_rate": 3.5197546755085885e-07, + "loss": 0.2123, + "step": 10760, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9064039408866995, + "success_rate.epoch.env.math": 0.9767741935483871, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.845724907063197, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693693275826327, + "success_rate.epoch.global": 0.9049012435991222, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9942602040816326, + "tokens_p.mean_in_band": 0.7958984375, + "tokens_rate.above_band": 0.9607843137254902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0392156862745098 + }, + { + "epoch": 2.293353216872603, + "grad_norm": 95.4721040616205, + "learning_rate": 3.519420169392175e-07, + "loss": 0.2353, + "step": 10765, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9066339066339066, + "success_rate.epoch.env.math": 0.9768637532133676, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8464384828862165, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695193625995258, + "success_rate.epoch.global": 0.9052478134110787, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964398734177216, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9813664596273292, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018633540372670808 + }, + { + "epoch": 2.294418406476353, + "grad_norm": 380.40758038516884, + "learning_rate": 3.5190857220017627e-07, + "loss": 0.1563, + "step": 10770, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9070904645476773, + "success_rate.epoch.env.math": 0.9769230769230769, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8471454880294659, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696422643290526, + "success_rate.epoch.global": 0.90559186637618, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9940878378378378, + "tokens_p.mean_in_band": 0.6953125, + "tokens_rate.above_band": 0.9736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02631578947368421 + }, + { + "epoch": 2.295483596080102, + "grad_norm": 111.0059881001226, + "learning_rate": 3.518751333581095e-07, + "loss": 0.1495, + "step": 10775, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9398496240601504, + "success_rate.epoch.env.logic": 0.9073170731707317, + "success_rate.epoch.env.math": 0.9770700636942675, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8477064220183487, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697272215457006, + "success_rate.epoch.global": 0.9059334298118669, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946428571428572, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.296548785683852, + "grad_norm": 112.71082779838599, + "learning_rate": 3.5184170043738715e-07, + "loss": 0.2298, + "step": 10780, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9402985074626866, + "success_rate.epoch.env.logic": 0.9075425790754258, + "success_rate.epoch.env.math": 0.97712833545108, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8474885844748858, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8698287717061628, + "success_rate.epoch.global": 0.9059120403749099, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0004050925925927, + "tokens_p.mean_in_band": 0.6734375, + "tokens_rate.above_band": 0.9953917050691244, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004608294930875576 + }, + { + "epoch": 2.297613975287601, + "grad_norm": 211.86203830332747, + "learning_rate": 3.518082734623749e-07, + "loss": 0.244, + "step": 10785, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8783783783783784, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9075425790754258, + "success_rate.epoch.env.math": 0.9771573604060914, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8472727272727273, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688367643589349, + "success_rate.epoch.global": 0.9055316091954023, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9943306510607169, + "tokens_p.mean_in_band": 0.7763247282608695, + "tokens_rate.above_band": 0.983453237410072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016546762589928057 + }, + { + "epoch": 2.2986791648913507, + "grad_norm": 98.98032406920781, + "learning_rate": 3.5177485245743406e-07, + "loss": 0.2516, + "step": 10790, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8783783783783784, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9075425790754258, + "success_rate.epoch.env.math": 0.9760705289672544, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8469202898550725, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8687059217356176, + "success_rate.epoch.global": 0.9051539012168933, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9908854166666666, + "tokens_p.mean_in_band": 0.6825657894736842, + "tokens_rate.above_band": 0.8347826086956521, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16521739130434782 + }, + { + "epoch": 2.2997443544951, + "grad_norm": 43.41128327043323, + "learning_rate": 3.5174143744692166e-07, + "loss": 0.3017, + "step": 10795, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8783783783783784, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9079903147699758, + "success_rate.epoch.env.math": 0.9761306532663316, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8477477477477477, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8688273142707359, + "success_rate.epoch.global": 0.9054921540656206, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.7291666666666666, + "tokens_rate.above_band": 0.963855421686747, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03614457831325301 + }, + { + "epoch": 2.3008095440988496, + "grad_norm": 325.84587311815767, + "learning_rate": 3.517080284551903e-07, + "loss": 0.2381, + "step": 10800, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9416058394160584, + "success_rate.epoch.env.logic": 0.9082125603864735, + "success_rate.epoch.env.math": 0.9761904761904762, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8472596585804133, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691643222058645, + "success_rate.epoch.global": 0.9054726368159204, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9523809523809523, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991143724696356, + "tokens_p.mean_in_band": 0.6583806818181818, + "tokens_rate.above_band": 0.9782178217821782, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02178217821782178 + }, + { + "epoch": 2.301874733702599, + "grad_norm": 57.590187926664065, + "learning_rate": 3.5167462550658814e-07, + "loss": 0.181, + "step": 10805, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9420289855072463, + "success_rate.epoch.env.logic": 0.908433734939759, + "success_rate.epoch.env.math": 0.97625, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8479427549194991, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693017699485952, + "success_rate.epoch.global": 0.9058073654390935, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949389460154242, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9873096446700508, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012690355329949238 + }, + { + "epoch": 2.3029399233063486, + "grad_norm": 94.29949364953272, + "learning_rate": 3.516412286254592e-07, + "loss": 0.2183, + "step": 10810, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9420289855072463, + "success_rate.epoch.env.logic": 0.9088729016786571, + "success_rate.epoch.env.math": 0.9763092269326683, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.848349687778769, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695405872946228, + "success_rate.epoch.global": 0.9061397318278053, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975507554296507, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9990566037735849, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0009433962264150943 + }, + { + "epoch": 2.3040051129100982, + "grad_norm": 270.6706210666898, + "learning_rate": 3.516078378361427e-07, + "loss": 0.1295, + "step": 10815, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.9093078758949881, + "success_rate.epoch.env.math": 0.976456009913259, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8486197684772929, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869655941435865, + "success_rate.epoch.global": 0.9064697609001406, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985849056603774, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.3050703025138475, + "grad_norm": 158.42480524875137, + "learning_rate": 3.515744531629737e-07, + "loss": 0.2727, + "step": 10820, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.9097387173396675, + "success_rate.epoch.env.math": 0.9765432098765432, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8490230905861457, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869761897712582, + "success_rate.epoch.global": 0.9067974772249474, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992236024844721, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.3061354921175967, + "grad_norm": 162.36449691438347, + "learning_rate": 3.515410746302827e-07, + "loss": 0.2156, + "step": 10825, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9424460431654677, + "success_rate.epoch.env.logic": 0.909952606635071, + "success_rate.epoch.env.math": 0.9766009852216748, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8496905393457118, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699078585859379, + "success_rate.epoch.global": 0.9071229050279329, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965062111801242, + "tokens_p.mean_in_band": 0.6981026785714286, + "tokens_rate.above_band": 0.9583333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041666666666666664 + }, + { + "epoch": 2.3072006817213464, + "grad_norm": 209.46721697374392, + "learning_rate": 3.515077022623957e-07, + "loss": 0.2887, + "step": 10830, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9428571428571428, + "success_rate.epoch.env.logic": 0.910377358490566, + "success_rate.epoch.env.math": 0.9766584766584766, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8493392070484581, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699678086312846, + "success_rate.epoch.global": 0.907098121085595, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99755859375, + "tokens_p.mean_in_band": 0.4765625, + "tokens_rate.above_band": 0.9696969696969697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030303030303030304 + }, + { + "epoch": 2.308265871325096, + "grad_norm": 292.374860147934, + "learning_rate": 3.5147433608363425e-07, + "loss": 0.433, + "step": 10835, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9105882352941177, + "success_rate.epoch.env.math": 0.9767441860465116, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8488576449912126, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700599745706612, + "success_rate.epoch.global": 0.9070735090152566, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968282029950083, + "tokens_p.mean_in_band": 0.46268136160714285, + "tokens_rate.above_band": 0.9980625518959314, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019374481040686411 + }, + { + "epoch": 2.3093310609288453, + "grad_norm": 53.31293918115055, + "learning_rate": 3.5144097611831544e-07, + "loss": 0.1717, + "step": 10840, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9112149532710281, + "success_rate.epoch.env.math": 0.976857490864799, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8491228070175438, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701513550089821, + "success_rate.epoch.global": 0.907394609536973, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976635514018691, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.3103962505325946, + "grad_norm": 265.3463427271913, + "learning_rate": 3.5140762239075165e-07, + "loss": 0.1828, + "step": 10845, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9118329466357309, + "success_rate.epoch.env.math": 0.9769696969696969, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8486439195100612, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701742015509928, + "success_rate.epoch.global": 0.9073691460055097, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9930555555555556, + "tokens_p.mean_in_band": 0.6712239583333334, + "tokens_rate.above_band": 0.9428571428571428, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05714285714285714 + }, + { + "epoch": 2.3114614401363442, + "grad_norm": 1020.7716963288548, + "learning_rate": 3.5137427492525104e-07, + "loss": 0.2734, + "step": 10850, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.881578947368421, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9122401847575058, + "success_rate.epoch.env.math": 0.9770531400966184, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8491717523975588, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704103344150489, + "success_rate.epoch.global": 0.9076870281400137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996733234714004, + "tokens_p.mean_in_band": 0.7421875, + "tokens_rate.above_band": 0.9960707269155207, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003929273084479371 + }, + { + "epoch": 2.312526629740094, + "grad_norm": 24.902540009846284, + "learning_rate": 3.5134093374611677e-07, + "loss": 0.1761, + "step": 10855, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.881578947368421, + "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9124423963133641, + "success_rate.epoch.env.math": 0.9771084337349397, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8490893321769297, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704748527228919, + "success_rate.epoch.global": 0.9076607387140903, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996284965034965, + "tokens_p.mean_in_band": 0.64609375, + "tokens_rate.above_band": 0.9662162162162162, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.033783783783783786 + }, + { + "epoch": 2.313591819343843, + "grad_norm": 139.46393070170106, + "learning_rate": 3.513075988776478e-07, + "loss": 0.3364, + "step": 10860, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9128440366972477, + "success_rate.epoch.env.math": 0.9760191846522782, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8492201039861352, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707477678499008, + "success_rate.epoch.global": 0.9076346284935242, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993605047748977, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9892037786774629, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010796221322537112 + }, + { + "epoch": 2.314657008947593, + "grad_norm": 81.24879969062685, + "learning_rate": 3.512742703441383e-07, + "loss": 0.3067, + "step": 10865, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9136363636363637, + "success_rate.epoch.env.math": 0.9749403341288783, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8486159169550173, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706667941575915, + "success_rate.epoch.global": 0.9072690217391305, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9996019108280255, + "tokens_p.mean_in_band": 0.4296875, + "tokens_rate.above_band": 0.9781931464174455, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021806853582554516 + }, + { + "epoch": 2.315722198551342, + "grad_norm": 239.88705770247424, + "learning_rate": 3.512409481698778e-07, + "loss": 0.1198, + "step": 10870, + "success_rate.epoch.env.abd": 0.9847328244274809, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9140271493212669, + "success_rate.epoch.env.math": 0.9750297265160524, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8481449525452976, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700734546984953, + "success_rate.epoch.global": 0.9069058903182126, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983766233766234, + "tokens_p.mean_in_band": 0.5870535714285714, + "tokens_rate.above_band": 0.9166666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08333333333333333 + }, + { + "epoch": 2.3167873881550918, + "grad_norm": 187.17945962524766, + "learning_rate": 3.512076323791515e-07, + "loss": 0.2214, + "step": 10875, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9440559440559441, + "success_rate.epoch.env.logic": 0.9144144144144144, + "success_rate.epoch.env.math": 0.9750889679715302, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8469475494411006, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700621355253367, + "success_rate.epoch.global": 0.9065452091767882, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9970657276995305, + "tokens_p.mean_in_band": 0.4659090909090909, + "tokens_rate.above_band": 0.9508928571428571, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.049107142857142856 + }, + { + "epoch": 2.317852577758841, + "grad_norm": 391.9682440873458, + "learning_rate": 3.511743229962394e-07, + "loss": 0.2438, + "step": 10880, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, + "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9146067415730337, + "success_rate.epoch.env.math": 0.9752650176678446, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8472103004291845, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701548290009292, + "success_rate.epoch.global": 0.9068594485541358, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955357142857143, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9697732997481109, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030226700251889168 + }, + { + "epoch": 2.3189177673625907, + "grad_norm": 121.18918070220326, + "learning_rate": 3.5114102004541727e-07, + "loss": 0.2787, + "step": 10885, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9444444444444444, + "success_rate.epoch.env.logic": 0.9146067415730337, + "success_rate.epoch.env.math": 0.9753521126760564, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.847008547008547, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703225819208888, + "success_rate.epoch.global": 0.9068364611260054, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990552325581395, + "tokens_p.mean_in_band": 0.2835286458333333, + "tokens_rate.above_band": 0.9862385321100917, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013761467889908258 + }, + { + "epoch": 2.31998295696634, + "grad_norm": 164.42361707192023, + "learning_rate": 3.5110772355095615e-07, + "loss": 0.2019, + "step": 10890, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, + "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9149888143176734, + "success_rate.epoch.env.math": 0.9754098360655737, + "success_rate.epoch.env.sat": 0.1111111111111111, + "success_rate.epoch.env.science": 0.8466780238500852, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703673468970841, + "success_rate.epoch.global": 0.906813627254509, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988704819277109, + "tokens_p.mean_in_band": 0.5926339285714286, + "tokens_rate.above_band": 0.9861386138613861, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013861386138613862 + }, + { + "epoch": 2.3210481465700896, + "grad_norm": 266.8056800521331, + "learning_rate": 3.5107443353712214e-07, + "loss": 0.3655, + "step": 10895, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9151785714285714, + "success_rate.epoch.env.math": 0.9754672897196262, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8463497453310697, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700022041835269, + "success_rate.epoch.global": 0.9064580559254327, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983734772978959, + "tokens_p.mean_in_band": 0.5698784722222222, + "tokens_rate.above_band": 0.9804560260586319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019543973941368076 + }, + { + "epoch": 2.322113336173839, + "grad_norm": 104.4113513988998, + "learning_rate": 3.5104115002817686e-07, + "loss": 0.3625, + "step": 10900, + "success_rate.epoch.env.abd": 0.9849624060150376, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9131403118040089, + "success_rate.epoch.env.math": 0.9755529685681025, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8460236886632826, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869848846620723, + "success_rate.epoch.global": 0.9061048440610484, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968522451456311, + "tokens_p.mean_in_band": 0.49973060344827586, + "tokens_rate.above_band": 0.9660023446658851, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03399765533411489 + }, + { + "epoch": 2.3231785257775885, + "grad_norm": 313.4408587656172, + "learning_rate": 3.5100787304837705e-07, + "loss": 0.287, + "step": 10905, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.956989247311828, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9133333333333333, + "success_rate.epoch.env.math": 0.9756380510440835, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8465430016863407, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699740417171223, + "success_rate.epoch.global": 0.906415343915344, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983198924731183, + "tokens_p.mean_in_band": 0.765625, + "tokens_rate.above_band": 0.9841269841269841, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015873015873015872 + }, + { + "epoch": 2.324243715381338, + "grad_norm": 142.3907490921456, + "learning_rate": 3.509746026219748e-07, + "loss": 0.2068, + "step": 10910, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.956989247311828, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9135254988913526, + "success_rate.epoch.env.math": 0.9756944444444444, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8464765100671141, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700006440497283, + "success_rate.epoch.global": 0.9063941990771259, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99625, + "tokens_p.mean_in_band": 0.2611607142857143, + "tokens_rate.above_band": 0.9345794392523364, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06542056074766354 + }, + { + "epoch": 2.3253089049850875, + "grad_norm": 424.1618384504015, + "learning_rate": 3.5094133877321723e-07, + "loss": 0.2341, + "step": 10915, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9448275862068966, + "success_rate.epoch.env.logic": 0.9137168141592921, + "success_rate.epoch.env.math": 0.9756944444444444, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8475, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701526773587861, + "success_rate.epoch.global": 0.9067017082785808, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9951171875, + "tokens_p.mean_in_band": 0.8583984375, + "tokens_rate.above_band": 0.9795918367346939, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02040816326530612 + }, + { + "epoch": 2.3263740945888367, + "grad_norm": 20.68506087550506, + "learning_rate": 3.5090808152634684e-07, + "loss": 0.1204, + "step": 10920, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9452054794520548, + "success_rate.epoch.env.logic": 0.9146608315098468, + "success_rate.epoch.env.math": 0.9757785467128027, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8476269775187344, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8702920401208594, + "success_rate.epoch.global": 0.9070072036673216, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977558348294434, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9982078853046595, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017921146953405018 + }, + { + "epoch": 2.3274392841925864, + "grad_norm": 38.27788884088602, + "learning_rate": 3.508748309056014e-07, + "loss": 0.24, + "step": 10925, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9452054794520548, + "success_rate.epoch.env.logic": 0.9148471615720524, + "success_rate.epoch.env.math": 0.9746835443037974, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8485099337748344, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870289702294432, + "success_rate.epoch.global": 0.9069843342036553, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9894859813084113, + "tokens_p.mean_in_band": 0.376953125, + "tokens_rate.above_band": 0.9907407407407407, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009259259259259259 + }, + { + "epoch": 2.3285044737963356, + "grad_norm": 62.60312562236735, + "learning_rate": 3.5084158693521354e-07, + "loss": 0.2694, + "step": 10930, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9455782312925171, + "success_rate.epoch.env.logic": 0.9128540305010894, + "success_rate.epoch.env.math": 0.9747126436781609, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8491343775762572, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704221911000442, + "success_rate.epoch.global": 0.9069616135328562, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980552575107297, + "tokens_p.mean_in_band": 0.679296875, + "tokens_rate.above_band": 0.9789915966386554, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02100840336134454 + }, + { + "epoch": 2.3295696634000853, + "grad_norm": 103.45720820916355, + "learning_rate": 3.5080834963941135e-07, + "loss": 0.1351, + "step": 10935, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9455782312925171, + "success_rate.epoch.env.logic": 0.9136069114470843, + "success_rate.epoch.env.math": 0.9747706422018348, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8495065789473685, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705696163120448, + "success_rate.epoch.global": 0.9072632944228275, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965722120658135, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.3306348530038346, + "grad_norm": 234.33541657364728, + "learning_rate": 3.507751190424178e-07, + "loss": 0.1285, + "step": 10940, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9459459459459459, + "success_rate.epoch.env.logic": 0.9141630901287554, + "success_rate.epoch.env.math": 0.9748283752860412, + "success_rate.epoch.env.sat": 0.10526315789473684, + "success_rate.epoch.env.science": 0.8498769483182936, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8707024279713272, + "success_rate.epoch.global": 0.907563025210084, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960403726708075, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9926017262638718, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007398273736128237 + }, + { + "epoch": 2.3317000426075842, + "grad_norm": 170.70696067579433, + "learning_rate": 3.5074189516845126e-07, + "loss": 0.3629, + "step": 10945, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9463087248322147, + "success_rate.epoch.env.logic": 0.9141630901287554, + "success_rate.epoch.env.math": 0.9748858447488584, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8503679476696647, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703556084510801, + "success_rate.epoch.global": 0.9075386597938144, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0001905487804879, + "tokens_p.mean_in_band": 0.5772569444444444, + "tokens_rate.above_band": 0.9785202863961814, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021479713603818614 + }, + { + "epoch": 2.3327652322113335, + "grad_norm": 228.38892361976727, + "learning_rate": 3.50708678041725e-07, + "loss": 0.2608, + "step": 10950, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9143468950749465, + "success_rate.epoch.env.math": 0.9738339021615472, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.850040749796251, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703177351048409, + "success_rate.epoch.global": 0.9071933204881182, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9953013126491647, + "tokens_p.mean_in_band": 0.69296875, + "tokens_rate.above_band": 0.9654377880184332, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03456221198156682 + }, + { + "epoch": 2.333830421815083, + "grad_norm": 198.01043081154796, + "learning_rate": 3.5067546768644745e-07, + "loss": 0.1753, + "step": 10955, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.8875, + "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9145299145299145, + "success_rate.epoch.env.math": 0.9739819004524887, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8502847843775427, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704074931460929, + "success_rate.epoch.global": 0.9074903969270166, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989475388601037, + "tokens_p.mean_in_band": 0.8046875, + "tokens_rate.above_band": 0.9974160206718347, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002583979328165375 + }, + { + "epoch": 2.3348956114188324, + "grad_norm": 35.31957113571333, + "learning_rate": 3.50642264126822e-07, + "loss": 0.1807, + "step": 10960, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9148936170212766, + "success_rate.epoch.env.math": 0.9740698985343855, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8499594484995945, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705452434719292, + "success_rate.epoch.global": 0.9074664964901085, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968519656019657, + "tokens_p.mean_in_band": 0.6315104166666666, + "tokens_rate.above_band": 0.9926829268292683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007317073170731708 + }, + { + "epoch": 2.335960801022582, + "grad_norm": 48.7135950432769, + "learning_rate": 3.506090673870472e-07, + "loss": 0.2937, + "step": 10965, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9152542372881356, + "success_rate.epoch.env.math": 0.9741282339707537, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8504446240905417, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8706737851283848, + "success_rate.epoch.global": 0.9077608142493638, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992917847025495, + "tokens_p.mean_in_band": 0.8190104166666666, + "tokens_rate.above_band": 0.9915730337078652, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008426966292134831 + }, + { + "epoch": 2.3370259906263313, + "grad_norm": 50.60756611662624, + "learning_rate": 3.505758774913167e-07, + "loss": 0.2169, + "step": 10970, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9466666666666667, + "success_rate.epoch.env.logic": 0.9135021097046413, + "success_rate.epoch.env.math": 0.9741863075196409, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8509266720386784, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705824245148396, + "success_rate.epoch.global": 0.9077362079898541, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992954911433173, + "tokens_p.mean_in_band": 0.48408564814814814, + "tokens_rate.above_band": 0.9583333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041666666666666664 + }, + { + "epoch": 2.338091180230081, + "grad_norm": 176.03274686441375, + "learning_rate": 3.5054269446381877e-07, + "loss": 0.3998, + "step": 10975, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 1.0, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9138655462184874, + "success_rate.epoch.env.math": 0.9731843575418995, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8512861736334405, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705891689706459, + "success_rate.epoch.global": 0.9077117572692794, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978830645161291, + "tokens_p.mean_in_band": 0.5849494934082031, + "tokens_rate.above_band": 0.9951845906902087, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004815409309791332 + }, + { + "epoch": 2.3391563698338302, + "grad_norm": 110.65938234443695, + "learning_rate": 3.505095183287373e-07, + "loss": 0.328, + "step": 10980, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9142259414225942, + "success_rate.epoch.env.math": 0.9732739420935412, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8509615384615384, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8658158749375985, + "success_rate.epoch.global": 0.9073724007561437, + "success_rate.window.env.babyai": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9910201149425287, + "tokens_p.mean_below_band": 7.566995918750763e-10, + "tokens_p.mean_in_band": 0.709228515625, + "tokens_rate.above_band": 0.9666666666666667, + "tokens_rate.below_band": 0.003703703703703704, + "tokens_rate.in_band": 0.02962962962962963 + }, + { + "epoch": 2.34022155943758, + "grad_norm": 98.01721192373007, + "learning_rate": 3.504763491102506e-07, + "loss": 0.2004, + "step": 10985, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9470198675496688, + "success_rate.epoch.env.logic": 0.9145833333333333, + "success_rate.epoch.env.math": 0.9733924611973392, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8514376996805112, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8659024269588266, + "success_rate.epoch.global": 0.9076633165829145, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953947368421052, + "tokens_p.mean_in_band": 0.88671875, + "tokens_rate.above_band": 0.979381443298969, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020618556701030927 + }, + { + "epoch": 2.341286749041329, + "grad_norm": 68.84285969692594, + "learning_rate": 3.504431868325323e-07, + "loss": 0.3017, + "step": 10990, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8795180722891566, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9149377593360996, + "success_rate.epoch.env.math": 0.9733924611973392, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8520286396181385, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8651681635809744, + "success_rate.epoch.global": 0.9076393237319975, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986053719008264, + "tokens_p.mean_in_band": 0.6766183035714286, + "tokens_rate.above_band": 0.9942481511914544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005751848808545604 + }, + { + "epoch": 2.342351938645079, + "grad_norm": 341.44705935430034, + "learning_rate": 3.5041003151975067e-07, + "loss": 0.1903, + "step": 10995, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9151138716356108, + "success_rate.epoch.env.math": 0.9734806629834254, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8523809523809524, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8653998265117189, + "success_rate.epoch.global": 0.9079275905118602, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985576923076923, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9908536585365854, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009146341463414634 + }, + { + "epoch": 2.343417128248828, + "grad_norm": 31.547450500014687, + "learning_rate": 3.5037688319606924e-07, + "loss": 0.2906, + "step": 11000, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9154639175257732, + "success_rate.epoch.env.math": 0.9735099337748344, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8527315914489311, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8656468422637439, + "success_rate.epoch.global": 0.9082140634723086, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973268839103869, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9979674796747967, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0020325203252032522 + }, + { + "epoch": 2.3444823178525778, + "grad_norm": 256.4405931443873, + "learning_rate": 3.503437418856461e-07, + "loss": 0.168, + "step": 11005, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9477124183006536, + "success_rate.epoch.env.logic": 0.9156378600823045, + "success_rate.epoch.env.math": 0.973568281938326, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8525236593059937, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8656891587112198, + "success_rate.epoch.global": 0.9081885856079405, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964511041009464, + "tokens_p.mean_in_band": 0.6907552083333334, + "tokens_rate.above_band": 0.990625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009375 + }, + { + "epoch": 2.3455475074563275, + "grad_norm": 144.97709738464556, + "learning_rate": 3.503106076126346e-07, + "loss": 0.3379, + "step": 11010, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9483870967741935, + "success_rate.epoch.env.logic": 0.9158110882956879, + "success_rate.epoch.env.math": 0.9725877192982456, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8519685039370078, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8656612426654067, + "success_rate.epoch.global": 0.9078540507111935, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9997905027932961, + "tokens_p.mean_in_band": 0.5681423611111112, + "tokens_rate.above_band": 0.9900442477876106, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00995575221238938 + }, + { + "epoch": 2.3466126970600767, + "grad_norm": 146.60794017629738, + "learning_rate": 3.5027748040118255e-07, + "loss": 0.2192, + "step": 11015, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8735632183908046, + "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9158110882956879, + "success_rate.epoch.env.math": 0.9726177437020811, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8526645768025078, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649582632497179, + "success_rate.epoch.global": 0.907829839704069, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998989898989899, + "tokens_p.mean_in_band": 0.8017578125, + "tokens_rate.above_band": 0.9939759036144579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006024096385542169 + }, + { + "epoch": 2.347677886663826, + "grad_norm": 120.57955067475064, + "learning_rate": 3.502443602754329e-07, + "loss": 0.2976, + "step": 11020, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8735632183908046, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9490445859872612, + "success_rate.epoch.env.logic": 0.9144602851323829, + "success_rate.epoch.env.math": 0.972707423580786, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8527799530148786, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649177453580684, + "success_rate.epoch.global": 0.9078057775046097, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983258928571429, + "tokens_p.mean_in_band": 0.630859375, + "tokens_rate.above_band": 0.9767441860465116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023255813953488372 + }, + { + "epoch": 2.3487430762675756, + "grad_norm": 264.0727461488841, + "learning_rate": 3.502112472595234e-07, + "loss": 0.3691, + "step": 11025, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8735632183908046, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9148073022312373, + "success_rate.epoch.env.math": 0.9728260869565217, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.85234375, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8649497435471634, + "success_rate.epoch.global": 0.9077818627450981, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958479020979021, + "tokens_p.mean_in_band": 0.6197916666666666, + "tokens_rate.above_band": 0.9694915254237289, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030508474576271188 + }, + { + "epoch": 2.3498082658713253, + "grad_norm": 32.628655459982646, + "learning_rate": 3.501781413775866e-07, + "loss": 0.2459, + "step": 11030, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9493670886075949, + "success_rate.epoch.env.logic": 0.9151515151515152, + "success_rate.epoch.env.math": 0.972885032537961, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8520249221183801, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640636763931879, + "success_rate.epoch.global": 0.907452657299939, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980177238805971, + "tokens_p.mean_in_band": 0.5654296875, + "tokens_rate.above_band": 0.9852941176470589, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014705882352941176 + }, + { + "epoch": 2.3508734554750745, + "grad_norm": 76.97015092120746, + "learning_rate": 3.501450426537497e-07, + "loss": 0.1713, + "step": 11035, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9153225806451613, + "success_rate.epoch.env.math": 0.9729437229437229, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8521400778210116, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642311621784935, + "success_rate.epoch.global": 0.9077344701583435, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988290398126464, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.9984411535463756, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001558846453624318 + }, + { + "epoch": 2.351938645078824, + "grad_norm": 247.05306713436704, + "learning_rate": 3.5011195111213497e-07, + "loss": 0.3353, + "step": 11040, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9473684210526315, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9154929577464789, + "success_rate.epoch.env.math": 0.9730312837108953, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.8520526723470179, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642466651234476, + "success_rate.epoch.global": 0.9077109896782027, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956487341772152, + "tokens_p.mean_in_band": 0.55625, + "tokens_rate.above_band": 0.8876404494382022, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11235955056179775 + }, + { + "epoch": 2.3530038346825735, + "grad_norm": 66.73765785891717, + "learning_rate": 3.5007886677685916e-07, + "loss": 0.1328, + "step": 11045, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.914, + "success_rate.epoch.env.math": 0.9731471535982814, + "success_rate.epoch.env.sat": 0.1, + "success_rate.epoch.env.science": 0.851508120649652, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8643112050680757, + "success_rate.epoch.global": 0.9073849878934624, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9958053691275168, + "tokens_p.mean_in_band": 0.6730324074074074, + "tokens_rate.above_band": 0.9430379746835443, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.056962025316455694 + }, + { + "epoch": 2.354069024286323, + "grad_norm": 168.69128030927538, + "learning_rate": 3.50045789672034e-07, + "loss": 0.1824, + "step": 11050, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9146825396825397, + "success_rate.epoch.env.math": 0.973175965665236, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.851195065535852, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639145134202384, + "success_rate.epoch.global": 0.9070609535304768, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.5193359375, + "tokens_rate.above_band": 0.8918918918918919, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10810810810810811 + }, + { + "epoch": 2.3551342138900724, + "grad_norm": 152.40827715421915, + "learning_rate": 3.5001271982176575e-07, + "loss": 0.1787, + "step": 11055, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9503105590062112, + "success_rate.epoch.env.logic": 0.9151873767258383, + "success_rate.epoch.env.math": 0.9732620320855615, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8508839354342813, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639399473622432, + "success_rate.epoch.global": 0.9070397111913358, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971217105263158, + "tokens_p.mean_in_band": 0.7053571428571429, + "tokens_rate.above_band": 0.9156626506024096, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08433734939759036 + }, + { + "epoch": 2.356199403493822, + "grad_norm": 59.24375211064543, + "learning_rate": 3.499796572501555e-07, + "loss": 0.3147, + "step": 11060, + "success_rate.epoch.env.abd": 0.9863945578231292, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.9153543307086615, + "success_rate.epoch.env.math": 0.9733475479744137, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8505747126436781, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639902140416645, + "success_rate.epoch.global": 0.9070185962807439, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.994299674267101, + "tokens_p.mean_in_band": 0.61875, + "tokens_rate.above_band": 0.9839743589743589, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016025641025641024 + }, + { + "epoch": 2.3572645930975713, + "grad_norm": 103.70876244723827, + "learning_rate": 3.499466019812991e-07, + "loss": 0.2315, + "step": 11065, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, + "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.916015625, + "success_rate.epoch.env.math": 0.9733759318423855, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.8508033664881408, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642295895942259, + "success_rate.epoch.global": 0.9072966507177034, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996482176360225, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9962616822429906, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003738317757009346 + }, + { + "epoch": 2.358329782701321, + "grad_norm": 77.01239787629495, + "learning_rate": 3.4991355403928694e-07, + "loss": 0.3863, + "step": 11070, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.950920245398773, + "success_rate.epoch.env.logic": 0.9161793372319688, + "success_rate.epoch.env.math": 0.9734607218683652, + "success_rate.epoch.env.sat": 0.09523809523809523, + "success_rate.epoch.env.science": 0.851258581235698, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644624289928591, + "success_rate.epoch.global": 0.9075730471079309, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989536830357143, + "tokens_p.mean_in_band": 0.7377232142857143, + "tokens_rate.above_band": 0.9846153846153847, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015384615384615385 + }, + { + "epoch": 2.3593949723050702, + "grad_norm": 12.668494255059269, + "learning_rate": 3.4988051344820406e-07, + "loss": 0.2475, + "step": 11075, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9161793372319688, + "success_rate.epoch.env.math": 0.9725158562367865, + "success_rate.epoch.env.sat": 0.09090909090909091, + "success_rate.epoch.env.science": 0.8515981735159818, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640491994087377, + "success_rate.epoch.global": 0.9072532699167658, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972246891651865, + "tokens_p.mean_in_band": 0.6430921052631579, + "tokens_rate.above_band": 0.9673539518900344, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03264604810996564 + }, + { + "epoch": 2.36046016190882, + "grad_norm": 56.933228687245524, + "learning_rate": 3.4984748023213027e-07, + "loss": 0.218, + "step": 11080, + "success_rate.epoch.env.abd": 0.9867549668874173, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9161793372319688, + "success_rate.epoch.env.math": 0.9726027397260274, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8521607278241091, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637569420312975, + "success_rate.epoch.global": 0.9072317723770006, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958881578947368, + "tokens_p.mean_in_band": 0.5427631578947368, + "tokens_rate.above_band": 0.9090909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09090909090909091 + }, + { + "epoch": 2.361525351512569, + "grad_norm": 727.1168428241937, + "learning_rate": 3.4981445441513994e-07, + "loss": 0.2851, + "step": 11085, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.916504854368932, + "success_rate.epoch.env.math": 0.9727463312368972, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8523845571536715, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638278580634008, + "success_rate.epoch.global": 0.9075059101654847, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973714953271028, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.362590541116319, + "grad_norm": 80.2901158722856, + "learning_rate": 3.4978143602130205e-07, + "loss": 0.2347, + "step": 11090, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9147286821705426, + "success_rate.epoch.env.math": 0.9728317659352143, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8529411764705882, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637568174546147, + "success_rate.epoch.global": 0.9074837949322333, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9958462132921174, + "tokens_p.mean_in_band": 0.671875, + "tokens_rate.above_band": 0.9803030303030303, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019696969696969695 + }, + { + "epoch": 2.363655730720068, + "grad_norm": 34.87870520427987, + "learning_rate": 3.4974842507468016e-07, + "loss": 0.2887, + "step": 11095, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, + "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9147286821705426, + "success_rate.epoch.env.math": 0.972972972972973, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8525206922498119, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638646287529952, + "success_rate.epoch.global": 0.9074618096357226, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980633802816902, + "tokens_p.mean_in_band": 0.5052083333333334, + "tokens_rate.above_band": 0.9957924263674615, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004207573632538569 + }, + { + "epoch": 2.3647209203238178, + "grad_norm": 143.8052000119641, + "learning_rate": 3.4971542159933235e-07, + "loss": 0.343, + "step": 11100, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.95, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9153846153846154, + "success_rate.epoch.env.math": 0.9730848861283644, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8526315789473684, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639759808984567, + "success_rate.epoch.global": 0.9077328646748682, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988207547169812, + "tokens_p.mean_in_band": 0.873046875, + "tokens_rate.above_band": 0.9814814814814815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018518518518518517 + }, + { + "epoch": 2.365786109927567, + "grad_norm": 66.84384196733302, + "learning_rate": 3.496824256193114e-07, + "loss": 0.1747, + "step": 11105, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9153846153846154, + "success_rate.epoch.env.math": 0.9732510288065843, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8528528528528528, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642354689236079, + "success_rate.epoch.global": 0.9080023364485982, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984707446808511, + "tokens_p.mean_in_band": 0.6671875, + "tokens_rate.above_band": 0.9791666666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020833333333333332 + }, + { + "epoch": 2.3668512995313167, + "grad_norm": 109.9298418509935, + "learning_rate": 3.4964943715866455e-07, + "loss": 0.5967, + "step": 11110, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9157088122605364, + "success_rate.epoch.env.math": 0.9733879222108496, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.851685393258427, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641712535314042, + "success_rate.epoch.global": 0.9076878276062901, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9914568345323741, + "tokens_p.mean_in_band": 0.6556919642857143, + "tokens_rate.above_band": 0.952054794520548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04794520547945205 + }, + { + "epoch": 2.367916489135066, + "grad_norm": 49.83853824283283, + "learning_rate": 3.496164562414335e-07, + "loss": 0.2379, + "step": 11115, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.916030534351145, + "success_rate.epoch.env.math": 0.9734422880490297, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8522388059701492, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8643860580677631, + "success_rate.epoch.global": 0.9079558652729385, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982918006430869, + "tokens_p.mean_in_band": 0.8638392857142857, + "tokens_rate.above_band": 0.9779874213836478, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0220125786163522 + }, + { + "epoch": 2.3689816787388156, + "grad_norm": 127.47305892717033, + "learning_rate": 3.4958348289165445e-07, + "loss": 0.2766, + "step": 11120, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9165085388994307, + "success_rate.epoch.env.math": 0.973630831643002, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8522388059701492, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644466533534229, + "success_rate.epoch.global": 0.9082223508975101, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974563953488372, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.370046868342565, + "grad_norm": 233.27903982768112, + "learning_rate": 3.4955051713335827e-07, + "loss": 0.2593, + "step": 11125, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9515151515151515, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9736575481256332, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8513011152416357, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644663073093124, + "success_rate.epoch.global": 0.9079099307159353, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977851605758582, + "tokens_p.mean_in_band": 0.5736177884615384, + "tokens_rate.above_band": 0.9858078602620087, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014192139737991267 + }, + { + "epoch": 2.3711120579463145, + "grad_norm": 76.97890279663311, + "learning_rate": 3.4951755899057015e-07, + "loss": 0.1438, + "step": 11130, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9515151515151515, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9737638748738647, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8519615099925981, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8645360092637847, + "success_rate.epoch.global": 0.9081750143926309, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9943181818181818, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9649122807017544, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03508771929824561 + }, + { + "epoch": 2.3721772475500638, + "grad_norm": 56.06022830971406, + "learning_rate": 3.4948460848730966e-07, + "loss": 0.1753, + "step": 11135, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9168241965973535, + "success_rate.epoch.env.math": 0.9738693467336683, + "success_rate.epoch.env.sat": 0.08695652173913043, + "success_rate.epoch.env.science": 0.8523985239852399, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8646261995532067, + "success_rate.epoch.global": 0.9084385763490241, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9999078171091446, + "tokens_p.mean_in_band": 0.76953125, + "tokens_rate.above_band": 0.9970588235294118, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0029411764705882353 + }, + { + "epoch": 2.3732424371538134, + "grad_norm": 60.27607452460975, + "learning_rate": 3.49451665647591e-07, + "loss": 0.2476, + "step": 11140, + "success_rate.epoch.env.abd": 0.9869281045751634, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, + "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9169811320754717, + "success_rate.epoch.env.math": 0.973973973973974, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8526160648489315, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636786569203967, + "success_rate.epoch.global": 0.9081282198053806, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9990190582959642, + "tokens_p.mean_in_band": 0.6414473684210527, + "tokens_rate.above_band": 0.9591397849462365, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04086021505376344 + }, + { + "epoch": 2.3743076267575627, + "grad_norm": 692.8649133006721, + "learning_rate": 3.4941873049542254e-07, + "loss": 0.2884, + "step": 11145, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9171374764595104, + "success_rate.epoch.env.math": 0.9740777666999003, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8522058823529411, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638374233980962, + "success_rate.epoch.global": 0.9081050228310502, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966216216216216, + "tokens_p.mean_in_band": 0.62890625, + "tokens_rate.above_band": 0.9847908745247148, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015209125475285171 + }, + { + "epoch": 2.3753728163613124, + "grad_norm": 36.16419467614971, + "learning_rate": 3.4938580305480727e-07, + "loss": 0.0947, + "step": 11150, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8631578947368421, + "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9171374764595104, + "success_rate.epoch.env.math": 0.974155069582505, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8527472527472527, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863183680314303, + "success_rate.epoch.global": 0.9080819578827547, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975293803418803, + "tokens_p.mean_in_band": 0.4375, + "tokens_rate.above_band": 0.9957446808510638, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00425531914893617 + }, + { + "epoch": 2.3764380059650616, + "grad_norm": 67.95497167009935, + "learning_rate": 3.4935288334974246e-07, + "loss": 0.2231, + "step": 11155, + "success_rate.epoch.env.abd": 0.987012987012987, + "success_rate.epoch.env.agentgym:alfworld": 0.8631578947368421, + "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9177570093457944, + "success_rate.epoch.env.math": 0.9742063492063492, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8517165814463111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631509658787746, + "success_rate.epoch.global": 0.9077752553916004, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9961890243902439, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.917910447761194, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08208955223880597 + }, + { + "epoch": 2.3775031955688113, + "grad_norm": 1052.336631436009, + "learning_rate": 3.493199714042198e-07, + "loss": 0.3742, + "step": 11160, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.8645833333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, + "success_rate.epoch.env.babyai": 0.9523809523809523, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9177570093457944, + "success_rate.epoch.env.math": 0.9742574257425742, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8508005822416302, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632461014252097, + "success_rate.epoch.global": 0.9074702886247877, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.998630275974026, + "tokens_p.mean_in_band": 0.3428955078125, + "tokens_rate.above_band": 0.9871794871794872, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01282051282051282 + }, + { + "epoch": 2.3785683851725605, + "grad_norm": 46.269544382856104, + "learning_rate": 3.492870672422252e-07, + "loss": 0.2332, + "step": 11165, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.8645833333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9520958083832335, + "success_rate.epoch.env.logic": 0.9182156133828996, + "success_rate.epoch.env.math": 0.9733464955577492, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.850909090909091, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635893678632534, + "success_rate.epoch.global": 0.90744920993228, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9989193925233645, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.9972041006523765, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0027958993476234857 + }, + { + "epoch": 2.37963357477631, + "grad_norm": 71.63228015035224, + "learning_rate": 3.49254170887739e-07, + "loss": 0.3182, + "step": 11170, + "success_rate.epoch.env.abd": 0.9870967741935484, + "success_rate.epoch.env.agentgym:alfworld": 0.8645833333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9520958083832335, + "success_rate.epoch.env.logic": 0.9183673469387755, + "success_rate.epoch.env.math": 0.9733727810650887, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8515568428674873, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629381905193534, + "success_rate.epoch.global": 0.9074282498593135, + "success_rate.window.env.agentgym:sciworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964334239130435, + "tokens_p.mean_in_band": 0.670654296875, + "tokens_rate.above_band": 0.9787234042553191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02127659574468085 + }, + { + "epoch": 2.3806987643800595, + "grad_norm": 327.0478312384573, + "learning_rate": 3.492212823647358e-07, + "loss": 0.2949, + "step": 11175, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.865979381443299, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9183673469387755, + "success_rate.epoch.env.math": 0.9724409448818898, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8512635379061372, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862994592634481, + "success_rate.epoch.global": 0.9071268237934904, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.85, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9983592487883683, + "tokens_p.mean_in_band": 0.6180245535714286, + "tokens_rate.above_band": 0.9943775100401606, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005622489959839358 + }, + { + "epoch": 2.381763953983809, + "grad_norm": 21.533846986080754, + "learning_rate": 3.491884016971846e-07, + "loss": 0.2154, + "step": 11180, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8686868686868687, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.96, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9191176470588235, + "success_rate.epoch.env.math": 0.9724680432645034, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8507570295602018, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632653542163259, + "success_rate.epoch.global": 0.9071068830442082, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992795389048992, + "tokens_p.mean_in_band": 0.6514369419642857, + "tokens_rate.above_band": 0.9900142653352354, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009985734664764621 + }, + { + "epoch": 2.382829143587559, + "grad_norm": 235.064921009993, + "learning_rate": 3.4915552890904853e-07, + "loss": 0.296, + "step": 11185, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8686868686868687, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9175824175824175, + "success_rate.epoch.env.math": 0.97252208047105, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8505747126436781, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863253986248333, + "success_rate.epoch.global": 0.9068080357142857, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.825, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9993580337490829, + "tokens_p.mean_in_band": 0.5082347972972973, + "tokens_rate.above_band": 0.9735714285714285, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02642857142857143 + }, + { + "epoch": 2.383894333191308, + "grad_norm": 132.30094566011968, + "learning_rate": 3.491226640242851e-07, + "loss": 0.48, + "step": 11190, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8613861386138614, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9175824175824175, + "success_rate.epoch.env.math": 0.9725759059745348, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8512160228898427, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626534776734824, + "success_rate.epoch.global": 0.9067890929326655, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982638888888888, + "tokens_p.mean_in_band": 0.66552734375, + "tokens_rate.above_band": 0.9878048780487805, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012195121951219513 + }, + { + "epoch": 2.3849595227950573, + "grad_norm": 119.03417947625366, + "learning_rate": 3.490898070668459e-07, + "loss": 0.2041, + "step": 11195, + "success_rate.epoch.env.abd": 0.9872611464968153, + "success_rate.epoch.env.agentgym:alfworld": 0.8613861386138614, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9177330895795247, + "success_rate.epoch.env.math": 0.9727095516569201, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8509272467902995, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626530723625687, + "success_rate.epoch.global": 0.9067702552719201, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9894662921348315, + "tokens_p.mean_in_band": 0.695703125, + "tokens_rate.above_band": 0.898989898989899, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10101010101010101 + }, + { + "epoch": 2.386024712398807, + "grad_norm": 304.03407850072375, + "learning_rate": 3.4905695806067695e-07, + "loss": 0.2916, + "step": 11200, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, + "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, + "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9180327868852459, + "success_rate.epoch.env.math": 0.9727891156462585, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8504273504273504, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8622084590291458, + "success_rate.epoch.global": 0.9064748201438849, + "success_rate.window.env.abd": 0.5, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957748724489796, + "tokens_p.mean_below_band": 4.842877388000488e-07, + "tokens_p.mean_in_band": 0.13498369235436894, + "tokens_rate.above_band": 0.48635235732009924, + "tokens_rate.below_band": 0.0024813895781637717, + "tokens_rate.in_band": 0.511166253101737 + }, + { + "epoch": 2.3870899020025567, + "grad_norm": 237.75061366615947, + "learning_rate": 3.4902411702971835e-07, + "loss": 0.1495, + "step": 11205, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9180327868852459, + "success_rate.epoch.env.math": 0.972894482090997, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8506401137980085, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626484421645265, + "success_rate.epoch.global": 0.9067328918322296, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982718894009217, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.388155091606306, + "grad_norm": 148.02542978282412, + "learning_rate": 3.4899128399790434e-07, + "loss": 0.2462, + "step": 11210, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9183303085299456, + "success_rate.epoch.env.math": 0.9729468599033816, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8504606661941885, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8627804879041006, + "success_rate.epoch.global": 0.906714364336819, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973299050632911, + "tokens_p.mean_in_band": 0.6047585227272727, + "tokens_rate.above_band": 0.9663608562691132, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03363914373088685 + }, + { + "epoch": 2.389220281210055, + "grad_norm": 464.4619204739061, + "learning_rate": 3.489584589891635e-07, + "loss": 0.143, + "step": 11215, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9526627218934911, + "success_rate.epoch.env.logic": 0.9187725631768953, + "success_rate.epoch.env.math": 0.972972972972973, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8509887005649718, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628966853222699, + "success_rate.epoch.global": 0.9069703622392975, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9923502604166666, + "tokens_p.mean_in_band": 0.86015625, + "tokens_rate.above_band": 0.9746192893401016, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025380710659898477 + }, + { + "epoch": 2.390285470813805, + "grad_norm": 637.8273901020881, + "learning_rate": 3.489256420274183e-07, + "loss": 0.3191, + "step": 11220, + "success_rate.epoch.env.abd": 0.9811320754716981, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.9171171171171171, + "success_rate.epoch.env.math": 0.973051010587103, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8514084507042253, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628576463571015, + "success_rate.epoch.global": 0.9069512862616311, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966692789968652, + "tokens_p.mean_in_band": 0.6085069444444444, + "tokens_rate.above_band": 0.9725609756097561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027439024390243903 + }, + { + "epoch": 2.3913506604175545, + "grad_norm": 64.07865439749847, + "learning_rate": 3.488928331365857e-07, + "loss": 0.2279, + "step": 11225, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9174147217235189, + "success_rate.epoch.env.math": 0.9731285988483686, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8517217146872804, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629559717368689, + "success_rate.epoch.global": 0.9072052401746725, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978553921568627, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9902912621359223, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009708737864077669 + }, + { + "epoch": 2.3924158500213037, + "grad_norm": 116.04393733212471, + "learning_rate": 3.488600323405764e-07, + "loss": 0.3599, + "step": 11230, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9532163742690059, + "success_rate.epoch.env.logic": 0.9160714285714285, + "success_rate.epoch.env.math": 0.9731543624161074, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8515406162464986, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628599225208553, + "success_rate.epoch.global": 0.9069134458356015, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954459798994975, + "tokens_p.mean_in_band": 0.625, + "tokens_rate.above_band": 0.9476190476190476, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05238095238095238 + }, + { + "epoch": 2.3934810396250534, + "grad_norm": 181.36751715093163, + "learning_rate": 3.4882723966329554e-07, + "loss": 0.2478, + "step": 11235, + "success_rate.epoch.env.abd": 0.98125, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9160714285714285, + "success_rate.epoch.env.math": 0.9732313575525813, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8514644351464435, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8628847235990768, + "success_rate.epoch.global": 0.9068946796959826, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967311715481172, + "tokens_p.mean_in_band": 0.7433035714285714, + "tokens_rate.above_band": 0.9715447154471545, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028455284552845527 + }, + { + "epoch": 2.3945462292288027, + "grad_norm": 64.64369872626597, + "learning_rate": 3.487944551286421e-07, + "loss": 0.194, + "step": 11240, + "success_rate.epoch.env.abd": 0.9813664596273292, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9168141592920354, + "success_rate.epoch.env.math": 0.9733079122974261, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8514644351464435, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630841423925314, + "success_rate.epoch.global": 0.9071467244179751, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995335820895522, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.3956114188325524, + "grad_norm": 104.44353183729035, + "learning_rate": 3.487616787605093e-07, + "loss": 0.3379, + "step": 11245, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9168141592920354, + "success_rate.epoch.env.math": 0.9733333333333334, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8504867872044506, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631490241716787, + "success_rate.epoch.global": 0.9068574514038877, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984008528784648, + "tokens_p.mean_in_band": 0.5869766235351562, + "tokens_rate.above_band": 0.989451476793249, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010548523206751054 + }, + { + "epoch": 2.3966766084363016, + "grad_norm": 116.84335950118749, + "learning_rate": 3.487289105827842e-07, + "loss": 0.2582, + "step": 11250, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.953757225433526, + "success_rate.epoch.env.logic": 0.9169611307420494, + "success_rate.epoch.env.math": 0.9734345351043643, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.8509015256588072, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624718848521936, + "success_rate.epoch.global": 0.9068389876144318, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990202145214522, + "tokens_p.mean_in_band": 0.7897135416666666, + "tokens_rate.above_band": 0.9901960784313726, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00980392156862745 + }, + { + "epoch": 2.3977417980400513, + "grad_norm": 34.8890199872237, + "learning_rate": 3.4869615061934806e-07, + "loss": 0.1608, + "step": 11255, + "success_rate.epoch.env.abd": 0.9814814814814815, + "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9173989455184535, + "success_rate.epoch.env.math": 0.9734848484848485, + "success_rate.epoch.env.sat": 0.08333333333333333, + "success_rate.epoch.env.science": 0.851313969571231, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8625779153196057, + "success_rate.epoch.global": 0.9070891514500538, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944968553459119, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9968652037617555, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003134796238244514 + }, + { + "epoch": 2.3988069876438005, + "grad_norm": 142.41336638126378, + "learning_rate": 3.4866339889407614e-07, + "loss": 0.2173, + "step": 11260, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, + "success_rate.epoch.env.agentgym:sciworld": 0.95, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9181184668989547, + "success_rate.epoch.env.math": 0.9734848484848485, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8515193370165746, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8623794963272602, + "success_rate.epoch.global": 0.9070701660417783, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996149289099526, + "tokens_p.mean_in_band": 0.6040900735294118, + "tokens_rate.above_band": 0.9254385964912281, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07456140350877193 + }, + { + "epoch": 2.39987217724755, + "grad_norm": 131.5516543860267, + "learning_rate": 3.4863065543083766e-07, + "loss": 0.2447, + "step": 11265, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, + "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9181184668989547, + "success_rate.epoch.env.math": 0.9735349716446124, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.852233676975945, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8624865587144993, + "success_rate.epoch.global": 0.9073183760683761, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948369565217391, + "tokens_p.mean_in_band": 0.765625, + "tokens_rate.above_band": 0.9913793103448276, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008620689655172414 + }, + { + "epoch": 2.4009373668512994, + "grad_norm": 206.4952815685344, + "learning_rate": 3.485979202534958e-07, + "loss": 0.3351, + "step": 11270, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9545454545454546, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9182608695652174, + "success_rate.epoch.env.math": 0.9735599622285175, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8519533927347498, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8626787451725261, + "success_rate.epoch.global": 0.9072988811933937, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990871659415786, + "tokens_p.mean_in_band": 0.46337890625, + "tokens_rate.above_band": 0.9950525664811379, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004947433518862091 + }, + { + "epoch": 2.402002556455049, + "grad_norm": 6.389346849192408, + "learning_rate": 3.4856519338590766e-07, + "loss": 0.1308, + "step": 11275, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9182608695652174, + "success_rate.epoch.env.math": 0.9736098020735156, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8525597269624573, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.862941405536093, + "success_rate.epoch.global": 0.9075451647183846, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996875, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9982788296041308, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0017211703958691911 + }, + { + "epoch": 2.4030677460587984, + "grad_norm": 10.013904024918261, + "learning_rate": 3.4853247485192444e-07, + "loss": 0.1659, + "step": 11280, + "success_rate.epoch.env.abd": 0.9817073170731707, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9186851211072664, + "success_rate.epoch.env.math": 0.9737089201877934, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8521798365122616, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8629544491002867, + "success_rate.epoch.global": 0.9075251722310546, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959239130434783, + "tokens_p.mean_below_band": 1.8533319234848022e-07, + "tokens_p.mean_in_band": 0.7756696428571429, + "tokens_rate.above_band": 0.8598130841121495, + "tokens_rate.below_band": 0.009345794392523364, + "tokens_rate.in_band": 0.1308411214953271 + }, + { + "epoch": 2.404132935662548, + "grad_norm": 146.73062001564242, + "learning_rate": 3.484997646753912e-07, + "loss": 0.2775, + "step": 11285, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.918825561312608, + "success_rate.epoch.env.math": 0.9738317757009346, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8523809523809524, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630430960453922, + "success_rate.epoch.global": 0.90776955602537, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969093406593407, + "tokens_p.mean_in_band": 0.7421875, + "tokens_rate.above_band": 0.994535519125683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00546448087431694 + }, + { + "epoch": 2.4051981252662973, + "grad_norm": 210.47051997233478, + "learning_rate": 3.484670628801468e-07, + "loss": 0.3161, + "step": 11290, + "success_rate.epoch.env.abd": 0.9819277108433735, + "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, + "success_rate.epoch.env.agentgym:sciworld": 0.9516129032258065, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.918825561312608, + "success_rate.epoch.env.math": 0.9739776951672863, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8519021739130435, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630585561412393, + "success_rate.epoch.global": 0.9077490774907749, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982638888888888, + "tokens_p.mean_in_band": 0.6930803571428571, + "tokens_rate.above_band": 0.9536423841059603, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046357615894039736 + }, + { + "epoch": 2.406263314870047, + "grad_norm": 47.26272492608127, + "learning_rate": 3.4843436949002404e-07, + "loss": 0.1687, + "step": 11295, + "success_rate.epoch.env.abd": 0.9820359281437125, + "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, + "success_rate.epoch.env.agentgym:sciworld": 0.9516129032258065, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.918825561312608, + "success_rate.epoch.env.math": 0.974025974025974, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.851150202976996, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8631202593424018, + "success_rate.epoch.global": 0.907465825446898, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9990335051546392, + "tokens_p.mean_below_band": 3.688037395477295e-07, + "tokens_p.mean_in_band": 0.6334635416666666, + "tokens_rate.above_band": 0.9822784810126582, + "tokens_rate.below_band": 0.002531645569620253, + "tokens_rate.in_band": 0.015189873417721518 + }, + { + "epoch": 2.407328504473796, + "grad_norm": 276.7905843791326, + "learning_rate": 3.4840168452884967e-07, + "loss": 0.1136, + "step": 11300, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, + "success_rate.epoch.env.agentgym:sciworld": 0.952, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.918825561312608, + "success_rate.epoch.env.math": 0.9740980573543015, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.851652056641942, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863217346866639, + "success_rate.epoch.global": 0.9077084425799685, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978885135135135, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.408393694077546, + "grad_norm": 44.02478639331081, + "learning_rate": 3.483690080204443e-07, + "loss": 0.3458, + "step": 11305, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, + "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9189655172413793, + "success_rate.epoch.env.math": 0.9742173112338858, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8511784511784511, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632420941660044, + "success_rate.epoch.global": 0.9076882845188284, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0003824429277477, + "tokens_p.mean_in_band": 0.66796875, + "tokens_rate.above_band": 0.9981207423067888, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0018792576932111817 + }, + { + "epoch": 2.409458883681295, + "grad_norm": 86.93008222328166, + "learning_rate": 3.4833633998862235e-07, + "loss": 0.2578, + "step": 11310, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, + "success_rate.epoch.env.agentgym:sciworld": 0.953125, + "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9189655172413793, + "success_rate.epoch.env.math": 0.9742883379247016, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8516778523489933, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633615919369504, + "success_rate.epoch.global": 0.9079290558163797, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975066489361702, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.410524073285045, + "grad_norm": 80.27174008216069, + "learning_rate": 3.4830368045719193e-07, + "loss": 0.116, + "step": 11315, + "success_rate.epoch.env.abd": 0.9822485207100592, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.953125, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9565217391304348, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9192439862542955, + "success_rate.epoch.env.math": 0.9743119266055046, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8518766756032171, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637095892528315, + "success_rate.epoch.global": 0.908168574401665, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992198043184886, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.411589262888794, + "grad_norm": 77.62408344093436, + "learning_rate": 3.482710294499552e-07, + "loss": 0.1487, + "step": 11320, + "success_rate.epoch.env.abd": 0.9823529411764705, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9196581196581196, + "success_rate.epoch.env.math": 0.9743354720439963, + "success_rate.epoch.env.sat": 0.08, + "success_rate.epoch.env.science": 0.8518766756032171, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640009952612904, + "success_rate.epoch.global": 0.9084068500259471, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4126544524925437, + "grad_norm": 82.86894299641722, + "learning_rate": 3.482383869907078e-07, + "loss": 0.2435, + "step": 11325, + "success_rate.epoch.env.abd": 0.9823529411764705, + "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9197952218430034, + "success_rate.epoch.env.math": 0.9744292237442922, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8521739130434782, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638809652137468, + "success_rate.epoch.global": 0.9083850931677019, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980743838028169, + "tokens_p.mean_in_band": 0.59765625, + "tokens_rate.above_band": 0.9759450171821306, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024054982817869417 + }, + { + "epoch": 2.413719642096293, + "grad_norm": 68.71597285807619, + "learning_rate": 3.4820575310323957e-07, + "loss": 0.1064, + "step": 11330, + "success_rate.epoch.env.abd": 0.9826589595375722, + "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9185059422750425, + "success_rate.epoch.env.math": 0.9744990892531876, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8522727272727273, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638069123529544, + "success_rate.epoch.global": 0.908363448631905, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0, + "tokens_p.mean_in_band": 0.4632352941176471, + "tokens_rate.above_band": 0.9733542319749217, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02664576802507837 + }, + { + "epoch": 2.4147848317000427, + "grad_norm": 108.17849516842011, + "learning_rate": 3.4817312781133367e-07, + "loss": 0.3906, + "step": 11335, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9560439560439561, + "success_rate.epoch.env.logic": 0.9187817258883249, + "success_rate.epoch.env.math": 0.9745685740236149, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8506666666666667, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637013549829301, + "success_rate.epoch.global": 0.907826982492276, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.25, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9941770186335404, + "tokens_p.mean_in_band": 0.5744243421052632, + "tokens_rate.above_band": 0.8944444444444445, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10555555555555556 + }, + { + "epoch": 2.415850021303792, + "grad_norm": 429.42557788297694, + "learning_rate": 3.4814051113876723e-07, + "loss": 0.2361, + "step": 11340, + "success_rate.epoch.env.abd": 0.9828571428571429, + "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9187817258883249, + "success_rate.epoch.env.math": 0.9746146872166818, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8504983388704319, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637426359511845, + "success_rate.epoch.global": 0.9078068823831535, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992732558139535, + "tokens_p.mean_in_band": 0.5872395833333334, + "tokens_rate.above_band": 0.9862385321100917, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013761467889908258 + }, + { + "epoch": 2.4169152109075416, + "grad_norm": 100.54038489364434, + "learning_rate": 3.4810790310931106e-07, + "loss": 0.2291, + "step": 11345, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9187817258883249, + "success_rate.epoch.env.math": 0.9746606334841629, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8495692511597084, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638110127230142, + "success_rate.epoch.global": 0.907530737704918, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975190033783784, + "tokens_p.mean_in_band": 0.4833984375, + "tokens_rate.above_band": 0.9801324503311258, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019867549668874173 + }, + { + "epoch": 2.417980400511291, + "grad_norm": 293.76814639339, + "learning_rate": 3.480753037467297e-07, + "loss": 0.2855, + "step": 11350, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9175084175084175, + "success_rate.epoch.env.math": 0.974706413730804, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8498677248677249, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637590788437307, + "success_rate.epoch.global": 0.9075114971895759, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980808597748209, + "tokens_p.mean_in_band": 0.6023706896551724, + "tokens_rate.above_band": 0.9711729622266402, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02882703777335984 + }, + { + "epoch": 2.4190455901150405, + "grad_norm": 83.88927403350766, + "learning_rate": 3.480427130747812e-07, + "loss": 0.1484, + "step": 11355, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9567567567567568, + "success_rate.epoch.env.logic": 0.9179229480737019, + "success_rate.epoch.env.math": 0.9747292418772563, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8504611330698287, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638527849268072, + "success_rate.epoch.global": 0.9077471967380224, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953571428571428, + "tokens_p.mean_in_band": 0.7916666666666666, + "tokens_rate.above_band": 0.9668508287292817, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03314917127071823 + }, + { + "epoch": 2.42011077971879, + "grad_norm": 90.26815823858966, + "learning_rate": 3.480101311172175e-07, + "loss": 0.1995, + "step": 11360, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.9183333333333333, + "success_rate.epoch.env.math": 0.9747974797479748, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8506578947368421, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634700836354088, + "success_rate.epoch.global": 0.9077275038129131, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9938478171334432, + "tokens_p.mean_below_band": 1.1874362826347351e-08, + "tokens_p.mean_in_band": 0.5233415233415234, + "tokens_rate.above_band": 0.8992592592592593, + "tokens_rate.below_band": 0.0002469135802469136, + "tokens_rate.in_band": 0.10049382716049382 + }, + { + "epoch": 2.4211759693225394, + "grad_norm": 89.88018945924556, + "learning_rate": 3.479775578977841e-07, + "loss": 0.2932, + "step": 11365, + "success_rate.epoch.env.abd": 0.9831460674157303, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.9183333333333333, + "success_rate.epoch.env.math": 0.974910394265233, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8510498687664042, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635246389540381, + "success_rate.epoch.global": 0.9079614604462475, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9899193548387096, + "tokens_p.mean_in_band": 0.7555803571428571, + "tokens_rate.above_band": 0.9465648854961832, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05343511450381679 + }, + { + "epoch": 2.4222411589262887, + "grad_norm": 30.385510202584697, + "learning_rate": 3.4794499344022005e-07, + "loss": 0.1685, + "step": 11370, + "success_rate.epoch.env.abd": 0.9831460674157303, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.9183333333333333, + "success_rate.epoch.env.math": 0.9750445632798574, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8513425016371972, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635867120182085, + "success_rate.epoch.global": 0.9081942336874052, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985677083333333, + "tokens_p.mean_in_band": 0.5966796875, + "tokens_rate.above_band": 0.9836065573770492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01639344262295082 + }, + { + "epoch": 2.4233063485300383, + "grad_norm": 42.159334060110666, + "learning_rate": 3.4791243776825814e-07, + "loss": 0.2069, + "step": 11375, + "success_rate.epoch.env.abd": 0.9832402234636871, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9183333333333333, + "success_rate.epoch.env.math": 0.9751111111111112, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8511749347258486, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636091147249673, + "success_rate.epoch.global": 0.9081735620585267, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9945175438596491, + "tokens_p.mean_in_band": 0.4521484375, + "tokens_rate.above_band": 0.9344262295081968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06557377049180328 + }, + { + "epoch": 2.424371538133788, + "grad_norm": 35.188423877498934, + "learning_rate": 3.478798909056248e-07, + "loss": 0.1762, + "step": 11380, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9172185430463576, + "success_rate.epoch.env.math": 0.9751332149200711, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8513689700130378, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635906969687422, + "success_rate.epoch.global": 0.9081529944640161, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974755700325733, + "tokens_p.mean_in_band": 0.737109375, + "tokens_rate.above_band": 0.9935275080906149, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006472491909385114 + }, + { + "epoch": 2.4254367277375373, + "grad_norm": 233.357671967341, + "learning_rate": 3.4784735287603987e-07, + "loss": 0.2373, + "step": 11385, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9172185430463576, + "success_rate.epoch.env.math": 0.975177304964539, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8521400778210116, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636648058644187, + "success_rate.epoch.global": 0.9083835341365462, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9930862831858407, + "tokens_p.mean_in_band": 0.8037109375, + "tokens_rate.above_band": 0.9338842975206612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06611570247933884 + }, + { + "epoch": 2.4265019173412865, + "grad_norm": 85.55642516680025, + "learning_rate": 3.4781482370321677e-07, + "loss": 0.2777, + "step": 11390, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.9174917491749175, + "success_rate.epoch.env.math": 0.9752431476569408, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8526179702650291, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637390732521442, + "success_rate.epoch.global": 0.9086129193790686, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9919117647058824, + "tokens_p.mean_in_band": 0.8015625, + "tokens_rate.above_band": 0.9444444444444444, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05555555555555555 + }, + { + "epoch": 2.427567106945036, + "grad_norm": 302.58357906753685, + "learning_rate": 3.477823034108627e-07, + "loss": 0.1959, + "step": 11395, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.9180327868852459, + "success_rate.epoch.env.math": 0.9753086419753086, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8527131782945736, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639373704663533, + "success_rate.epoch.global": 0.9088411588411588, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990646258503402, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.428632296548786, + "grad_norm": 86.80230576270047, + "learning_rate": 3.47749792022678e-07, + "loss": 0.2918, + "step": 11400, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9181669394435352, + "success_rate.epoch.env.math": 0.9753954305799648, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8518041237113402, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638971255052514, + "success_rate.epoch.global": 0.908570004982561, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968245967741935, + "tokens_p.mean_in_band": 0.68359375, + "tokens_rate.above_band": 0.9810126582278481, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0189873417721519 + }, + { + "epoch": 2.429697486152535, + "grad_norm": 194.61855717702295, + "learning_rate": 3.477172895623569e-07, + "loss": 0.2382, + "step": 11405, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9183006535947712, + "success_rate.epoch.env.math": 0.9754601226993865, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8521850899742931, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640575419716445, + "success_rate.epoch.global": 0.9087972166998012, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981449771689498, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9984802431610942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001519756838905775 + }, + { + "epoch": 2.430762675756285, + "grad_norm": 361.56079632673146, + "learning_rate": 3.4768479605358683e-07, + "loss": 0.2162, + "step": 11410, + "success_rate.epoch.env.abd": 0.9834254143646409, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9533678756476683, + "success_rate.epoch.env.logic": 0.9185667752442996, + "success_rate.epoch.env.math": 0.9746724890829694, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8523748395378691, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640578323603413, + "success_rate.epoch.global": 0.9087754090233019, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978618421052632, + "tokens_p.mean_in_band": 0.29296875, + "tokens_rate.above_band": 0.9947643979057592, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005235602094240838 + }, + { + "epoch": 2.431827865360034, + "grad_norm": 103.97261188741398, + "learning_rate": 3.47652311520049e-07, + "loss": 0.3553, + "step": 11415, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9536082474226805, + "success_rate.epoch.env.logic": 0.9186991869918699, + "success_rate.epoch.env.math": 0.974716652136007, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8514724711907811, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640301706549721, + "success_rate.epoch.global": 0.9085064292779427, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967320261437909, + "tokens_p.mean_in_band": 0.6657366071428571, + "tokens_rate.above_band": 0.9954456733897202, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004554326610279766 + }, + { + "epoch": 2.4328930549637837, + "grad_norm": 65.81892410091692, + "learning_rate": 3.476198359854177e-07, + "loss": 0.3064, + "step": 11420, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9189627228525121, + "success_rate.epoch.env.math": 0.9747826086956521, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8517571884984025, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641290429239075, + "success_rate.epoch.global": 0.908732116428219, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997360641891891, + "tokens_p.mean_in_band": 0.703125, + "tokens_rate.above_band": 0.9983136593591906, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016863406408094434 + }, + { + "epoch": 2.433958244567533, + "grad_norm": 127.67852529289556, + "learning_rate": 3.47587369473361e-07, + "loss": 0.099, + "step": 11425, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.919093851132686, + "success_rate.epoch.env.math": 0.9748263888888888, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8523233609166136, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642279576652379, + "success_rate.epoch.global": 0.9089566929133859, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.993646978021978, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9891304347826086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010869565217391304 + }, + { + "epoch": 2.4350234341712826, + "grad_norm": 85.56016805031243, + "learning_rate": 3.475549120075402e-07, + "loss": 0.1275, + "step": 11430, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9193548387096774, + "success_rate.epoch.env.math": 0.9749134948096886, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8526984126984127, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642936981452006, + "success_rate.epoch.global": 0.9091801669121257, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9930555555555556, + "tokens_p.mean_in_band": 0.8141741071428571, + "tokens_rate.above_band": 0.9278350515463918, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07216494845360824 + }, + { + "epoch": 2.436088623775032, + "grad_norm": 92.90690599262439, + "learning_rate": 3.4752246361161016e-07, + "loss": 0.2782, + "step": 11435, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9540816326530612, + "success_rate.epoch.env.logic": 0.9193548387096774, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8532574320050601, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8643523821903787, + "success_rate.epoch.global": 0.9094025465230167, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.993287037037037, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9854014598540146, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014598540145985401 + }, + { + "epoch": 2.4371538133787816, + "grad_norm": 143.73509558216423, + "learning_rate": 3.47490024309219e-07, + "loss": 0.3133, + "step": 11440, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9196141479099679, + "success_rate.epoch.env.math": 0.9750215331610681, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8538122243226213, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644495388120188, + "success_rate.epoch.global": 0.9096238397655105, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992843511450382, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.438219002982531, + "grad_norm": 259.5958069642486, + "learning_rate": 3.4745759412400813e-07, + "loss": 0.3571, + "step": 11445, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.92, + "success_rate.epoch.env.math": 0.9750644883920895, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8535512256442489, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644958635522314, + "success_rate.epoch.global": 0.9096003898635477, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962225274725275, + "tokens_p.mean_in_band": 0.6966145833333334, + "tokens_rate.above_band": 0.9238578680203046, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07614213197969544 + }, + { + "epoch": 2.4392841925862805, + "grad_norm": 99.21318413765184, + "learning_rate": 3.4742517307961257e-07, + "loss": 0.4552, + "step": 11450, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9543147208121827, + "success_rate.epoch.env.logic": 0.9202551834130781, + "success_rate.epoch.env.math": 0.9751499571550986, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8532915360501567, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8645032237869583, + "success_rate.epoch.global": 0.9095770539620807, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950657894736842, + "tokens_p.mean_in_band": 0.6218039772727273, + "tokens_rate.above_band": 0.8962264150943396, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10377358490566038 + }, + { + "epoch": 2.4403493821900297, + "grad_norm": 37.63868455620462, + "learning_rate": 3.473927611996605e-07, + "loss": 0.262, + "step": 11455, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9203821656050956, + "success_rate.epoch.env.math": 0.9751499571550986, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8536585365853658, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639981195863641, + "success_rate.epoch.global": 0.9095538312318138, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995216836734694, + "tokens_p.mean_in_band": 0.4375, + "tokens_rate.above_band": 0.9987261146496815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012738853503184713 + }, + { + "epoch": 2.4414145717937794, + "grad_norm": 505.1513087001165, + "learning_rate": 3.4736035850777347e-07, + "loss": 0.2379, + "step": 11460, + "success_rate.epoch.env.abd": 0.9837837837837838, + "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.919047619047619, + "success_rate.epoch.env.math": 0.9752136752136752, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8533998752339363, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638798400364505, + "success_rate.epoch.global": 0.909288824383164, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0002132444168734, + "tokens_p.mean_in_band": 0.5558449074074074, + "tokens_rate.above_band": 0.9835265405735204, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01647345942647956 + }, + { + "epoch": 2.4424797613975286, + "grad_norm": 154.4784904410716, + "learning_rate": 3.4732796502756637e-07, + "loss": 0.2254, + "step": 11465, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9177215189873418, + "success_rate.epoch.env.math": 0.9752559726962458, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8538557213930348, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638124970855, + "success_rate.epoch.global": 0.9092664092664092, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964345637583892, + "tokens_p.mean_in_band": 0.54296875, + "tokens_rate.above_band": 0.962843295638126, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03715670436187399 + }, + { + "epoch": 2.4435449510012783, + "grad_norm": 82.59548213976454, + "learning_rate": 3.472955807826472e-07, + "loss": 0.1186, + "step": 11470, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, + "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, + "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9178515007898894, + "success_rate.epoch.env.math": 0.9753191489361702, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8543990086741016, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638794466603671, + "success_rate.epoch.global": 0.909484833895041, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9909274193548387, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4446101406050276, + "grad_norm": 211.67444853530813, + "learning_rate": 3.472632057966176e-07, + "loss": 0.2402, + "step": 11475, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.853448275862069, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.9178515007898894, + "success_rate.epoch.env.math": 0.9753610875106202, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8547589616810878, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634806262972393, + "success_rate.epoch.global": 0.909462055715658, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997656955736224, + "tokens_p.mean_in_band": 0.572265625, + "tokens_rate.above_band": 0.9981965734896303, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0018034265103697023 + }, + { + "epoch": 2.4456753302087773, + "grad_norm": 103.98837166252731, + "learning_rate": 3.47230840093072e-07, + "loss": 0.3586, + "step": 11480, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, + "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9547738693467337, + "success_rate.epoch.env.logic": 0.917981072555205, + "success_rate.epoch.env.math": 0.9754445385266723, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8551171393341553, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636464244129866, + "success_rate.epoch.global": 0.9096789650215621, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.8528645833333334, + "tokens_rate.above_band": 0.9810126582278481, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0189873417721519 + }, + { + "epoch": 2.4467405198125265, + "grad_norm": 78.73909494746351, + "learning_rate": 3.471984836955983e-07, + "loss": 0.2572, + "step": 11485, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, + "success_rate.epoch.env.agentgym:sciworld": 0.9558823529411765, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.9583333333333334, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.917981072555205, + "success_rate.epoch.env.math": 0.9755067567567568, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8547692307692307, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637212238002232, + "success_rate.epoch.global": 0.9096558317399618, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961622807017544, + "tokens_p.mean_in_band": 0.5792410714285714, + "tokens_rate.above_band": 0.9661016949152542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03389830508474576 + }, + { + "epoch": 2.447805709416276, + "grad_norm": 152.68207496662538, + "learning_rate": 3.4716613662777785e-07, + "loss": 0.2705, + "step": 11490, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, + "success_rate.epoch.env.agentgym:sciworld": 0.9568345323741008, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9181102362204724, + "success_rate.epoch.env.math": 0.975548060708263, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.855036855036855, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639991272533133, + "success_rate.epoch.global": 0.9098712446351931, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982221006564551, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9978165938864629, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002183406113537118 + }, + { + "epoch": 2.4488708990200254, + "grad_norm": 56.7599018824399, + "learning_rate": 3.471337989131847e-07, + "loss": 0.2709, + "step": 11495, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, + "success_rate.epoch.env.agentgym:sciworld": 0.9568345323741008, + "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9184952978056427, + "success_rate.epoch.env.math": 0.975609756097561, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8553034947884733, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640718225474483, + "success_rate.epoch.global": 0.9100856327307326, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9942129629629629, + "tokens_p.mean_in_band": 0.7447916666666666, + "tokens_rate.above_band": 0.9642857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03571428571428571 + }, + { + "epoch": 2.449936088623775, + "grad_norm": 113.19760536961066, + "learning_rate": 3.471014705753865e-07, + "loss": 0.2691, + "step": 11500, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8559322033898306, + "success_rate.epoch.env.agentgym:sciworld": 0.9568345323741008, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.918622848200313, + "success_rate.epoch.env.math": 0.9756711409395973, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.854434250764526, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642135593127087, + "success_rate.epoch.global": 0.9098243948742287, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9977176966292135, + "tokens_p.mean_in_band": 0.33735795454545453, + "tokens_rate.above_band": 0.9798165137614679, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02018348623853211 + }, + { + "epoch": 2.4510012782275243, + "grad_norm": 24.636930932812355, + "learning_rate": 3.4706915163794375e-07, + "loss": 0.1825, + "step": 11505, + "success_rate.epoch.env.abd": 0.983957219251337, + "success_rate.epoch.env.agentgym:alfworld": 0.8559322033898306, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9188767550702028, + "success_rate.epoch.env.math": 0.975752508361204, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8540012217470984, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.864252853312733, + "success_rate.epoch.global": 0.9098011363636364, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980736301369864, + "tokens_p.mean_in_band": 0.2848557692307692, + "tokens_rate.above_band": 0.9573770491803278, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04262295081967213 + }, + { + "epoch": 2.452066467831274, + "grad_norm": 153.46031226260934, + "learning_rate": 3.470368421244105e-07, + "loss": 0.2244, + "step": 11510, + "success_rate.epoch.env.abd": 0.9842105263157894, + "success_rate.epoch.env.agentgym:alfworld": 0.8487394957983193, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9190031152647975, + "success_rate.epoch.env.math": 0.975752508361204, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.853836784409257, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863618537160887, + "success_rate.epoch.global": 0.9095418044402457, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988407258064517, + "tokens_p.mean_in_band": 0.5881696428571429, + "tokens_rate.above_band": 0.9779179810725552, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022082018927444796 + }, + { + "epoch": 2.4531316574350233, + "grad_norm": 50.78545166643288, + "learning_rate": 3.4700454205833345e-07, + "loss": 0.1686, + "step": 11515, + "success_rate.epoch.env.abd": 0.9842931937172775, + "success_rate.epoch.env.agentgym:alfworld": 0.8487394957983193, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9191290824261276, + "success_rate.epoch.env.math": 0.9758333333333333, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8541919805589308, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636771422231252, + "success_rate.epoch.global": 0.9097549481621112, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967873831775701, + "tokens_p.mean_in_band": 0.8463541666666666, + "tokens_rate.above_band": 0.9727272727272728, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02727272727272727 + }, + { + "epoch": 2.454196847038773, + "grad_norm": 235.36057636835326, + "learning_rate": 3.469722514632528e-07, + "loss": 0.3356, + "step": 11520, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9193798449612403, + "success_rate.epoch.env.math": 0.9758735440931781, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8539393939393939, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8638026601467456, + "success_rate.epoch.global": 0.9097320169252469, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998766447368421, + "tokens_p.mean_in_band": 0.609375, + "tokens_rate.above_band": 0.979381443298969, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020618556701030927 + }, + { + "epoch": 2.455262036642522, + "grad_norm": 73.33270362818654, + "learning_rate": 3.469399703627017e-07, + "loss": 0.2989, + "step": 11525, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.96875, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9554455445544554, + "success_rate.epoch.env.logic": 0.9197530864197531, + "success_rate.epoch.env.math": 0.9751243781094527, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.853599516031458, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637375871982775, + "success_rate.epoch.global": 0.9094746716697936, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8055555555555555, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980357142857142, + "tokens_p.mean_in_band": 0.671875, + "tokens_rate.above_band": 0.9562841530054644, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04371584699453552 + }, + { + "epoch": 2.456327226246272, + "grad_norm": 54.47932816134175, + "learning_rate": 3.4690769878020627e-07, + "loss": 0.1991, + "step": 11530, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.9202453987730062, + "success_rate.epoch.env.math": 0.9751449875724938, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8537764350453172, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863926098008627, + "success_rate.epoch.global": 0.9096864763687412, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0001041666666666, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4573924158500215, + "grad_norm": 89.9965072813347, + "learning_rate": 3.4687543673928586e-07, + "loss": 0.2778, + "step": 11535, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9203675344563553, + "success_rate.epoch.env.math": 0.9751449875724938, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8543046357615894, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636104839637032, + "success_rate.epoch.global": 0.9096638655462185, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9916038074712644, + "tokens_p.mean_in_band": 0.6220118087557603, + "tokens_rate.above_band": 0.8651336233685519, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1348663766314481 + }, + { + "epoch": 2.458457605453771, + "grad_norm": 440.90524424875747, + "learning_rate": 3.4684318426345293e-07, + "loss": 0.3518, + "step": 11540, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9203675344563553, + "success_rate.epoch.env.math": 0.9751861042183623, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8538046734571599, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863568770722016, + "success_rate.epoch.global": 0.9094084769445738, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9866763565891473, + "tokens_p.mean_in_band": 0.7350643382352942, + "tokens_rate.above_band": 0.8835616438356164, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11643835616438356 + }, + { + "epoch": 2.45952279505752, + "grad_norm": 73.24932544978624, + "learning_rate": 3.4681094137621275e-07, + "loss": 0.1991, + "step": 11545, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9208523592085236, + "success_rate.epoch.env.math": 0.9752475247524752, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8534688995215312, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635879044811662, + "success_rate.epoch.global": 0.9093866171003717, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9932324840764332, + "tokens_p.mean_in_band": 0.673828125, + "tokens_rate.above_band": 0.9289940828402367, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07100591715976332 + }, + { + "epoch": 2.4605879846612697, + "grad_norm": 52.70768315769327, + "learning_rate": 3.4677870810106364e-07, + "loss": 0.1735, + "step": 11550, + "success_rate.epoch.env.abd": 0.9844559585492227, + "success_rate.epoch.env.agentgym:alfworld": 0.8512396694214877, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9209726443768997, + "success_rate.epoch.env.math": 0.9752883031301482, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.85381861575179, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8637543960945312, + "success_rate.epoch.global": 0.9095966620305981, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997163955479452, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4616531742650194, + "grad_norm": 254.35518799634457, + "learning_rate": 3.467464844614972e-07, + "loss": 0.1413, + "step": 11555, + "success_rate.epoch.env.abd": 0.9845360824742269, + "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9514563106796117, + "success_rate.epoch.env.logic": 0.9209726443768997, + "success_rate.epoch.env.math": 0.9753492193919474, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8542534205829864, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639175953663454, + "success_rate.epoch.global": 0.9098057354301573, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985795454545454, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9977324263038548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0022675736961451248 + }, + { + "epoch": 2.4627183638687686, + "grad_norm": 159.21691195554962, + "learning_rate": 3.467142704809975e-07, + "loss": 0.3126, + "step": 11560, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, + "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9209726443768997, + "success_rate.epoch.env.math": 0.9753896636587367, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8547717842323651, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8639969245148684, + "success_rate.epoch.global": 0.9100138440239963, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984038978494624, + "tokens_p.mean_in_band": 0.77734375, + "tokens_rate.above_band": 0.9973190348525469, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002680965147453083 + }, + { + "epoch": 2.463783553472518, + "grad_norm": 71.30557236258782, + "learning_rate": 3.466820661830421e-07, + "loss": 0.2282, + "step": 11565, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, + "success_rate.epoch.env.agentgym:sciworld": 0.958041958041958, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.921092564491654, + "success_rate.epoch.env.math": 0.9754901960784313, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8549437537004144, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8640867037452157, + "success_rate.epoch.global": 0.9102209944751382, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984529702970297, + "tokens_p.mean_in_band": 0.69140625, + "tokens_rate.above_band": 0.9805825242718447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019417475728155338 + }, + { + "epoch": 2.4648487430762676, + "grad_norm": 82.9666043908078, + "learning_rate": 3.466498715911011e-07, + "loss": 0.1536, + "step": 11570, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, + "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9516908212560387, + "success_rate.epoch.env.logic": 0.9213313161875946, + "success_rate.epoch.env.math": 0.9755501222493888, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8552009456264775, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641708617470311, + "success_rate.epoch.global": 0.9104271933853928, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975369458128078, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9950980392156863, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004901960784313725 + }, + { + "epoch": 2.4659139326800172, + "grad_norm": 32.05132161979001, + "learning_rate": 3.4661768672863774e-07, + "loss": 0.1857, + "step": 11575, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8536585365853658, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.9201807228915663, + "success_rate.epoch.env.math": 0.9755700325732899, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.855457227138643, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642476554489256, + "success_rate.epoch.global": 0.9104032997250229, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985320686540199, + "tokens_p.mean_in_band": 0.49107142857142855, + "tokens_rate.above_band": 0.9875111507582516, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012488849241748439 + }, + { + "epoch": 2.4669791222837665, + "grad_norm": 39.7553168130122, + "learning_rate": 3.4658551161910814e-07, + "loss": 0.1681, + "step": 11580, + "success_rate.epoch.env.abd": 0.9846938775510204, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.9201807228915663, + "success_rate.epoch.env.math": 0.9756493506493507, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8552941176470589, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8643473265104281, + "success_rate.epoch.global": 0.9103795153177869, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996510152284264, + "tokens_p.mean_in_band": 0.7546875, + "tokens_rate.above_band": 0.9516908212560387, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04830917874396135 + }, + { + "epoch": 2.468044311887516, + "grad_norm": 27.228230464070002, + "learning_rate": 3.465533462859613e-07, + "loss": 0.1176, + "step": 11585, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.95260663507109, + "success_rate.epoch.env.logic": 0.9204204204204204, + "success_rate.epoch.env.math": 0.975669099756691, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8555490311215502, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644632914478592, + "success_rate.epoch.global": 0.9105839416058394, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999467178175618, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4691095014912654, + "grad_norm": 44.531969535206905, + "learning_rate": 3.4652119075263905e-07, + "loss": 0.2738, + "step": 11590, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.95260663507109, + "success_rate.epoch.env.logic": 0.9204204204204204, + "success_rate.epoch.env.math": 0.9749596122778675, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8559718969555035, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.864437234934689, + "success_rate.epoch.global": 0.9105598543468366, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9913793103448276, + "tokens_p.mean_in_band": 0.5, + "tokens_rate.above_band": 0.9775280898876404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02247191011235955 + }, + { + "epoch": 2.470174691095015, + "grad_norm": 124.58083391346277, + "learning_rate": 3.4648904504257606e-07, + "loss": 0.3096, + "step": 11595, + "success_rate.epoch.env.abd": 0.9847715736040609, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9210134128166915, + "success_rate.epoch.env.math": 0.9749596122778675, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8557242990654206, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644889574905386, + "success_rate.epoch.global": 0.9105358764759309, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971264367816092, + "tokens_p.mean_in_band": 0.5404459635416666, + "tokens_rate.above_band": 0.9886363636363636, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011363636363636364 + }, + { + "epoch": 2.4712398806987643, + "grad_norm": 89.69715584799415, + "learning_rate": 3.464569091792e-07, + "loss": 0.2962, + "step": 11600, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9211309523809523, + "success_rate.epoch.env.math": 0.9750402576489533, + "success_rate.epoch.env.sat": 0.07692307692307693, + "success_rate.epoch.env.science": 0.8548951048951049, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8644385849459436, + "success_rate.epoch.global": 0.9102854553692795, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9929775280898876, + "tokens_p.mean_in_band": 0.400146484375, + "tokens_rate.above_band": 0.8476190476190476, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1523809523809524 + }, + { + "epoch": 2.472305070302514, + "grad_norm": 48.56348724429622, + "learning_rate": 3.4642478318593124e-07, + "loss": 0.206, + "step": 11605, + "success_rate.epoch.env.abd": 0.985, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9528301886792453, + "success_rate.epoch.env.logic": 0.9212481426448736, + "success_rate.epoch.env.math": 0.9750603378921963, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8547356188262638, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8641913391951104, + "success_rate.epoch.global": 0.9100361663652803, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.996625, + "tokens_p.mean_in_band": 0.6397372159090909, + "tokens_rate.above_band": 0.9191176470588235, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08088235294117647 + }, + { + "epoch": 2.4733702599062632, + "grad_norm": 67.10807693520958, + "learning_rate": 3.463926670861829e-07, + "loss": 0.1941, + "step": 11610, + "success_rate.epoch.env.abd": 0.985, + "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9213649851632048, + "success_rate.epoch.env.math": 0.9751004016064257, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8546612623045744, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8642189759427573, + "success_rate.epoch.global": 0.9100135317997293, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977134146341463, + "tokens_p.mean_in_band": 0.6875, + "tokens_rate.above_band": 0.9879518072289156, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012048192771084338 + }, + { + "epoch": 2.474435449510013, + "grad_norm": 129.39596683898907, + "learning_rate": 3.4636056090336096e-07, + "loss": 0.1696, + "step": 11615, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9201183431952663, + "success_rate.epoch.env.math": 0.9751602564102564, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8548293811451706, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8636211212219472, + "success_rate.epoch.global": 0.9097659765976598, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984088594704684, + "tokens_p.mean_in_band": 0.6881167763157895, + "tokens_rate.above_band": 0.9627450980392157, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03725490196078431 + }, + { + "epoch": 2.475500639113762, + "grad_norm": 33.13922157173911, + "learning_rate": 3.4632846466086426e-07, + "loss": 0.2553, + "step": 11620, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9189985272459499, + "success_rate.epoch.env.math": 0.9752198241406874, + "success_rate.epoch.env.sat": 0.07407407407407407, + "success_rate.epoch.env.science": 0.8545874206578188, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8635027386122892, + "success_rate.epoch.global": 0.9095195330040413, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8055555555555555, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9956530139103554, + "tokens_p.mean_in_band": 0.45682565789473684, + "tokens_rate.above_band": 0.9445255474452555, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05547445255474453 + }, + { + "epoch": 2.476565828717512, + "grad_norm": 43.805918696932984, + "learning_rate": 3.462963783820843e-07, + "loss": 0.1684, + "step": 11625, + "success_rate.epoch.env.abd": 0.9851485148514851, + "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9530516431924883, + "success_rate.epoch.env.logic": 0.9191176470588235, + "success_rate.epoch.env.math": 0.9752593774940144, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8544303797468354, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632691038482462, + "success_rate.epoch.global": 0.9092741935483871, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968327702702703, + "tokens_p.mean_in_band": 0.709375, + "tokens_rate.above_band": 0.8809523809523809, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11904761904761904 + }, + { + "epoch": 2.477631018321261, + "grad_norm": 17.613045496522837, + "learning_rate": 3.462643020904052e-07, + "loss": 0.1682, + "step": 11630, + "success_rate.epoch.env.abd": 0.9851485148514851, + "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9193548387096774, + "success_rate.epoch.env.math": 0.9753184713375797, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8545977011494252, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634391350594243, + "success_rate.epoch.global": 0.9094769780956639, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0015871278458843, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4786962079250108, + "grad_norm": 106.20804606607898, + "learning_rate": 3.4623223580920414e-07, + "loss": 0.2946, + "step": 11635, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9194729136163983, + "success_rate.epoch.env.math": 0.9753772835583797, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8544412607449857, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8634476447638282, + "success_rate.epoch.global": 0.9094558429973238, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9950810185185185, + "tokens_p.mean_in_band": 0.5357730263157895, + "tokens_rate.above_band": 0.8503937007874016, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14960629921259844 + }, + { + "epoch": 2.47976139752876, + "grad_norm": 243.35404457537976, + "learning_rate": 3.462001795618507e-07, + "loss": 0.3275, + "step": 11640, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, + "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9490740740740741, + "success_rate.epoch.env.logic": 0.9194729136163983, + "success_rate.epoch.env.math": 0.9754552652414885, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8542024013722127, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8630514782499215, + "success_rate.epoch.global": 0.9092122830440588, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9901818813716404, + "tokens_p.mean_in_band": 0.5979263630319149, + "tokens_rate.above_band": 0.8516179952644041, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1483820047355959 + }, + { + "epoch": 2.4808265871325097, + "grad_norm": 37.921405900959556, + "learning_rate": 3.461681333717071e-07, + "loss": 0.1655, + "step": 11645, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9198250728862973, + "success_rate.epoch.env.math": 0.9754940711462451, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8542857142857143, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.863247949060914, + "success_rate.epoch.global": 0.9094138543516874, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999754259501966, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.481891776736259, + "grad_norm": 149.74655241448744, + "learning_rate": 3.461360972621286e-07, + "loss": 0.3679, + "step": 11650, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, + "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9493087557603687, + "success_rate.epoch.env.logic": 0.9188405797101449, + "success_rate.epoch.env.math": 0.9755134281200631, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.854618015963512, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632158335478057, + "success_rate.epoch.global": 0.909392999556934, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957729468599034, + "tokens_p.mean_in_band": 0.7159598214285714, + "tokens_rate.above_band": 0.9366515837104072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06334841628959276 + }, + { + "epoch": 2.4829569663400086, + "grad_norm": 121.2565545041361, + "learning_rate": 3.461040712564628e-07, + "loss": 0.202, + "step": 11655, + "success_rate.epoch.env.abd": 0.9852216748768473, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9495412844036697, + "success_rate.epoch.env.logic": 0.9190751445086706, + "success_rate.epoch.env.math": 0.9755713159968479, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8547835990888383, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633596362788136, + "success_rate.epoch.global": 0.9095732920627901, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962071240105541, + "tokens_p.mean_in_band": 0.7484375, + "tokens_rate.above_band": 0.9869791666666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013020833333333334 + }, + { + "epoch": 2.484022155943758, + "grad_norm": 160.30270076728354, + "learning_rate": 3.4607205537804993e-07, + "loss": 0.2054, + "step": 11660, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9495412844036697, + "success_rate.epoch.env.logic": 0.9191919191919192, + "success_rate.epoch.env.math": 0.9748427672955975, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8545454545454545, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8633140281150772, + "success_rate.epoch.global": 0.9093315684976837, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8833333333333332, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.99609375, + "tokens_p.mean_in_band": 0.6497395833333334, + "tokens_rate.above_band": 0.9440993788819876, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.055900621118012424 + }, + { + "epoch": 2.4850873455475075, + "grad_norm": 74.7418241140314, + "learning_rate": 3.46040049650223e-07, + "loss": 0.2437, + "step": 11665, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9497716894977168, + "success_rate.epoch.env.logic": 0.9193083573487032, + "success_rate.epoch.env.math": 0.9749412685982772, + "success_rate.epoch.env.sat": 0.07142857142857142, + "success_rate.epoch.env.science": 0.8536585365853658, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8632738850781153, + "success_rate.epoch.global": 0.9090909090909091, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9966313073394495, + "tokens_p.mean_in_band": 0.6186079545454546, + "tokens_rate.above_band": 0.9753914988814317, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024608501118568233 + }, + { + "epoch": 2.486152535151257, + "grad_norm": 76.71743606210104, + "learning_rate": 3.4600805409630764e-07, + "loss": 0.2261, + "step": 11670, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9183381088825215, + "success_rate.epoch.env.math": 0.9749804534792806, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8537414965986394, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661349439304203, + "success_rate.epoch.global": 0.9090709422358885, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.000458211143695, + "tokens_p.mean_in_band": 0.4696044921875, + "tokens_rate.above_band": 0.9696682464454977, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03033175355450237 + }, + { + "epoch": 2.4872177247550065, + "grad_norm": 186.86214227877957, + "learning_rate": 3.459760687396219e-07, + "loss": 0.2013, + "step": 11675, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9504504504504504, + "success_rate.epoch.env.logic": 0.9186875891583453, + "success_rate.epoch.env.math": 0.9750195160031225, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.853423882286361, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661823420156786, + "success_rate.epoch.global": 0.9090510628972167, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984012619372442, + "tokens_p.mean_in_band": 0.615625, + "tokens_rate.above_band": 0.9966009517335146, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003399048266485384 + }, + { + "epoch": 2.4882829143587557, + "grad_norm": 15.866613156525055, + "learning_rate": 3.4594409360347647e-07, + "loss": 0.1673, + "step": 11680, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9504504504504504, + "success_rate.epoch.env.logic": 0.9190340909090909, + "success_rate.epoch.env.math": 0.9751166407465007, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.853024307518372, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661863467180542, + "success_rate.epoch.global": 0.9090312705007654, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9916930379746836, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.9634146341463414, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036585365853658534 + }, + { + "epoch": 2.4893481039625054, + "grad_norm": 75.41214746544202, + "learning_rate": 3.459121287111746e-07, + "loss": 0.235, + "step": 11685, + "success_rate.epoch.env.abd": 0.9853658536585366, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9192634560906515, + "success_rate.epoch.env.math": 0.9751359751359752, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8532731376975169, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8663163716794672, + "success_rate.epoch.global": 0.909229762164521, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0007974952741021, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9995276334435522, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0004723665564478035 + }, + { + "epoch": 2.4904132935662546, + "grad_norm": 60.60112702534587, + "learning_rate": 3.458801740860121e-07, + "loss": 0.3327, + "step": 11690, + "success_rate.epoch.env.abd": 0.9854368932038835, + "success_rate.epoch.env.agentgym:alfworld": 0.8515625, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9192634560906515, + "success_rate.epoch.env.math": 0.9751745539177658, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8532883642495784, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8663277212139399, + "success_rate.epoch.global": 0.9092096668843893, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9923155737704918, + "tokens_p.mean_in_band": 0.74140625, + "tokens_rate.above_band": 0.9606299212598425, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03937007874015748 + }, + { + "epoch": 2.4914784831700043, + "grad_norm": 83.3353500087414, + "learning_rate": 3.4584822975127727e-07, + "loss": 0.3095, + "step": 11695, + "success_rate.epoch.env.abd": 0.9854368932038835, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9194915254237288, + "success_rate.epoch.env.math": 0.9752130131680867, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8525784753363229, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866392022847096, + "success_rate.epoch.global": 0.9089724092982837, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971374045801527, + "tokens_p.mean_in_band": 0.501678466796875, + "tokens_rate.above_band": 0.9703703703703703, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02962962962962963 + }, + { + "epoch": 2.4925436727737535, + "grad_norm": 230.28593129715713, + "learning_rate": 3.4581629573025084e-07, + "loss": 0.2218, + "step": 11700, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9197183098591549, + "success_rate.epoch.env.math": 0.9752895752895753, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8527435610302352, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.866517397534206, + "success_rate.epoch.global": 0.9091697376978105, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955065359477124, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.4936088623775032, + "grad_norm": 128.89215135374454, + "learning_rate": 3.4578437204620617e-07, + "loss": 0.2854, + "step": 11705, + "success_rate.epoch.env.abd": 0.9856459330143541, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9200561009817672, + "success_rate.epoch.env.math": 0.9753086419753086, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.852513966480447, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8665415747974877, + "success_rate.epoch.global": 0.90914990266061, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.993421052631579, + "tokens_p.mean_in_band": 0.365234375, + "tokens_rate.above_band": 0.9405940594059405, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0594059405940594 + }, + { + "epoch": 2.4946740519812525, + "grad_norm": 591.9981165598932, + "learning_rate": 3.457524587224091e-07, + "loss": 0.2783, + "step": 11710, + "success_rate.epoch.env.abd": 0.985781990521327, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.96, + "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9511111111111111, + "success_rate.epoch.env.logic": 0.9201680672268907, + "success_rate.epoch.env.math": 0.9753276792598303, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.852924791086351, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666276058900659, + "success_rate.epoch.global": 0.9093459961148284, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993610594795539, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9981447124304267, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0018552875695732839 + }, + { + "epoch": 2.495739241585002, + "grad_norm": 101.91699001828762, + "learning_rate": 3.4572055578211756e-07, + "loss": 0.2473, + "step": 11715, + "success_rate.epoch.env.abd": 0.9858490566037735, + "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9513274336283186, + "success_rate.epoch.env.logic": 0.9189944134078212, + "success_rate.epoch.env.math": 0.9753656658968437, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8530884808013356, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8666612388526556, + "success_rate.epoch.global": 0.9093258668964032, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9285714285714286, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0008012820512822, + "tokens_p.mean_in_band": 0.7483258928571429, + "tokens_rate.above_band": 0.9925768822905621, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007423117709437964 + }, + { + "epoch": 2.4968044311887514, + "grad_norm": 446.2540681917151, + "learning_rate": 3.456886632485825e-07, + "loss": 0.3318, + "step": 11720, + "success_rate.epoch.env.abd": 0.9858490566037735, + "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9192200557103064, + "success_rate.epoch.env.math": 0.9754035357417371, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8527777777777777, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8660801382685682, + "success_rate.epoch.global": 0.9090909090909091, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982881310418904, + "tokens_p.mean_in_band": 0.6337890625, + "tokens_rate.above_band": 0.9883227176220807, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01167728237791932 + }, + { + "epoch": 2.497869620792501, + "grad_norm": 163.65331932638185, + "learning_rate": 3.456567811450468e-07, + "loss": 0.2001, + "step": 11725, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9192200557103064, + "success_rate.epoch.env.math": 0.97544128933231, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8533480907581626, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661414567162891, + "success_rate.epoch.global": 0.9092858674672958, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956219806763285, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9764150943396226, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02358490566037736 + }, + { + "epoch": 2.4989348103962508, + "grad_norm": 22.243719943978714, + "learning_rate": 3.456249094947458e-07, + "loss": 0.185, + "step": 11730, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9196675900277008, + "success_rate.epoch.env.math": 0.9754601226993865, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8535911602209945, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8663186976909922, + "success_rate.epoch.global": 0.9094799914401883, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981409348441926, + "tokens_p.mean_in_band": 0.83984375, + "tokens_rate.above_band": 0.9943661971830986, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005633802816901409 + }, + { + "epoch": 2.5, + "grad_norm": 108.83580877913427, + "learning_rate": 3.4559304832090754e-07, + "loss": 0.2537, + "step": 11735, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9186206896551724, + "success_rate.epoch.env.math": 0.975553857906799, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8537527593818984, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8662467371451549, + "success_rate.epoch.global": 0.9094597480247705, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985608552631579, + "tokens_p.mean_in_band": 0.70703125, + "tokens_rate.above_band": 0.95, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05 + }, + { + "epoch": 2.5010651896037492, + "grad_norm": 62.47162215038496, + "learning_rate": 3.455611976467522e-07, + "loss": 0.1701, + "step": 11740, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9175824175824175, + "success_rate.epoch.env.math": 0.9755725190839695, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8541552008805724, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8662143958479103, + "success_rate.epoch.global": 0.9094395908800341, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952290076335878, + "tokens_p.mean_in_band": 0.609375, + "tokens_rate.above_band": 0.9924242424242424, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007575757575757576 + }, + { + "epoch": 2.502130379207499, + "grad_norm": 378.8348212985776, + "learning_rate": 3.455293574954922e-07, + "loss": 0.1936, + "step": 11745, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9178082191780822, + "success_rate.epoch.env.math": 0.975609756097561, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8540866703236424, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8662320783981219, + "success_rate.epoch.global": 0.9094195194556666, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9875, + "tokens_p.mean_in_band": 0.6967075892857143, + "tokens_rate.above_band": 0.8653846153846154, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.1346153846153846 + }, + { + "epoch": 2.5031955688112486, + "grad_norm": 80.84940404778263, + "learning_rate": 3.454975278903324e-07, + "loss": 0.2153, + "step": 11750, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, + "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9167803547066848, + "success_rate.epoch.env.math": 0.9756468797564688, + "success_rate.epoch.env.sat": 0.10344827586206896, + "success_rate.epoch.env.science": 0.8544061302681992, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8661945072224931, + "success_rate.epoch.global": 0.9093995332060258, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9949363425925926, + "tokens_p.mean_in_band": 0.779296875, + "tokens_rate.above_band": 0.9818181818181818, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01818181818181818 + }, + { + "epoch": 2.504260758414998, + "grad_norm": 127.90250998608596, + "learning_rate": 3.454657088544702e-07, + "loss": 0.1443, + "step": 11755, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9168937329700273, + "success_rate.epoch.env.math": 0.9757207890743551, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8544857768052516, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8691674576569096, + "success_rate.epoch.global": 0.9095913614228245, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999124343257443, + "tokens_p.mean_in_band": 0.796875, + "tokens_rate.above_band": 0.9947735191637631, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005226480836236934 + }, + { + "epoch": 2.505325948018747, + "grad_norm": 106.26361396706993, + "learning_rate": 3.4543390041109484e-07, + "loss": 0.2536, + "step": 11760, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9515418502202643, + "success_rate.epoch.env.logic": 0.9168937329700273, + "success_rate.epoch.env.math": 0.9758490566037736, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8546448087431694, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692164268395479, + "success_rate.epoch.global": 0.9097823790407775, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981715425531915, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9947089947089947, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005291005291005291 + }, + { + "epoch": 2.5063911376224968, + "grad_norm": 93.51863281513182, + "learning_rate": 3.454021025833882e-07, + "loss": 0.2033, + "step": 11765, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9519650655021834, + "success_rate.epoch.env.logic": 0.9170068027210885, + "success_rate.epoch.env.math": 0.9759217456734387, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8548827059465357, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8692934151400942, + "success_rate.epoch.global": 0.9099725911870125, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0001858275520317, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9980217606330366, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019782393669634025 + }, + { + "epoch": 2.5074563272262465, + "grad_norm": 35.81025600724148, + "learning_rate": 3.453703153945243e-07, + "loss": 0.0896, + "step": 11770, + "success_rate.epoch.env.abd": 0.985981308411215, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9521739130434783, + "success_rate.epoch.env.logic": 0.9172320217096337, + "success_rate.epoch.env.math": 0.9759579263711495, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8552774755168662, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693720530308107, + "success_rate.epoch.global": 0.9101620029455081, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991834916864608, + "tokens_p.mean_in_band": 0.892578125, + "tokens_rate.above_band": 0.9952718676122931, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004728132387706856 + }, + { + "epoch": 2.5085215168299957, + "grad_norm": 46.35443262909214, + "learning_rate": 3.453385388676694e-07, + "loss": 0.2168, + "step": 11775, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9172320217096337, + "success_rate.epoch.env.math": 0.9759939984996249, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8550488599348535, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869424127213577, + "success_rate.epoch.global": 0.9101406676464413, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988425925925926, + "tokens_p.mean_in_band": 0.484375, + "tokens_rate.above_band": 0.9830097087378641, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01699029126213592 + }, + { + "epoch": 2.509586706433745, + "grad_norm": 924.0894569565932, + "learning_rate": 3.4530677302598205e-07, + "loss": 0.4212, + "step": 11780, + "success_rate.epoch.env.abd": 0.986046511627907, + "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, + "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9172320217096337, + "success_rate.epoch.env.math": 0.9760479041916168, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8545159545700378, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869380581788775, + "success_rate.epoch.global": 0.9099099099099099, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.8571428571428572, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9924242424242424, + "tokens_p.mean_in_band": 0.5083333333333333, + "tokens_rate.above_band": 0.868421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13157894736842105 + }, + { + "epoch": 2.5106518960374946, + "grad_norm": 36.89431111688203, + "learning_rate": 3.452750178926129e-07, + "loss": 0.127, + "step": 11785, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8507462686567164, + "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9172320217096337, + "success_rate.epoch.env.math": 0.9761549925484352, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8545945945945946, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695240173239133, + "success_rate.epoch.global": 0.9100982646874347, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0005194663167105, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.5117170856412443, + "grad_norm": 271.9096404052713, + "learning_rate": 3.452432734907049e-07, + "loss": 0.3341, + "step": 11790, + "success_rate.epoch.env.abd": 0.9861751152073732, + "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, + "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9173441734417345, + "success_rate.epoch.env.math": 0.9762258543833581, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8548300053966541, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696683820203206, + "success_rate.epoch.global": 0.910285833507198, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982358870967742, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9763779527559056, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023622047244094488 + }, + { + "epoch": 2.5127822752449935, + "grad_norm": 96.73718424779733, + "learning_rate": 3.452115398433931e-07, + "loss": 0.2184, + "step": 11795, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9525862068965517, + "success_rate.epoch.env.logic": 0.9174560216508796, + "success_rate.epoch.env.math": 0.9762787249814677, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8551427032848681, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697395375260875, + "success_rate.epoch.global": 0.9104726212783677, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983928571428572, + "tokens_p.mean_in_band": 0.78515625, + "tokens_rate.above_band": 0.9887005649717514, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011299435028248588 + }, + { + "epoch": 2.513847464848743, + "grad_norm": 115.53188679644788, + "learning_rate": 3.4517981697380487e-07, + "loss": 0.2252, + "step": 11800, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9527896995708155, + "success_rate.epoch.env.logic": 0.9174560216508796, + "success_rate.epoch.env.math": 0.9763488543976349, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.855531686358754, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697997743592073, + "success_rate.epoch.global": 0.9106586328693123, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9952847633136095, + "tokens_p.mean_in_band": 0.8203125, + "tokens_rate.above_band": 0.9941176470588236, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0058823529411764705 + }, + { + "epoch": 2.5149126544524925, + "grad_norm": 44.20564611105242, + "learning_rate": 3.451481049050594e-07, + "loss": 0.2193, + "step": 11805, + "success_rate.epoch.env.abd": 0.9862385321100917, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9531914893617022, + "success_rate.epoch.env.logic": 0.9175675675675675, + "success_rate.epoch.env.math": 0.9763663220088626, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8559185859667916, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699822314088049, + "success_rate.epoch.global": 0.9108438731080241, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997800736497545, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.515977844056242, + "grad_norm": 167.84103061086964, + "learning_rate": 3.4511640366026847e-07, + "loss": 0.3937, + "step": 11810, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9176788124156545, + "success_rate.epoch.env.math": 0.9763663220088626, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8558462359850507, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869616113064085, + "success_rate.epoch.global": 0.9106145251396648, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9952766021765417, + "tokens_p.mean_in_band": 0.6990209651898734, + "tokens_rate.above_band": 0.91280353200883, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08719646799116998 + }, + { + "epoch": 2.5170430336599914, + "grad_norm": 316.42876209930756, + "learning_rate": 3.4508471326253555e-07, + "loss": 0.5389, + "step": 11815, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9176788124156545, + "success_rate.epoch.env.math": 0.9764359351988218, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8557743480574774, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696159062697565, + "success_rate.epoch.global": 0.9105926078876729, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9166666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9895833333333334, + "tokens_p.mean_in_band": 0.55078125, + "tokens_rate.above_band": 0.9590163934426229, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.040983606557377046 + }, + { + "epoch": 2.518108223263741, + "grad_norm": 84.00079833773995, + "learning_rate": 3.4505303373495643e-07, + "loss": 0.2716, + "step": 11820, + "success_rate.epoch.env.abd": 0.9817351598173516, + "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.9177897574123989, + "success_rate.epoch.env.math": 0.9765051395007343, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8555496548061604, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696332976428423, + "success_rate.epoch.global": 0.9105707809602308, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972098214285714, + "tokens_p.mean_in_band": 0.228125, + "tokens_rate.above_band": 0.9438202247191011, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.056179775280898875 + }, + { + "epoch": 2.5191734128674903, + "grad_norm": 34.11316047221333, + "learning_rate": 3.450213651006189e-07, + "loss": 0.2427, + "step": 11825, + "success_rate.epoch.env.abd": 0.9818181818181818, + "success_rate.epoch.env.agentgym:alfworld": 0.8540145985401459, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9533898305084746, + "success_rate.epoch.env.logic": 0.918010752688172, + "success_rate.epoch.env.math": 0.9765395894428153, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8558558558558559, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697894876735636, + "success_rate.epoch.global": 0.910754678182192, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986213235294118, + "tokens_p.mean_in_band": 0.80078125, + "tokens_rate.above_band": 0.9951219512195122, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004878048780487805 + }, + { + "epoch": 2.52023860247124, + "grad_norm": 39.41352343538918, + "learning_rate": 3.449897073826029e-07, + "loss": 0.1307, + "step": 11830, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.8489208633093526, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9181208053691275, + "success_rate.epoch.env.math": 0.9765567765567765, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8561607615018508, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8693910647883314, + "success_rate.epoch.global": 0.9107326082495383, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9995909685863874, + "tokens_p.mean_in_band": 0.6865234375, + "tokens_rate.above_band": 0.9986928104575163, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00130718954248366 + }, + { + "epoch": 2.5213037920749892, + "grad_norm": 64.30824711847141, + "learning_rate": 3.4495806060398017e-07, + "loss": 0.165, + "step": 11835, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9181208053691275, + "success_rate.epoch.env.math": 0.9766763848396501, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8563127311146329, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695138570234501, + "success_rate.epoch.global": 0.9109154208478395, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984375, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9900990099009901, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009900990099009901 + }, + { + "epoch": 2.522368981678739, + "grad_norm": 104.599357907905, + "learning_rate": 3.4492642478781487e-07, + "loss": 0.3187, + "step": 11840, + "success_rate.epoch.env.abd": 0.9819004524886877, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9535864978902954, + "success_rate.epoch.env.logic": 0.9186666666666666, + "success_rate.epoch.env.math": 0.976693372177713, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8560885608856088, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695446459695026, + "success_rate.epoch.global": 0.9108931126098508, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976615646258503, + "tokens_p.mean_in_band": 0.46875, + "tokens_rate.above_band": 0.9671052631578947, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03289473684210526 + }, + { + "epoch": 2.523434171282488, + "grad_norm": 17.943847670638224, + "learning_rate": 3.4489479995716285e-07, + "loss": 0.2725, + "step": 11845, + "success_rate.epoch.env.abd": 0.9819819819819819, + "success_rate.epoch.env.agentgym:alfworld": 0.85, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9537815126050421, + "success_rate.epoch.env.logic": 0.9186666666666666, + "success_rate.epoch.env.math": 0.9767610748002905, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8563913729615992, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869603469506467, + "success_rate.epoch.global": 0.9110748521313482, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990770042194093, + "tokens_p.mean_in_band": 0.70703125, + "tokens_rate.above_band": 0.9978947368421053, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002105263157894737 + }, + { + "epoch": 2.524499360886238, + "grad_norm": 54.977073550462976, + "learning_rate": 3.4486318613507205e-07, + "loss": 0.1834, + "step": 11850, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, + "success_rate.epoch.env.agentgym:sciworld": 0.9625, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.918774966711052, + "success_rate.epoch.env.math": 0.9768115942028985, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8566176470588235, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8697601153620101, + "success_rate.epoch.global": 0.9112558518216975, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971033868092691, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9929203539823008, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007079646017699115 + }, + { + "epoch": 2.525564550489987, + "grad_norm": 105.62523633771117, + "learning_rate": 3.448315833445824e-07, + "loss": 0.3318, + "step": 11855, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, + "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.918774966711052, + "success_rate.epoch.env.math": 0.9747292418772563, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8563941299790356, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695716653483035, + "success_rate.epoch.global": 0.9106236034938046, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.4, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7166666666666667, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9933176100628931, + "tokens_p.mean_in_band": 0.4583625793457031, + "tokens_rate.above_band": 0.9520958083832335, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04790419161676647 + }, + { + "epoch": 2.5266297400937368, + "grad_norm": 46.006171320987605, + "learning_rate": 3.447999916087259e-07, + "loss": 0.2269, + "step": 11860, + "success_rate.epoch.env.abd": 0.9820627802690582, + "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9188829787234043, + "success_rate.epoch.env.math": 0.9748020158387329, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.856694560669456, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696363253821722, + "success_rate.epoch.global": 0.9108047841070342, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9943647540983607, + "tokens_p.mean_in_band": 0.7125, + "tokens_rate.above_band": 0.973404255319149, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026595744680851064 + }, + { + "epoch": 2.527694929697486, + "grad_norm": 282.99357569758575, + "learning_rate": 3.447684109505262e-07, + "loss": 0.2537, + "step": 11865, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9190981432360743, + "success_rate.epoch.env.math": 0.9748563218390804, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8569937369519833, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696953002612033, + "success_rate.epoch.global": 0.910985231640704, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959608843537415, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.98, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02 + }, + { + "epoch": 2.5287601193012357, + "grad_norm": 0.0, + "learning_rate": 3.447368413929991e-07, + "loss": 0.2368, + "step": 11870, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, + "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.9194187582562747, + "success_rate.epoch.env.math": 0.9749283667621776, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8572173006774362, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869751320594726, + "success_rate.epoch.global": 0.9111649505350293, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9871323529411765, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9883720930232558, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011627906976744186 + }, + { + "epoch": 2.529825308904985, + "grad_norm": 71.12536466098928, + "learning_rate": 3.4470528295915243e-07, + "loss": 0.2253, + "step": 11875, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9539748953974896, + "success_rate.epoch.env.logic": 0.919631093544137, + "success_rate.epoch.env.math": 0.974964234620887, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8575883575883576, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8698282734259635, + "success_rate.epoch.global": 0.9113439451944388, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998046875, + "tokens_p.mean_in_band": 0.7, + "tokens_rate.above_band": 0.9746192893401016, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025380710659898477 + }, + { + "epoch": 2.5308904985087346, + "grad_norm": 200.85363222817136, + "learning_rate": 3.4467373567198557e-07, + "loss": 0.4284, + "step": 11880, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.852112676056338, + "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9541666666666667, + "success_rate.epoch.env.logic": 0.919631093544137, + "success_rate.epoch.env.math": 0.9750712250712251, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8572170301142263, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699170262000077, + "success_rate.epoch.global": 0.9113211341242711, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996421755725191, + "tokens_p.mean_in_band": 0.3802083333333333, + "tokens_rate.above_band": 0.9924242424242424, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007575757575757576 + }, + { + "epoch": 2.5319556881124843, + "grad_norm": 105.95514999887611, + "learning_rate": 3.4464219955449003e-07, + "loss": 0.2484, + "step": 11885, + "success_rate.epoch.env.abd": 0.9821428571428571, + "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9541666666666667, + "success_rate.epoch.env.logic": 0.9185282522996058, + "success_rate.epoch.env.math": 0.9751243781094527, + "success_rate.epoch.env.sat": 0.13333333333333333, + "success_rate.epoch.env.science": 0.8573651452282157, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699494856632316, + "success_rate.epoch.global": 0.9112984146096729, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.997965976331361, + "tokens_p.mean_in_band": 0.40625, + "tokens_rate.above_band": 0.9835111542192047, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016488845780795344 + }, + { + "epoch": 2.5330208777162335, + "grad_norm": 82.42247365060848, + "learning_rate": 3.4461067462964906e-07, + "loss": 0.3629, + "step": 11890, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, + "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9185282522996058, + "success_rate.epoch.env.math": 0.9751420454545454, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8571428571428571, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8695786203250474, + "success_rate.epoch.global": 0.9110755057079912, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976973684210526, + "tokens_p.mean_in_band": 0.6907894736842105, + "tokens_rate.above_band": 0.9375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0625 + }, + { + "epoch": 2.5340860673199828, + "grad_norm": 59.630699271046154, + "learning_rate": 3.445791609204379e-07, + "loss": 0.3481, + "step": 11895, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9186351706036745, + "success_rate.epoch.env.math": 0.9751948972360028, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8575116159008777, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.869646825628099, + "success_rate.epoch.global": 0.9112532480511693, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953125, + "tokens_p.mean_in_band": 0.83984375, + "tokens_rate.above_band": 0.9859154929577465, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014084507042253521 + }, + { + "epoch": 2.5351512569237324, + "grad_norm": 57.46211290920506, + "learning_rate": 3.445476584498234e-07, + "loss": 0.2065, + "step": 11900, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, + "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.96, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9186351706036745, + "success_rate.epoch.env.math": 0.9752650176678446, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8574369531652084, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8696464126913874, + "success_rate.epoch.global": 0.9112307999202075, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9166666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936342592592593, + "tokens_p.mean_in_band": 0.404296875, + "tokens_rate.above_band": 0.9642857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03571428571428571 + }, + { + "epoch": 2.536216446527482, + "grad_norm": 113.43745596251871, + "learning_rate": 3.4451616724076433e-07, + "loss": 0.2582, + "step": 11905, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8541666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9615384615384616, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9174311926605505, + "success_rate.epoch.env.math": 0.9752824858757062, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8578028747433265, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8698242985960707, + "success_rate.epoch.global": 0.9112084411706152, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987148268398268, + "tokens_p.mean_in_band": 0.5125558035714286, + "tokens_rate.above_band": 0.9705882352941176, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029411764705882353 + }, + { + "epoch": 2.5372816361312314, + "grad_norm": 133.23284597020634, + "learning_rate": 3.444846873162113e-07, + "loss": 0.1637, + "step": 11910, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8541666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9543568464730291, + "success_rate.epoch.env.logic": 0.9164490861618799, + "success_rate.epoch.env.math": 0.9752824858757062, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8581669226830517, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8698976115838485, + "success_rate.epoch.global": 0.9111861712696205, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972064393939394, + "tokens_p.mean_in_band": 0.7469618055555556, + "tokens_rate.above_band": 0.9865470852017937, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013452914798206279 + }, + { + "epoch": 2.5383468257349806, + "grad_norm": 94.25149636356029, + "learning_rate": 3.4445321869910676e-07, + "loss": 0.1338, + "step": 11915, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8541666666666666, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9753173483779972, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.858456821665815, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699640615895091, + "success_rate.epoch.global": 0.9113622843545509, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0006613756613756, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.5394120153387303, + "grad_norm": 61.80527106212896, + "learning_rate": 3.4442176141238465e-07, + "loss": 0.2954, + "step": 11920, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8551724137931035, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9154746423927178, + "success_rate.epoch.env.math": 0.9753867791842475, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8582355940846507, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8699333275965611, + "success_rate.epoch.global": 0.9111418959034238, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980852601156069, + "tokens_p.mean_in_band": 0.4550189393939394, + "tokens_rate.above_band": 0.9632516703786191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036748329621380846 + }, + { + "epoch": 2.54047720494248, + "grad_norm": 233.32823743982402, + "learning_rate": 3.443903154789709e-07, + "loss": 0.1735, + "step": 11925, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8551724137931035, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9547325102880658, + "success_rate.epoch.env.logic": 0.9155844155844156, + "success_rate.epoch.env.math": 0.9754040758959944, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.858739837398374, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870007724774723, + "success_rate.epoch.global": 0.9113174007505431, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985346585117227, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9989816700610998, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0010183299389002036 + }, + { + "epoch": 2.541542394546229, + "grad_norm": 21.235977992283846, + "learning_rate": 3.44358880921783e-07, + "loss": 0.5277, + "step": 11930, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8561643835616438, + "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9549180327868853, + "success_rate.epoch.env.logic": 0.9156939040207522, + "success_rate.epoch.env.math": 0.9747368421052631, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8590978205778003, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700966093285767, + "success_rate.epoch.global": 0.9112950916617386, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9979564032697548, + "tokens_p.mean_in_band": 0.633056640625, + "tokens_rate.above_band": 0.9839142091152815, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0160857908847185 + }, + { + "epoch": 2.5426075841499785, + "grad_norm": 68.92423608218708, + "learning_rate": 3.4432745776373033e-07, + "loss": 0.2849, + "step": 11935, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8561643835616438, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9549180327868853, + "success_rate.epoch.env.logic": 0.9158031088082902, + "success_rate.epoch.env.math": 0.9741258741258741, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8588056680161943, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700441110419604, + "success_rate.epoch.global": 0.9110761361400748, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9957627118644068, + "tokens_p.mean_in_band": 0.5522017045454546, + "tokens_rate.above_band": 0.9797047970479705, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02029520295202952 + }, + { + "epoch": 2.543672773753728, + "grad_norm": 61.590577156416416, + "learning_rate": 3.442960460277138e-07, + "loss": 0.2655, + "step": 11940, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8561643835616438, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9148387096774193, + "success_rate.epoch.env.math": 0.9741620111731844, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8590909090909091, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700023825892873, + "success_rate.epoch.global": 0.9110543883762027, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988162878787878, + "tokens_p.mean_in_band": 0.59661865234375, + "tokens_rate.above_band": 0.99, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01 + }, + { + "epoch": 2.544737963357478, + "grad_norm": 273.7021754812007, + "learning_rate": 3.442646457366261e-07, + "loss": 0.3168, + "step": 11945, + "success_rate.epoch.env.abd": 0.9823788546255506, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9551020408163265, + "success_rate.epoch.env.logic": 0.9149484536082474, + "success_rate.epoch.env.math": 0.9742698191933241, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.858728557013118, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870078171085141, + "success_rate.epoch.global": 0.9110327258475407, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982839595375722, + "tokens_p.mean_in_band": 0.2734375, + "tokens_rate.above_band": 0.9857549857549858, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014245014245014245 + }, + { + "epoch": 2.545803152961227, + "grad_norm": 188.12848402098945, + "learning_rate": 3.4423325691335146e-07, + "loss": 0.4486, + "step": 11950, + "success_rate.epoch.env.abd": 0.9824561403508771, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9552845528455285, + "success_rate.epoch.env.logic": 0.915057915057915, + "success_rate.epoch.env.math": 0.974323386537127, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8585095669687814, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870096701676383, + "success_rate.epoch.global": 0.9110111480539801, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9956430288461539, + "tokens_p.mean_in_band": 0.6462656656901041, + "tokens_rate.above_band": 0.9454545454545454, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05454545454545454 + }, + { + "epoch": 2.5468683425649763, + "grad_norm": 986.7548172335769, + "learning_rate": 3.442018795807659e-07, + "loss": 0.4894, + "step": 11955, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, + "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.9151670951156813, + "success_rate.epoch.env.math": 0.9743589743589743, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8587939698492463, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8698088954837633, + "success_rate.epoch.global": 0.9109896544993168, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9904011285574092, + "tokens_p.mean_in_band": 0.501787781084656, + "tokens_rate.above_band": 0.7294201861130994, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.2705798138869005 + }, + { + "epoch": 2.547933532168726, + "grad_norm": 43.304350197091985, + "learning_rate": 3.4417051376173694e-07, + "loss": 0.1971, + "step": 11960, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.86, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9151670951156813, + "success_rate.epoch.env.math": 0.9743944636678201, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8585047666833918, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8700826784554251, + "success_rate.epoch.global": 0.9109682446912137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971291415662651, + "tokens_p.mean_in_band": 0.6881510416666666, + "tokens_rate.above_band": 0.991044776119403, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008955223880597015 + }, + { + "epoch": 2.5489987217724757, + "grad_norm": 59.336164353104444, + "learning_rate": 3.4413915947912385e-07, + "loss": 0.3882, + "step": 11965, + "success_rate.epoch.env.abd": 0.982532751091703, + "success_rate.epoch.env.agentgym:alfworld": 0.8609271523178808, + "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9153846153846154, + "success_rate.epoch.env.math": 0.9737387698686939, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8588588588588588, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701593212702573, + "success_rate.epoch.global": 0.9109469181411628, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964539007092199, + "tokens_p.mean_in_band": 0.0031890869140625, + "tokens_rate.above_band": 0.9976415094339622, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0023584905660377358 + }, + { + "epoch": 2.550063911376225, + "grad_norm": 96.63634927298644, + "learning_rate": 3.4410781675577737e-07, + "loss": 0.3646, + "step": 11970, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.8618421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9143222506393862, + "success_rate.epoch.env.math": 0.9737750172532781, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8585707146426786, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8701491360941962, + "success_rate.epoch.global": 0.9107316126528237, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.861111111111111, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9984358359957402, + "tokens_p.mean_in_band": 0.4806640625, + "tokens_rate.above_band": 0.9791449426485923, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020855057351407715 + }, + { + "epoch": 2.5511291009799746, + "grad_norm": 32.12220785385132, + "learning_rate": 3.4407648561453977e-07, + "loss": 0.2023, + "step": 11975, + "success_rate.epoch.env.abd": 0.9826086956521739, + "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, + "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9144316730523627, + "success_rate.epoch.env.math": 0.9738111647139903, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8589935226706528, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.870282897064898, + "success_rate.epoch.global": 0.9109045128801084, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984186746987952, + "tokens_p.mean_in_band": 0.759765625, + "tokens_rate.above_band": 0.9952038369304557, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004796163069544364 + }, + { + "epoch": 2.552194290583724, + "grad_norm": 53.54048083969894, + "learning_rate": 3.440451660782451e-07, + "loss": 0.2448, + "step": 11980, + "success_rate.epoch.env.abd": 0.9826839826839827, + "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9145408163265306, + "success_rate.epoch.env.math": 0.9738831615120275, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8592039800995025, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703443266526878, + "success_rate.epoch.global": 0.911076744635608, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998077876984127, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.998019801980198, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0019801980198019802 + }, + { + "epoch": 2.5532594801874735, + "grad_norm": 70.7509008739078, + "learning_rate": 3.4401385816971866e-07, + "loss": 0.2233, + "step": 11985, + "success_rate.epoch.env.abd": 0.9826839826839827, + "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9146496815286624, + "success_rate.epoch.env.math": 0.9739726027397261, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.858987090367428, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703426372615746, + "success_rate.epoch.global": 0.9110553733359058, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957540760869565, + "tokens_p.mean_in_band": 0.76318359375, + "tokens_rate.above_band": 0.9583333333333334, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041666666666666664 + }, + { + "epoch": 2.5543246697912227, + "grad_norm": 182.2298132980826, + "learning_rate": 3.4398256191177756e-07, + "loss": 0.2803, + "step": 11990, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9150823827629911, + "success_rate.epoch.env.math": 0.974025974025974, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8586309523809523, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8703612347651905, + "success_rate.epoch.global": 0.9110340843443097, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969512195121951, + "tokens_p.mean_in_band": 0.498046875, + "tokens_rate.above_band": 0.9534883720930233, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046511627906976744 + }, + { + "epoch": 2.5553898593949724, + "grad_norm": 495.6901149480837, + "learning_rate": 3.439512773272302e-07, + "loss": 0.2772, + "step": 11995, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.9518072289156626, + "success_rate.epoch.env.logic": 0.9152970922882427, + "success_rate.epoch.env.math": 0.9740967961826857, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8588410104011887, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8704873125196765, + "success_rate.epoch.global": 0.9112050739957717, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984984984984985, + "tokens_p.mean_in_band": 0.7640625, + "tokens_rate.above_band": 0.985207100591716, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014792899408284023 + }, + { + "epoch": 2.5564550489987217, + "grad_norm": 46.33575643759948, + "learning_rate": 3.439200044388766e-07, + "loss": 0.261, + "step": 12000, + "success_rate.epoch.env.abd": 0.9827586206896551, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.952, + "success_rate.epoch.env.logic": 0.9155107187894073, + "success_rate.epoch.env.math": 0.9741144414168937, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.858201581027668, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705359819921256, + "success_rate.epoch.global": 0.9109917513907538, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979003359462486, + "tokens_p.mean_below_band": 2.648448571562767e-09, + "tokens_p.mean_in_band": 0.44557291666666665, + "tokens_rate.above_band": 0.9823982398239824, + "tokens_rate.below_band": 0.0011001100110011, + "tokens_rate.in_band": 0.0165016501650165 + }, + { + "epoch": 2.5575202386024714, + "grad_norm": 75.8550203441575, + "learning_rate": 3.438887432695082e-07, + "loss": 0.2481, + "step": 12005, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9629629629629629, + "success_rate.epoch.env.ded": 0.952191235059761, + "success_rate.epoch.env.logic": 0.9156171284634761, + "success_rate.epoch.env.math": 0.9741847826086957, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8584114454859398, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8705952408757619, + "success_rate.epoch.global": 0.9111621673367797, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981602186711522, + "tokens_p.mean_in_band": 0.814453125, + "tokens_rate.above_band": 0.998320738874895, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016792611251049538 + }, + { + "epoch": 2.5585854282062206, + "grad_norm": 67.08803362906818, + "learning_rate": 3.4385749384190794e-07, + "loss": 0.2412, + "step": 12010, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.952191235059761, + "success_rate.epoch.env.logic": 0.9159347553324969, + "success_rate.epoch.env.math": 0.9742023082145281, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8586903003446578, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8708832668530271, + "success_rate.epoch.global": 0.9113319319701891, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973872950819672, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.5596506178099703, + "grad_norm": 237.71341347317403, + "learning_rate": 3.438262561788502e-07, + "loss": 0.4032, + "step": 12015, + "success_rate.epoch.env.abd": 0.9828326180257511, + "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9525691699604744, + "success_rate.epoch.env.logic": 0.9159347553324969, + "success_rate.epoch.env.math": 0.9742895805142084, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8588293162813576, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710181749014805, + "success_rate.epoch.global": 0.911501049017738, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983127376425855, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.5607158074137195, + "grad_norm": 62.7319749104168, + "learning_rate": 3.437950303031007e-07, + "loss": 0.1932, + "step": 12020, + "success_rate.epoch.env.abd": 0.9829059829059829, + "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.9161451814768461, + "success_rate.epoch.env.math": 0.9743243243243244, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8591065292096219, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710893097482348, + "success_rate.epoch.global": 0.9116695221778032, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971498371335505, + "tokens_p.mean_in_band": 0.8567708333333334, + "tokens_rate.above_band": 0.9967532467532467, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003246753246753247 + }, + { + "epoch": 2.561780997017469, + "grad_norm": 129.25711300667405, + "learning_rate": 3.4376381623741664e-07, + "loss": 0.2234, + "step": 12025, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.91625, + "success_rate.epoch.env.math": 0.9744107744107744, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8592447278077489, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8711324308172689, + "success_rate.epoch.global": 0.9118373551206537, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990354938271605, + "tokens_p.mean_in_band": 0.8203125, + "tokens_rate.above_band": 0.9938650306748467, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006134969325153374 + }, + { + "epoch": 2.5628461866212184, + "grad_norm": 130.4999063049413, + "learning_rate": 3.437326140045467e-07, + "loss": 0.2314, + "step": 12030, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9529411764705882, + "success_rate.epoch.env.logic": 0.916354556803995, + "success_rate.epoch.env.math": 0.9744107744107744, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8593063019052272, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8711643764409281, + "success_rate.epoch.global": 0.9118149061255453, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966216216216216, + "tokens_p.mean_in_band": 0.5714285714285714, + "tokens_rate.above_band": 0.9694323144104804, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03056768558951965 + }, + { + "epoch": 2.563911376224968, + "grad_norm": 285.1381440444271, + "learning_rate": 3.437014236272307e-07, + "loss": 0.192, + "step": 12035, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9153175591531756, + "success_rate.epoch.env.math": 0.9744623655913979, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8595121951219512, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8711891761344098, + "success_rate.epoch.global": 0.9117925421162218, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996292372881356, + "tokens_p.mean_in_band": 0.684326171875, + "tokens_rate.above_band": 0.9866220735785953, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013377926421404682 + }, + { + "epoch": 2.5649765658287174, + "grad_norm": 443.55851214993623, + "learning_rate": 3.436702451282e-07, + "loss": 0.3052, + "step": 12040, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.953307392996109, + "success_rate.epoch.env.logic": 0.9156327543424317, + "success_rate.epoch.env.math": 0.9745137491616365, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8596491228070176, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712702941152718, + "success_rate.epoch.global": 0.9119591913848479, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974626068376068, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9989327641408752, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0010672358591248667 + }, + { + "epoch": 2.566041755432467, + "grad_norm": 41.880318313677236, + "learning_rate": 3.436390785301774e-07, + "loss": 0.2218, + "step": 12045, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9157372986369269, + "success_rate.epoch.env.math": 0.9746158984635939, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8597857838364168, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713179608173415, + "success_rate.epoch.global": 0.9121252121440694, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998624213836478, + "tokens_p.mean_in_band": 0.6768973214285714, + "tokens_rate.above_band": 0.9784615384615385, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021538461538461538 + }, + { + "epoch": 2.5671069450362163, + "grad_norm": 136.0080793586695, + "learning_rate": 3.436079238558768e-07, + "loss": 0.2955, + "step": 12050, + "success_rate.epoch.env.abd": 0.9830508474576272, + "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9158415841584159, + "success_rate.epoch.env.math": 0.9746497665110073, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8601941747572815, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714455942102245, + "success_rate.epoch.global": 0.9122906079427818, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979383680555556, + "tokens_p.mean_in_band": 0.7408854166666666, + "tokens_rate.above_band": 0.9896907216494846, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010309278350515464 + }, + { + "epoch": 2.568172134639966, + "grad_norm": 82.02322753070825, + "learning_rate": 3.4357678112800344e-07, + "loss": 0.3307, + "step": 12055, + "success_rate.epoch.env.abd": 0.9831932773109243, + "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9160493827160494, + "success_rate.epoch.env.math": 0.9746835443037974, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.8604651162790697, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715051349125434, + "success_rate.epoch.global": 0.9124553823032124, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9940664556962026, + "tokens_p.mean_in_band": 0.783203125, + "tokens_rate.above_band": 0.9753086419753086, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024691358024691357 + }, + { + "epoch": 2.5692373242437156, + "grad_norm": 54.3420676600815, + "learning_rate": 3.43545650369254e-07, + "loss": 0.3046, + "step": 12060, + "success_rate.epoch.env.abd": 0.9832635983263598, + "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, + "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9161528976572133, + "success_rate.epoch.env.math": 0.9740518962075848, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.860318994683422, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714665574434225, + "success_rate.epoch.global": 0.9122445152822052, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.86, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988111413043478, + "tokens_p.mean_in_band": 0.3940972222222222, + "tokens_rate.above_band": 0.9761273209549072, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023872679045092837 + }, + { + "epoch": 2.570302513847465, + "grad_norm": 216.6815496720348, + "learning_rate": 3.435145316023163e-07, + "loss": 0.2443, + "step": 12065, + "success_rate.epoch.env.abd": 0.9832635983263598, + "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9163591635916359, + "success_rate.epoch.env.math": 0.9740863787375416, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.860655737704918, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715376019755943, + "success_rate.epoch.global": 0.9124087591240876, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9971181556195965, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9914285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008571428571428572 + }, + { + "epoch": 2.571367703451214, + "grad_norm": 126.43073394207107, + "learning_rate": 3.4348342484986954e-07, + "loss": 0.2138, + "step": 12070, + "success_rate.epoch.env.abd": 0.9833333333333333, + "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9164619164619164, + "success_rate.epoch.env.math": 0.9741379310344828, + "success_rate.epoch.env.sat": 0.12903225806451613, + "success_rate.epoch.env.science": 0.860990860990861, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715884350174249, + "success_rate.epoch.global": 0.9125723893144031, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.994552752293578, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.990909090909091, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00909090909090909 + }, + { + "epoch": 2.572432893054964, + "grad_norm": 154.7327436373478, + "learning_rate": 3.43452330134584e-07, + "loss": 0.1627, + "step": 12075, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.8670886075949367, + "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9165644171779141, + "success_rate.epoch.env.math": 0.974155069582505, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8612578012481997, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713464924612476, + "success_rate.epoch.global": 0.9125489464851763, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970346715328468, + "tokens_p.mean_in_band": 0.660888671875, + "tokens_rate.above_band": 0.9625292740046838, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03747072599531616 + }, + { + "epoch": 2.5734980826587135, + "grad_norm": 175.87832004518725, + "learning_rate": 3.434212474791214e-07, + "loss": 0.1704, + "step": 12080, + "success_rate.epoch.env.abd": 0.9834710743801653, + "success_rate.epoch.env.agentgym:alfworld": 0.8670886075949367, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9167686658506732, + "success_rate.epoch.env.math": 0.9741892786234282, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8615900383141762, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714167047144327, + "success_rate.epoch.global": 0.9127117066815559, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9952330508474576, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.5745632722624627, + "grad_norm": 25.00494534986744, + "learning_rate": 3.4339017690613447e-07, + "loss": 0.1206, + "step": 12085, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9536679536679536, + "success_rate.epoch.env.logic": 0.9167686658506732, + "success_rate.epoch.env.math": 0.9742063492063492, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8615090735434575, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714992055795269, + "success_rate.epoch.global": 0.9126880921419283, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971774193548387, + "tokens_p.mean_in_band": 0.7428385416666666, + "tokens_rate.above_band": 0.9810126582278481, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0189873417721519 + }, + { + "epoch": 2.575628461866212, + "grad_norm": 86.39718190365788, + "learning_rate": 3.4335911843826724e-07, + "loss": 0.2611, + "step": 12090, + "success_rate.epoch.env.abd": 0.9836065573770492, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9156479217603912, + "success_rate.epoch.env.math": 0.9742574257425742, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8617731172545281, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714582429426913, + "success_rate.epoch.global": 0.9126645651770814, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0002976190476192, + "tokens_p.mean_in_band": 0.611328125, + "tokens_rate.above_band": 0.9897172236503856, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010282776349614395 + }, + { + "epoch": 2.5766936514699617, + "grad_norm": 360.3867822448883, + "learning_rate": 3.433280720981549e-07, + "loss": 0.4524, + "step": 12095, + "success_rate.epoch.env.abd": 0.983739837398374, + "success_rate.epoch.env.agentgym:alfworld": 0.8625, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9158536585365854, + "success_rate.epoch.env.math": 0.974291364535267, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8614945264159923, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8709736826199449, + "success_rate.epoch.global": 0.9124560429391079, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9986822289156626, + "tokens_p.mean_in_band": 0.5602678571428571, + "tokens_rate.above_band": 0.9595375722543352, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04046242774566474 + }, + { + "epoch": 2.5777588410737113, + "grad_norm": 138.625620808841, + "learning_rate": 3.4329703790842384e-07, + "loss": 0.178, + "step": 12100, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9655172413793104, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9159561510353228, + "success_rate.epoch.env.math": 0.9743421052631579, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8616920152091255, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8710951271317996, + "success_rate.epoch.global": 0.9126177720302975, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981930272108843, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9966101694915255, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003389830508474576 + }, + { + "epoch": 2.5788240306774606, + "grad_norm": 20.383299070372694, + "learning_rate": 3.432660158916915e-07, + "loss": 0.2621, + "step": 12105, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9159561510353228, + "success_rate.epoch.env.math": 0.9743926460932371, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.862085308056872, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712399688559072, + "success_rate.epoch.global": 0.9127789046653144, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978448275862069, + "tokens_p.mean_in_band": 0.6067708333333334, + "tokens_rate.above_band": 0.9854368932038835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014563106796116505 + }, + { + "epoch": 2.57988922028121, + "grad_norm": 164.3190243933171, + "learning_rate": 3.4323500607056656e-07, + "loss": 0.1043, + "step": 12110, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.9744597249508841, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8622811168954093, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8712916845452882, + "success_rate.epoch.global": 0.9129394441376771, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9884020618556701, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9797979797979798, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020202020202020204 + }, + { + "epoch": 2.5809544098849595, + "grad_norm": 252.57942758067566, + "learning_rate": 3.4320400846764867e-07, + "loss": 0.2068, + "step": 12115, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9659090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9543726235741445, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.9744931327665141, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8624763705103969, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713979998482848, + "success_rate.epoch.global": 0.9130993937167003, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993822324011572, + "tokens_p.mean_in_band": 0.6627604166666666, + "tokens_rate.above_band": 0.9985556090515166, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0014443909484833895 + }, + { + "epoch": 2.582019599488709, + "grad_norm": 41.87077369208928, + "learning_rate": 3.4317302310552874e-07, + "loss": 0.3314, + "step": 12120, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9543726235741445, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.9745430809399478, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8618576143328619, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714284574442307, + "success_rate.epoch.global": 0.9128919860627178, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9982108778625954, + "tokens_p.mean_in_band": 0.380859375, + "tokens_rate.above_band": 0.9562043795620438, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043795620437956206 + }, + { + "epoch": 2.5830847890924584, + "grad_norm": 20.958413798253876, + "learning_rate": 3.4314205000678866e-07, + "loss": 0.1431, + "step": 12125, + "success_rate.epoch.env.abd": 0.9839357429718876, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9162621359223301, + "success_rate.epoch.env.math": 0.974559686888454, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.862312030075188, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714928781616937, + "success_rate.epoch.global": 0.9130514369394106, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9961301597869507, + "tokens_p.mean_in_band": 0.6979166666666666, + "tokens_rate.above_band": 0.9960212201591512, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003978779840848806 + }, + { + "epoch": 2.5841499786962077, + "grad_norm": 56.63666614541662, + "learning_rate": 3.431110891940014e-07, + "loss": 0.1411, + "step": 12130, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9165659008464329, + "success_rate.epoch.env.math": 0.9745762711864406, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8625703564727955, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715669199102783, + "success_rate.epoch.global": 0.9132103051342957, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986631016042781, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9946808510638298, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005319148936170213 + }, + { + "epoch": 2.5852151682999573, + "grad_norm": 89.9056166821671, + "learning_rate": 3.4308014068973094e-07, + "loss": 0.3164, + "step": 12135, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9746258945998699, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8624239588207768, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715845955009921, + "success_rate.epoch.global": 0.9131862119277767, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996830628803245, + "tokens_p.mean_in_band": 0.5598958333333334, + "tokens_rate.above_band": 0.9879759519038076, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012024048096192385 + }, + { + "epoch": 2.586280357903707, + "grad_norm": 87.15984604395845, + "learning_rate": 3.4304920451653235e-07, + "loss": 0.2294, + "step": 12140, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9664804469273743, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.916767189384801, + "success_rate.epoch.env.math": 0.9746753246753247, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8627450980392157, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716445413289867, + "success_rate.epoch.global": 0.9133442563262334, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972133757961783, + "tokens_p.mean_in_band": 0.6809895833333334, + "tokens_rate.above_band": 0.98125, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01875 + }, + { + "epoch": 2.5873455475074563, + "grad_norm": 24.81964765900838, + "learning_rate": 3.430182806969517e-07, + "loss": 0.3304, + "step": 12145, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9664804469273743, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9157641395908543, + "success_rate.epoch.env.math": 0.9747409326424871, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.862937062937063, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715767706990835, + "success_rate.epoch.global": 0.9133200072687625, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980418797953964, + "tokens_p.mean_in_band": 0.5228794642857143, + "tokens_rate.above_band": 0.9654320987654321, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0345679012345679 + }, + { + "epoch": 2.588410737111206, + "grad_norm": 629.4801858140722, + "learning_rate": 3.429873692535261e-07, + "loss": 0.2735, + "step": 12150, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9547169811320755, + "success_rate.epoch.env.logic": 0.9158653846153846, + "success_rate.epoch.env.math": 0.9747736093143596, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8633193863319386, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716406311382262, + "success_rate.epoch.global": 0.9134772356248866, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955357142857143, + "tokens_p.mean_in_band": 0.3828125, + "tokens_rate.above_band": 0.9940828402366864, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005917159763313609 + }, + { + "epoch": 2.589475926714955, + "grad_norm": 147.1787513620685, + "learning_rate": 3.4295647020878346e-07, + "loss": 0.257, + "step": 12155, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9159663865546218, + "success_rate.epoch.env.math": 0.9748873148744366, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8633828996282528, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716814000208323, + "success_rate.epoch.global": 0.9136338946224878, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956018518518519, + "tokens_p.mean_in_band": 0.7633928571428571, + "tokens_rate.above_band": 0.9830097087378641, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01699029126213592 + }, + { + "epoch": 2.590541116318705, + "grad_norm": 55.17296432064818, + "learning_rate": 3.4292558358524284e-07, + "loss": 0.2617, + "step": 12160, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9161676646706587, + "success_rate.epoch.env.math": 0.9749357326478149, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8636995827538247, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717328890221946, + "success_rate.epoch.global": 0.9137899873486355, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9933712121212122, + "tokens_p.mean_in_band": 0.7890625, + "tokens_rate.above_band": 0.9801980198019802, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019801980198019802 + }, + { + "epoch": 2.591606305922454, + "grad_norm": 60.10127876467344, + "learning_rate": 3.4289470940541427e-07, + "loss": 0.1999, + "step": 12165, + "success_rate.epoch.env.abd": 0.984, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.916267942583732, + "success_rate.epoch.env.math": 0.9749679075738126, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8640776699029126, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718559829844925, + "success_rate.epoch.global": 0.913945516868122, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9950132978723404, + "tokens_p.mean_in_band": 0.69765625, + "tokens_rate.above_band": 0.9740932642487047, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025906735751295335 + }, + { + "epoch": 2.592671495526204, + "grad_norm": 98.1968561237007, + "learning_rate": 3.428638476917985e-07, + "loss": 0.1965, + "step": 12170, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.9666666666666667, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9164677804295943, + "success_rate.epoch.env.math": 0.9750320102432779, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8642032332563511, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719139294346107, + "success_rate.epoch.global": 0.9141004862236629, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966755319148937, + "tokens_p.mean_in_band": 0.658203125, + "tokens_rate.above_band": 0.9832635983263598, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016736401673640166 + }, + { + "epoch": 2.593736685129953, + "grad_norm": 66.34300862681035, + "learning_rate": 3.428329984668874e-07, + "loss": 0.224, + "step": 12175, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9548872180451128, + "success_rate.epoch.env.logic": 0.9165673420738975, + "success_rate.epoch.env.math": 0.975095785440613, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8643911439114391, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.872060170763485, + "success_rate.epoch.global": 0.9142548984360956, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991861979166666, + "tokens_p.mean_in_band": 0.77734375, + "tokens_rate.above_band": 0.9896907216494846, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010309278350515464 + }, + { + "epoch": 2.5948018747337027, + "grad_norm": 20.708303786492305, + "learning_rate": 3.428021617531637e-07, + "loss": 0.1319, + "step": 12180, + "success_rate.epoch.env.abd": 0.9840637450199203, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9167657550535078, + "success_rate.epoch.env.math": 0.9751275510204082, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8647031753336402, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721248227372826, + "success_rate.epoch.global": 0.9144087565045756, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9942708333333333, + "tokens_p.mean_in_band": 0.80625, + "tokens_rate.above_band": 0.9836065573770492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01639344262295082 + }, + { + "epoch": 2.595867064337452, + "grad_norm": 462.0097831592874, + "learning_rate": 3.4277133757310093e-07, + "loss": 0.2027, + "step": 12185, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, + "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9167657550535078, + "success_rate.epoch.env.math": 0.9745547073791349, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8640330730362885, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720339537272437, + "success_rate.epoch.global": 0.9140247178936056, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9949087078651685, + "tokens_p.mean_in_band": 0.5501302083333334, + "tokens_rate.above_band": 0.9368421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06315789473684211 + }, + { + "epoch": 2.5969322539412016, + "grad_norm": 163.897267507085, + "learning_rate": 3.427405259491634e-07, + "loss": 0.2552, + "step": 12190, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9156769596199525, + "success_rate.epoch.env.math": 0.974587039390089, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.864406779661017, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720332270940422, + "success_rate.epoch.global": 0.9139996424101555, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952980324074074, + "tokens_p.mean_in_band": 0.5223817567567568, + "tokens_rate.above_band": 0.9589345172031076, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041065482796892344 + }, + { + "epoch": 2.597997443544951, + "grad_norm": 52.529101453778985, + "learning_rate": 3.427097269038067e-07, + "loss": 0.1746, + "step": 12195, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9159763313609467, + "success_rate.epoch.env.math": 0.9746353836398225, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8646547782350251, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8720873829635636, + "success_rate.epoch.global": 0.91415313225058, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9916237113402062, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9797979797979798, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020202020202020204 + }, + { + "epoch": 2.5990626331487006, + "grad_norm": 151.37354831033662, + "learning_rate": 3.426789404594767e-07, + "loss": 0.2205, + "step": 12200, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9159763313609467, + "success_rate.epoch.env.math": 0.9746995572422518, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8649018712916476, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8721375825495595, + "success_rate.epoch.global": 0.9143060751826118, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997043918918919, + "tokens_p.mean_in_band": 0.7552083333333334, + "tokens_rate.above_band": 0.9801324503311258, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019867549668874173 + }, + { + "epoch": 2.60012782275245, + "grad_norm": 89.29630922816644, + "learning_rate": 3.426481666386104e-07, + "loss": 0.2014, + "step": 12205, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9552238805970149, + "success_rate.epoch.env.logic": 0.9161747343565525, + "success_rate.epoch.env.math": 0.9747315224257739, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8651480637813211, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8722121757050393, + "success_rate.epoch.global": 0.9144584741241331, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0005926042983566, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9987373737373737, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0012626262626262627 + }, + { + "epoch": 2.6011930123561995, + "grad_norm": 196.83021355492997, + "learning_rate": 3.426174054636356e-07, + "loss": 0.3583, + "step": 12210, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9516728624535316, + "success_rate.epoch.env.logic": 0.9161747343565525, + "success_rate.epoch.env.math": 0.9748110831234257, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8644222020018199, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871830601230009, + "success_rate.epoch.global": 0.9140777560802414, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.5833333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.987717048145225, + "tokens_p.mean_below_band": 2.8312206268310547e-07, + "tokens_p.mean_in_band": 0.4772267206477733, + "tokens_rate.above_band": 0.7186613726602382, + "tokens_rate.below_band": 0.0011344299489506524, + "tokens_rate.in_band": 0.2802041973908111 + }, + { + "epoch": 2.6022582019599487, + "grad_norm": 111.40438070189366, + "learning_rate": 3.425866569569708e-07, + "loss": 0.2828, + "step": 12215, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9166666666666666, + "success_rate.epoch.env.math": 0.9748427672955975, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8640909090909091, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718643569545267, + "success_rate.epoch.global": 0.9140528087896509, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983733733733734, + "tokens_p.mean_in_band": 0.5924479166666666, + "tokens_rate.above_band": 0.9940298507462687, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005970149253731343 + }, + { + "epoch": 2.6033233915636984, + "grad_norm": 96.6807663742799, + "learning_rate": 3.4255592114102526e-07, + "loss": 0.1794, + "step": 12220, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9168618266978923, + "success_rate.epoch.env.math": 0.9748743718592965, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8639455782312925, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871871760021373, + "success_rate.epoch.global": 0.9140279497611887, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966193931398417, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.994750656167979, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005249343832020997 + }, + { + "epoch": 2.6043885811674476, + "grad_norm": 143.38548451088715, + "learning_rate": 3.42525198038199e-07, + "loss": 0.2257, + "step": 12225, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9169590643274854, + "success_rate.epoch.env.math": 0.974937343358396, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8641919420552286, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715643685818047, + "success_rate.epoch.global": 0.9140031785272824, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9921328671328671, + "tokens_p.mean_in_band": 0.714453125, + "tokens_rate.above_band": 0.8773006134969326, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12269938650306748 + }, + { + "epoch": 2.6054537707711973, + "grad_norm": 43.62137269560941, + "learning_rate": 3.4249448767088283e-07, + "loss": 0.112, + "step": 12230, + "success_rate.epoch.env.abd": 0.9841897233201581, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9170560747663551, + "success_rate.epoch.env.math": 0.9749530369442705, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8642309427153811, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715781599167953, + "success_rate.epoch.global": 0.9139784946236559, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9954166666666666, + "tokens_p.mean_in_band": 0.6376953125, + "tokens_rate.above_band": 0.974025974025974, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025974025974025976 + }, + { + "epoch": 2.606518960374947, + "grad_norm": 95.49498414806867, + "learning_rate": 3.4246379006145827e-07, + "loss": 0.2985, + "step": 12235, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, + "success_rate.epoch.env.agentgym:textcraft": 0.975, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9159859976662778, + "success_rate.epoch.env.math": 0.9749843652282677, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8631863186318632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.871452696091814, + "success_rate.epoch.global": 0.9134260073904628, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.4, + "success_rate.window.env_macro_mean": 0.6799999999999999, + "success_rate.window.global": 0.6, + "tokens_p.mean_above_band": 0.9967912946428571, + "tokens_p.mean_in_band": 0.5739889705882353, + "tokens_rate.above_band": 0.9294605809128631, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07053941908713693 + }, + { + "epoch": 2.6075841499786963, + "grad_norm": 87.8559922223753, + "learning_rate": 3.4243310523229753e-07, + "loss": 0.2659, + "step": 12240, + "success_rate.epoch.env.abd": 0.984251968503937, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9518518518518518, + "success_rate.epoch.env.logic": 0.9140534262485482, + "success_rate.epoch.env.math": 0.9750467872738615, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8631863186318632, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8713539665137895, + "success_rate.epoch.global": 0.9132267697171965, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0005063657407407, + "tokens_p.mean_in_band": 0.41271551724137934, + "tokens_rate.above_band": 0.9781132075471698, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02188679245283019 + }, + { + "epoch": 2.6086493395824455, + "grad_norm": 129.2130996445585, + "learning_rate": 3.424024332057634e-07, + "loss": 0.4608, + "step": 12245, + "success_rate.epoch.env.abd": 0.984313725490196, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9520295202952029, + "success_rate.epoch.env.logic": 0.91415313225058, + "success_rate.epoch.env.math": 0.9751088985687617, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8633093525179856, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714016280240319, + "success_rate.epoch.global": 0.9133637320238512, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989388794567062, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9949324324324325, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005067567567567568 + }, + { + "epoch": 2.609714529186195, + "grad_norm": 79.208831291153, + "learning_rate": 3.423717740042095e-07, + "loss": 0.2418, + "step": 12250, + "success_rate.epoch.env.abd": 0.984313725490196, + "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9520295202952029, + "success_rate.epoch.env.logic": 0.9143518518518519, + "success_rate.epoch.env.math": 0.9751707014276847, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8635547576301615, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8714476214397018, + "success_rate.epoch.global": 0.913515406162465, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9946428571428572, + "tokens_p.mean_in_band": 0.8072916666666666, + "tokens_rate.above_band": 0.9668508287292817, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03314917127071823 + }, + { + "epoch": 2.610779718789945, + "grad_norm": 38.64000966474176, + "learning_rate": 3.4234112764998004e-07, + "loss": 0.1079, + "step": 12255, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8650306748466258, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9522058823529411, + "success_rate.epoch.env.logic": 0.9144508670520232, + "success_rate.epoch.env.math": 0.9752014879107254, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8637992831541219, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715789948721693, + "success_rate.epoch.global": 0.9136665501572877, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996522257551669, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.611844908393694, + "grad_norm": 453.03288817797215, + "learning_rate": 3.4231049416540986e-07, + "loss": 0.3774, + "step": 12260, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8650306748466258, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9145496535796767, + "success_rate.epoch.env.math": 0.9752628324056896, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8635957066189625, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715909607372119, + "success_rate.epoch.global": 0.9136427076064201, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963235294117647, + "tokens_p.mean_in_band": 0.3515625, + "tokens_rate.above_band": 0.9714285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02857142857142857 + }, + { + "epoch": 2.6129100979974433, + "grad_norm": 125.18524775678662, + "learning_rate": 3.422798735728244e-07, + "loss": 0.2371, + "step": 12265, + "success_rate.epoch.env.abd": 0.984375, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9147465437788018, + "success_rate.epoch.env.math": 0.9753238741517581, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8633318445734703, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716652383362719, + "success_rate.epoch.global": 0.9136189481017067, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966836734693878, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9760956175298805, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02390438247011952 + }, + { + "epoch": 2.613975287601193, + "grad_norm": 93.96632888151346, + "learning_rate": 3.422492658945397e-07, + "loss": 0.2015, + "step": 12270, + "success_rate.epoch.env.abd": 0.9844357976653697, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9147465437788018, + "success_rate.epoch.env.math": 0.9753846153846154, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.863514719000892, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717613871033055, + "success_rate.epoch.global": 0.913769123783032, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983212809917356, + "tokens_p.mean_in_band": 0.73828125, + "tokens_rate.above_band": 0.983739837398374, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016260162601626018 + }, + { + "epoch": 2.6150404772049427, + "grad_norm": 120.6420295419577, + "learning_rate": 3.422186711528625e-07, + "loss": 0.1961, + "step": 12275, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9147465437788018, + "success_rate.epoch.env.math": 0.9754299754299754, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8638790035587188, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718041117368348, + "success_rate.epoch.global": 0.9139187782020132, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9923537234042553, + "tokens_p.mean_in_band": 0.83984375, + "tokens_rate.above_band": 0.9690721649484536, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030927835051546393 + }, + { + "epoch": 2.616105666808692, + "grad_norm": 82.01705876916566, + "learning_rate": 3.4218808937009e-07, + "loss": 0.3291, + "step": 12280, + "success_rate.epoch.env.abd": 0.9844961240310077, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9138920780711826, + "success_rate.epoch.env.math": 0.9755052051439069, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.864, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717442717775253, + "success_rate.epoch.global": 0.9138946638946639, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982114467408585, + "tokens_p.mean_in_band": 0.48662109375, + "tokens_rate.above_band": 0.9402092675635276, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.059790732436472344 + }, + { + "epoch": 2.617170856412441, + "grad_norm": 180.05305984008223, + "learning_rate": 3.4215752056851e-07, + "loss": 0.2132, + "step": 12285, + "success_rate.epoch.env.abd": 0.9845559845559846, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9140893470790378, + "success_rate.epoch.env.math": 0.9755501222493888, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8642413487133984, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717936713549538, + "success_rate.epoch.global": 0.914043583535109, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969135802469136, + "tokens_p.mean_in_band": 0.59296875, + "tokens_rate.above_band": 0.9418604651162791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05813953488372093 + }, + { + "epoch": 2.618236046016191, + "grad_norm": 38.28839485192977, + "learning_rate": 3.4212696477040066e-07, + "loss": 0.2753, + "step": 12290, + "success_rate.epoch.env.abd": 0.9845559845559846, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9140893470790378, + "success_rate.epoch.env.math": 0.975594874923734, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.863716814159292, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717658541063021, + "success_rate.epoch.global": 0.9138466850828729, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954308093994778, + "tokens_p.mean_in_band": 0.5618489583333334, + "tokens_rate.above_band": 0.9845758354755784, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015424164524421594 + }, + { + "epoch": 2.6193012356199405, + "grad_norm": 74.68936543167256, + "learning_rate": 3.420964219980311e-07, + "loss": 0.1572, + "step": 12295, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9144811858608894, + "success_rate.epoch.env.math": 0.975594874923734, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8640176600441501, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718342254450394, + "success_rate.epoch.global": 0.9139951740779042, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9922680412371134, + "tokens_p.mean_in_band": 0.7869318181818182, + "tokens_rate.above_band": 0.8981481481481481, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10185185185185185 + }, + { + "epoch": 2.62036642522369, + "grad_norm": 199.40942827091234, + "learning_rate": 3.4206589227366043e-07, + "loss": 0.2017, + "step": 12300, + "success_rate.epoch.env.abd": 0.9846743295019157, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9145785876993167, + "success_rate.epoch.env.math": 0.9750456482045039, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8641975308641975, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718303762365012, + "success_rate.epoch.global": 0.9139710942876806, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915644171779141, + "tokens_p.mean_in_band": 0.7388392857142857, + "tokens_rate.above_band": 0.9588235294117647, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041176470588235294 + }, + { + "epoch": 2.621431614827439, + "grad_norm": 161.56315644597106, + "learning_rate": 3.420353756195386e-07, + "loss": 0.178, + "step": 12305, + "success_rate.epoch.env.abd": 0.9847908745247148, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9525547445255474, + "success_rate.epoch.env.logic": 0.9146757679180887, + "success_rate.epoch.env.math": 0.9750911300121506, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8644366197183099, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718756758640768, + "success_rate.epoch.global": 0.9141188594984542, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954954954954955, + "tokens_p.mean_in_band": 0.83671875, + "tokens_rate.above_band": 0.9568965517241379, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04310344827586207 + }, + { + "epoch": 2.6224968044311887, + "grad_norm": 1129.3170743467604, + "learning_rate": 3.4200487205790593e-07, + "loss": 0.3213, + "step": 12310, + "success_rate.epoch.env.abd": 0.9847908745247148, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9528985507246377, + "success_rate.epoch.env.logic": 0.9147727272727273, + "success_rate.epoch.env.math": 0.975121359223301, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8642355008787346, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719155611182853, + "success_rate.epoch.global": 0.9140946502057613, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984206989247312, + "tokens_p.mean_in_band": 0.6102764423076923, + "tokens_rate.above_band": 0.9862142099681867, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013785790031813362 + }, + { + "epoch": 2.6235619940349384, + "grad_norm": 20.309635711448852, + "learning_rate": 3.4197438161099324e-07, + "loss": 0.125, + "step": 12315, + "success_rate.epoch.env.abd": 0.9849056603773585, + "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9149659863945578, + "success_rate.epoch.env.math": 0.9751665657177468, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8643546971027217, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716457775476291, + "success_rate.epoch.global": 0.9140705237932215, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9923189769707705, + "tokens_p.mean_in_band": 0.6464285714285715, + "tokens_rate.above_band": 0.9416180150125104, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.058381984987489574 + }, + { + "epoch": 2.6246271836386876, + "grad_norm": 50.60032986386894, + "learning_rate": 3.4194390430102167e-07, + "loss": 0.3125, + "step": 12320, + "success_rate.epoch.env.abd": 0.9850187265917603, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9149659863945578, + "success_rate.epoch.env.math": 0.975211608222491, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8641542506573181, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717158384934313, + "success_rate.epoch.global": 0.9140464798359536, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9960411051212938, + "tokens_p.mean_in_band": 0.564453125, + "tokens_rate.above_band": 0.9867021276595744, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013297872340425532 + }, + { + "epoch": 2.6256923732424373, + "grad_norm": 552.7538325052278, + "learning_rate": 3.419134401502028e-07, + "loss": 0.3424, + "step": 12325, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8674698795180723, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9149659863945578, + "success_rate.epoch.env.math": 0.9752714113389626, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8642732049036778, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718202411123095, + "success_rate.epoch.global": 0.9141931081542136, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982775590551181, + "tokens_p.mean_in_band": 0.673828125, + "tokens_rate.above_band": 0.9883268482490273, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011673151750972763 + }, + { + "epoch": 2.6267575628461866, + "grad_norm": 78.88062688923914, + "learning_rate": 3.418829891807389e-07, + "loss": 0.1106, + "step": 12330, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9494584837545126, + "success_rate.epoch.env.logic": 0.9150622876557192, + "success_rate.epoch.env.math": 0.9747140276941602, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8641328090869376, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718377061428039, + "success_rate.epoch.global": 0.9139986376021798, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9975961538461539, + "tokens_p.mean_in_band": 0.6390625, + "tokens_rate.above_band": 0.9676375404530745, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032362459546925564 + }, + { + "epoch": 2.6278227524499362, + "grad_norm": 326.9640938071797, + "learning_rate": 3.4185255141482226e-07, + "loss": 0.3564, + "step": 12335, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9150622876557192, + "success_rate.epoch.env.math": 0.9741741741741742, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8643698211949411, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718418922319767, + "success_rate.epoch.global": 0.9139748384903095, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9975833333333334, + "tokens_p.mean_in_band": 0.52578125, + "tokens_rate.above_band": 0.9868421052631579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013157894736842105 + }, + { + "epoch": 2.6288879420536855, + "grad_norm": 191.830844701382, + "learning_rate": 3.4182212687463575e-07, + "loss": 0.2215, + "step": 12340, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.967741935483871, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9150622876557192, + "success_rate.epoch.env.math": 0.9742051589682064, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8647826086956522, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8718972656044968, + "success_rate.epoch.global": 0.9141208418194161, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960106382978723, + "tokens_p.mean_in_band": 0.6927083333333334, + "tokens_rate.above_band": 0.9591836734693877, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04081632653061224 + }, + { + "epoch": 2.629953131657435, + "grad_norm": 0.0, + "learning_rate": 3.4179171558235253e-07, + "loss": 0.3357, + "step": 12345, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9142212189616253, + "success_rate.epoch.env.math": 0.9742206235011991, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8650173611111112, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719516031982392, + "success_rate.epoch.global": 0.9140969162995595, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.995336859688196, + "tokens_p.mean_in_band": 0.71703125, + "tokens_rate.above_band": 0.9472573839662447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.052742616033755275 + }, + { + "epoch": 2.6310183212611844, + "grad_norm": 101.32455904266345, + "learning_rate": 3.417613175601361e-07, + "loss": 0.1704, + "step": 12350, + "success_rate.epoch.env.abd": 0.9851851851851852, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9142212189616253, + "success_rate.epoch.env.math": 0.974251497005988, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8650519031141869, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8719575500625905, + "success_rate.epoch.global": 0.9140730717185386, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.875, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9902173913043478, + "tokens_p.mean_in_band": 0.6592881944444444, + "tokens_rate.above_band": 0.9274193548387096, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07258064516129033 + }, + { + "epoch": 2.632083510864934, + "grad_norm": 35.22862126377775, + "learning_rate": 3.417309328301404e-07, + "loss": 0.177, + "step": 12355, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9142212189616253, + "success_rate.epoch.env.math": 0.9743130227001194, + "success_rate.epoch.env.sat": 0.12121212121212122, + "success_rate.epoch.env.science": 0.8652849740932642, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.87200417519459, + "success_rate.epoch.global": 0.9142181695373185, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996875, + "tokens_p.mean_in_band": 0.82421875, + "tokens_rate.above_band": 0.9815950920245399, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018404907975460124 + }, + { + "epoch": 2.6331487004686833, + "grad_norm": 379.97062484076656, + "learning_rate": 3.4170056141450957e-07, + "loss": 0.427, + "step": 12360, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9132882882882883, + "success_rate.epoch.env.math": 0.9743283582089552, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8656330749354005, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716283064026846, + "success_rate.epoch.global": 0.914025623735671, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9956098942598187, + "tokens_p.mean_in_band": 0.5803052325581395, + "tokens_rate.above_band": 0.9390070921985816, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06099290780141844 + }, + { + "epoch": 2.634213890072433, + "grad_norm": 76.11846409403248, + "learning_rate": 3.416702033353781e-07, + "loss": 0.2087, + "step": 12365, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8690476190476191, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9134831460674158, + "success_rate.epoch.env.math": 0.9743895175699822, + "success_rate.epoch.env.sat": 0.11764705882352941, + "success_rate.epoch.env.science": 0.8658064516129033, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8717386282037196, + "success_rate.epoch.global": 0.9141703130259172, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968377976190477, + "tokens_p.mean_in_band": 0.884765625, + "tokens_rate.above_band": 0.9882352941176471, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011764705882352941 + }, + { + "epoch": 2.6352790796761822, + "grad_norm": 27.976698684638354, + "learning_rate": 3.416398586148707e-07, + "loss": 0.1146, + "step": 12370, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8698224852071006, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9498207885304659, + "success_rate.epoch.env.logic": 0.9135802469135802, + "success_rate.epoch.env.math": 0.974435196195006, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8660369257191928, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8715374259490964, + "success_rate.epoch.global": 0.9141465053763441, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963417658730159, + "tokens_p.mean_in_band": 0.6346354166666667, + "tokens_rate.above_band": 0.9710982658959537, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028901734104046242 + }, + { + "epoch": 2.636344269279932, + "grad_norm": 88.06362553512136, + "learning_rate": 3.4160952727510236e-07, + "loss": 0.4213, + "step": 12375, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.95, + "success_rate.epoch.env.logic": 0.9136771300448431, + "success_rate.epoch.env.math": 0.974435196195006, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8660102739726028, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716297162164803, + "success_rate.epoch.global": 0.9141227775914122, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9642857142857143, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972644658753709, + "tokens_p.mean_in_band": 0.53984375, + "tokens_rate.above_band": 0.9853801169590644, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014619883040935672 + }, + { + "epoch": 2.637409458883681, + "grad_norm": 1299.8528138523136, + "learning_rate": 3.4157920933817844e-07, + "loss": 0.5056, + "step": 12380, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.96875, + "success_rate.epoch.env.ded": 0.9501779359430605, + "success_rate.epoch.env.logic": 0.9138702460850112, + "success_rate.epoch.env.math": 0.9744807121661722, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8658119658119658, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716495580159129, + "success_rate.epoch.global": 0.9140991292699263, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959677419354839, + "tokens_p.mean_in_band": 0.61328125, + "tokens_rate.above_band": 0.9850467289719627, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014953271028037384 + }, + { + "epoch": 2.638474648487431, + "grad_norm": 42.82145309479514, + "learning_rate": 3.415489048261944e-07, + "loss": 0.1889, + "step": 12385, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.950530035335689, + "success_rate.epoch.env.logic": 0.9129464285714286, + "success_rate.epoch.env.math": 0.9745260663507109, + "success_rate.epoch.env.sat": 0.11428571428571428, + "success_rate.epoch.env.science": 0.8659265584970111, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8716982124200583, + "success_rate.epoch.global": 0.9140755600133734, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996821530418251, + "tokens_p.mean_in_band": 0.6125, + "tokens_rate.above_band": 0.9905838041431262, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009416195856873822 + }, + { + "epoch": 2.63953983809118, + "grad_norm": 354.2634797140363, + "learning_rate": 3.4151861376123587e-07, + "loss": 0.3427, + "step": 12390, + "success_rate.epoch.env.abd": 0.985239852398524, + "success_rate.epoch.env.agentgym:alfworld": 0.8713450292397661, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.9130434782608695, + "success_rate.epoch.env.math": 0.9745411486086442, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.86615515771526, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8740661994528959, + "success_rate.epoch.global": 0.914218958611482, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996171516079633, + "tokens_p.mean_in_band": 0.70703125, + "tokens_rate.above_band": 0.9994897959183674, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0005102040816326531 + }, + { + "epoch": 2.6406050276949298, + "grad_norm": 260.97235863946565, + "learning_rate": 3.414883361653788e-07, + "loss": 0.2062, + "step": 12395, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.9123196448390677, + "success_rate.epoch.env.math": 0.9745562130177515, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8663260962111536, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.874090238429967, + "success_rate.epoch.global": 0.9141952682439187, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984756097560976, + "tokens_p.mean_in_band": 0.7408854166666666, + "tokens_rate.above_band": 0.9933920704845814, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006607929515418502 + }, + { + "epoch": 2.641670217298679, + "grad_norm": 122.82801333251628, + "learning_rate": 3.414580720606894e-07, + "loss": 0.2721, + "step": 12400, + "success_rate.epoch.env.abd": 0.9853479853479854, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9508771929824561, + "success_rate.epoch.env.logic": 0.912707182320442, + "success_rate.epoch.env.math": 0.9746012994683992, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8664398128455976, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8741448028179115, + "success_rate.epoch.global": 0.9143379906852961, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986772486772487, + "tokens_p.mean_in_band": 0.87890625, + "tokens_rate.above_band": 0.9947368421052631, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005263157894736842 + }, + { + "epoch": 2.6427354069024287, + "grad_norm": 106.00914502389531, + "learning_rate": 3.4142782146922374e-07, + "loss": 0.3099, + "step": 12405, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.951048951048951, + "success_rate.epoch.env.logic": 0.91280353200883, + "success_rate.epoch.env.math": 0.9746462264150944, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8666100254885302, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8742083154496375, + "success_rate.epoch.global": 0.9144802391232149, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949448529411765, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9927007299270073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0072992700729927005 + }, + { + "epoch": 2.643800596506178, + "grad_norm": 95.61160162625873, + "learning_rate": 3.413975844130284e-07, + "loss": 0.2611, + "step": 12410, + "success_rate.epoch.env.abd": 0.9854014598540146, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.951048951048951, + "success_rate.epoch.env.logic": 0.9128996692392503, + "success_rate.epoch.env.math": 0.974676089517079, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8669491525423729, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8742651677438428, + "success_rate.epoch.global": 0.9146220159151194, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9950331125827815, + "tokens_p.mean_in_band": 0.845703125, + "tokens_rate.above_band": 0.9741935483870968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025806451612903226 + }, + { + "epoch": 2.6448657861099276, + "grad_norm": 52.318647459045245, + "learning_rate": 3.4136736091413977e-07, + "loss": 0.1276, + "step": 12415, + "success_rate.epoch.env.abd": 0.9854545454545455, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9130913091309131, + "success_rate.epoch.env.math": 0.9747207524985303, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8671180702496826, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.874322337500854, + "success_rate.epoch.global": 0.9147633234028467, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986349453978159, + "tokens_p.mean_in_band": 0.73828125, + "tokens_rate.above_band": 0.9968895800933126, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003110419906687403 + }, + { + "epoch": 2.645930975713677, + "grad_norm": 223.657351330841, + "learning_rate": 3.413371509945847e-07, + "loss": 0.2661, + "step": 12420, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9131868131868132, + "success_rate.epoch.env.math": 0.9747800586510263, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8672865595942519, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8743709379333583, + "success_rate.epoch.global": 0.9149041639127561, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947916666666666, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.6469961653174265, + "grad_norm": 275.558977706429, + "learning_rate": 3.413069546763799e-07, + "loss": 0.3893, + "step": 12425, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9133771929824561, + "success_rate.epoch.env.math": 0.974824355971897, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8675664276676508, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8744177147688051, + "success_rate.epoch.global": 0.9150445397558562, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9876373626373627, + "tokens_p.mean_in_band": 0.7940340909090909, + "tokens_rate.above_band": 0.8921568627450981, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10784313725490197 + }, + { + "epoch": 2.6480613549211762, + "grad_norm": 56.73580731328113, + "learning_rate": 3.412767719815321e-07, + "loss": 0.3102, + "step": 12430, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.912472647702407, + "success_rate.epoch.env.math": 0.9748684979544127, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8677894736842106, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.873901500458314, + "success_rate.epoch.global": 0.9148550724637681, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981617647058824, + "tokens_p.mean_in_band": 0.5625651041666667, + "tokens_rate.above_band": 0.9645390070921985, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03546099290780142 + }, + { + "epoch": 2.6491265445249255, + "grad_norm": 148.82700566987907, + "learning_rate": 3.4124660293203834e-07, + "loss": 0.229, + "step": 12435, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9114754098360656, + "success_rate.epoch.env.math": 0.9749417249417249, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8679562657695542, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8738829996458005, + "success_rate.epoch.global": 0.9148306478132193, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9960648148148148, + "tokens_p.mean_in_band": 0.3863146551724138, + "tokens_rate.above_band": 0.9588068181818182, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.041193181818181816 + }, + { + "epoch": 2.6501917341286747, + "grad_norm": 24.734506480244168, + "learning_rate": 3.4121644754988565e-07, + "loss": 0.2465, + "step": 12440, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.867816091954023, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9114754098360656, + "success_rate.epoch.env.math": 0.9744186046511628, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8677581863979849, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8739011682859619, + "success_rate.epoch.global": 0.9146421536441235, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9989754098360656, + "tokens_p.mean_in_band": 0.4957932692307692, + "tokens_rate.above_band": 0.9704545454545455, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029545454545454545 + }, + { + "epoch": 2.6512569237324244, + "grad_norm": 170.44071539248512, + "learning_rate": 3.4118630585705095e-07, + "loss": 0.1713, + "step": 12445, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.867816091954023, + "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9116684841875682, + "success_rate.epoch.env.math": 0.9744483159117305, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.867197318810222, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.873870433560899, + "success_rate.epoch.global": 0.9144542772861357, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9985074626865672, + "tokens_p.mean_in_band": 0.4775390625, + "tokens_rate.above_band": 0.9882005899705014, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011799410029498525 + }, + { + "epoch": 2.652322113336174, + "grad_norm": 115.991140567978, + "learning_rate": 3.411561778755014e-07, + "loss": 0.2379, + "step": 12450, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, + "success_rate.epoch.env.agentgym:sciworld": 0.9695431472081218, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9117647058823529, + "success_rate.epoch.env.math": 0.9744927536231884, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8673640167364016, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.8740490553156864, + "success_rate.epoch.global": 0.9145942408376964, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9994095816464238, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.9986522911051213, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013477088948787063 + }, + { + "epoch": 2.6533873029399233, + "grad_norm": 77.38937991969678, + "learning_rate": 3.4112606362719396e-07, + "loss": 0.1455, + "step": 12455, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9512195121951219, + "success_rate.epoch.env.logic": 0.9120521172638436, + "success_rate.epoch.env.math": 0.9745370370370371, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8675302966987045, + "success_rate.epoch.env.webshop": 1.0, + "success_rate.epoch.env_macro_mean": 0.874108309610822, + "success_rate.epoch.global": 0.914733747141457, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973684210526316, + "tokens_p.mean_in_band": 0.8489583333333334, + "tokens_rate.above_band": 0.9693877551020408, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030612244897959183 + }, + { + "epoch": 2.6544524925436725, + "grad_norm": 267.3205182538426, + "learning_rate": 3.4109596313407576e-07, + "loss": 0.2356, + "step": 12460, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8700564971751412, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.9122426868905742, + "success_rate.epoch.env.math": 0.9745370370370371, + "success_rate.epoch.env.sat": 0.1388888888888889, + "success_rate.epoch.env.science": 0.8678065054211843, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713923523744523, + "success_rate.epoch.global": 0.9147097195042401, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968581081081082, + "tokens_p.mean_in_band": 0.6544744318181818, + "tokens_rate.above_band": 0.965553235908142, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03444676409185804 + }, + { + "epoch": 2.6555176821474222, + "grad_norm": 74.44973028954672, + "learning_rate": 3.4106587641808375e-07, + "loss": 0.1964, + "step": 12465, + "success_rate.epoch.env.abd": 0.9855595667870036, + "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.9122426868905742, + "success_rate.epoch.env.math": 0.974581166955517, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8676103247293921, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8711084010618996, + "success_rate.epoch.global": 0.9145229566916314, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979416167664671, + "tokens_p.mean_in_band": 0.6551339285714286, + "tokens_rate.above_band": 0.9597701149425287, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.040229885057471264 + }, + { + "epoch": 2.656582871751172, + "grad_norm": 146.02242392499596, + "learning_rate": 3.4103580350114494e-07, + "loss": 0.1527, + "step": 12470, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9513888888888888, + "success_rate.epoch.env.logic": 0.9125269978401728, + "success_rate.epoch.env.math": 0.9746105020196192, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8674147963424771, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8711238612101858, + "success_rate.epoch.global": 0.9144993498049415, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99375, + "tokens_p.mean_in_band": 0.5, + "tokens_rate.above_band": 0.9574468085106383, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0425531914893617 + }, + { + "epoch": 2.657648061354921, + "grad_norm": 80.72433270935532, + "learning_rate": 3.410057444051762e-07, + "loss": 0.2261, + "step": 12475, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9127155172413793, + "success_rate.epoch.env.math": 0.9746835443037974, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8675249169435216, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8711729418304717, + "success_rate.epoch.global": 0.9146381045115223, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982726130653267, + "tokens_p.mean_in_band": 0.8580729166666666, + "tokens_rate.above_band": 0.9851485148514851, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01485148514851485 + }, + { + "epoch": 2.6587132509586704, + "grad_norm": 578.3522911448025, + "learning_rate": 3.409756991520845e-07, + "loss": 0.3023, + "step": 12480, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8715083798882681, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9128094725511302, + "success_rate.epoch.env.math": 0.9747126436781609, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.867854183927092, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8712796858942916, + "success_rate.epoch.global": 0.9147764095917045, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.994894801980198, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9619047619047619, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0380952380952381 + }, + { + "epoch": 2.65977844056242, + "grad_norm": 54.715151789937785, + "learning_rate": 3.4094566776376654e-07, + "loss": 0.2974, + "step": 12485, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9129032258064517, + "success_rate.epoch.env.math": 0.9742120343839542, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8679635761589404, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713313818089375, + "success_rate.epoch.global": 0.9147525072791977, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.8, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988290398126464, + "tokens_p.mean_in_band": 0.4895833333333333, + "tokens_rate.above_band": 0.9930232558139535, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0069767441860465115 + }, + { + "epoch": 2.6608436301661698, + "grad_norm": 142.77648136138615, + "learning_rate": 3.40915650262109e-07, + "loss": 0.1973, + "step": 12490, + "success_rate.epoch.env.abd": 0.9856115107913669, + "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9515570934256056, + "success_rate.epoch.env.logic": 0.9129032258064517, + "success_rate.epoch.env.math": 0.9742857142857143, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8678232135481206, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713253197444775, + "success_rate.epoch.global": 0.9147286821705426, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.990301724137931, + "tokens_p.mean_in_band": 0.803515625, + "tokens_rate.above_band": 0.9206349206349206, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07936507936507936 + }, + { + "epoch": 2.661908819769919, + "grad_norm": 139.77275910617925, + "learning_rate": 3.408856466689884e-07, + "loss": 0.5987, + "step": 12495, + "success_rate.epoch.env.abd": 0.985663082437276, + "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9121114683815649, + "success_rate.epoch.env.math": 0.9743150684931506, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8679867986798679, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8712907559322627, + "success_rate.epoch.global": 0.9147049338922928, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990666482300885, + "tokens_p.mean_in_band": 0.5896935096153846, + "tokens_rate.above_band": 0.9720430107526882, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02795698924731183 + }, + { + "epoch": 2.6629740093736682, + "grad_norm": 75.85065896581979, + "learning_rate": 3.4085565700627113e-07, + "loss": 0.2049, + "step": 12500, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9517241379310345, + "success_rate.epoch.env.logic": 0.9122994652406418, + "success_rate.epoch.env.math": 0.9743589743589743, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8677379480840544, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713580477485764, + "success_rate.epoch.global": 0.9146812620734063, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9992433414043583, + "tokens_p.mean_in_band": 0.3776041666666667, + "tokens_rate.above_band": 0.9927884615384616, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007211538461538462 + }, + { + "epoch": 2.664039198977418, + "grad_norm": 314.7658792713378, + "learning_rate": 3.408256812958135e-07, + "loss": 0.245, + "step": 12505, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9518900343642611, + "success_rate.epoch.env.logic": 0.9123931623931624, + "success_rate.epoch.env.math": 0.9744172825469016, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8679555738379268, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8714067320692628, + "success_rate.epoch.global": 0.9148183863709418, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987726586102719, + "tokens_p.mean_in_band": 0.798828125, + "tokens_rate.above_band": 0.9880597014925373, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011940298507462687 + }, + { + "epoch": 2.6651043885811676, + "grad_norm": 22.935712991378168, + "learning_rate": 3.407957195594615e-07, + "loss": 0.0616, + "step": 12510, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.952054794520548, + "success_rate.epoch.env.logic": 0.9124866595517609, + "success_rate.epoch.env.math": 0.9744753261486103, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8681183237469187, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8714549038571228, + "success_rate.epoch.global": 0.9149550706033376, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986640334572491, + "tokens_p.mean_in_band": 0.8079427083333334, + "tokens_rate.above_band": 0.9889705882352942, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011029411764705883 + }, + { + "epoch": 2.666169578184917, + "grad_norm": 76.3643800223536, + "learning_rate": 3.407657718190511e-07, + "loss": 0.2555, + "step": 12515, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8736263736263736, + "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9696969696969697, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9116080937167199, + "success_rate.epoch.env.math": 0.9745042492917847, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8683347005742412, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8714756826280164, + "success_rate.epoch.global": 0.9149311118231336, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9959503239740821, + "tokens_p.mean_in_band": 0.7277644230769231, + "tokens_rate.above_band": 0.9907275320970043, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009272467902995721 + }, + { + "epoch": 2.6672347677886665, + "grad_norm": 194.64564018268754, + "learning_rate": 3.40735838096408e-07, + "loss": 0.2488, + "step": 12520, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9116080937167199, + "success_rate.epoch.env.math": 0.9745475113122172, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8680868496517821, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8716281599425297, + "success_rate.epoch.global": 0.9149072296865003, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993140243902439, + "tokens_p.mean_below_band": 4.1443854570388794e-08, + "tokens_rate.above_band": 0.9975669099756691, + "tokens_rate.below_band": 0.0024330900243309003, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.6682999573924158, + "grad_norm": 108.75745094931737, + "learning_rate": 3.4070591841334763e-07, + "loss": 0.2611, + "step": 12525, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9117021276595745, + "success_rate.epoch.env.math": 0.9746192893401016, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8678936605316974, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.871625671110771, + "success_rate.epoch.global": 0.9148834238262535, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9890813253012049, + "tokens_p.mean_in_band": 0.6181640625, + "tokens_rate.above_band": 0.8736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12631578947368421 + }, + { + "epoch": 2.6693651469961654, + "grad_norm": 0.0, + "learning_rate": 3.4067601279167526e-07, + "loss": 0.2377, + "step": 12530, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9117021276595745, + "success_rate.epoch.env.math": 0.9747191011235955, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8680016346546792, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8717066571122042, + "success_rate.epoch.global": 0.9150191326530612, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988613360323887, + "tokens_p.mean_in_band": 0.880859375, + "tokens_rate.above_band": 0.9919678714859438, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008032128514056224 + }, + { + "epoch": 2.6704303365999147, + "grad_norm": 218.3030956182925, + "learning_rate": 3.406461212531859e-07, + "loss": 0.2067, + "step": 12535, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9522184300341296, + "success_rate.epoch.env.logic": 0.9109225874867445, + "success_rate.epoch.env.math": 0.9747616376892877, + "success_rate.epoch.env.sat": 0.13513513513513514, + "success_rate.epoch.env.science": 0.8678090575275398, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8716221497727245, + "success_rate.epoch.global": 0.9148360394778733, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.8055555555555555, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9972411717495987, + "tokens_p.mean_in_band": 0.46986607142857145, + "tokens_rate.above_band": 0.956989247311828, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043010752688172046 + }, + { + "epoch": 2.6714955262036644, + "grad_norm": 171.61619979360648, + "learning_rate": 3.406162438196643e-07, + "loss": 0.1834, + "step": 12540, + "success_rate.epoch.env.abd": 0.9857651245551602, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9705882352941176, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9110169491525424, + "success_rate.epoch.env.math": 0.9747757847533632, + "success_rate.epoch.env.sat": 0.13157894736842105, + "success_rate.epoch.env.science": 0.8680781758957655, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713613986499823, + "success_rate.epoch.global": 0.9148124602670057, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972363945578231, + "tokens_p.mean_in_band": 0.6227678571428571, + "tokens_rate.above_band": 0.9767441860465116, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023255813953488372 + }, + { + "epoch": 2.6725607158074136, + "grad_norm": 135.820816695306, + "learning_rate": 3.4058638051288497e-07, + "loss": 0.1628, + "step": 12545, + "success_rate.epoch.env.abd": 0.9858156028368794, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9714285714285714, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9112050739957717, + "success_rate.epoch.env.math": 0.9748181309457191, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8681318681318682, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8711761785613432, + "success_rate.epoch.global": 0.91478895588702, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973958333333334, + "tokens_p.mean_in_band": 0.7047697368421053, + "tokens_rate.above_band": 0.9578713968957872, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04212860310421286 + }, + { + "epoch": 2.6736259054111633, + "grad_norm": 306.6589585928038, + "learning_rate": 3.40556531354612e-07, + "loss": 0.2072, + "step": 12550, + "success_rate.epoch.env.abd": 0.9858657243816255, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9714285714285714, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9113924050632911, + "success_rate.epoch.env.math": 0.9748603351955307, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8679398618447786, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8711841467954329, + "success_rate.epoch.global": 0.914765525982256, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9904983108108109, + "tokens_p.mean_in_band": 0.5859375, + "tokens_rate.above_band": 0.9736842105263158, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02631578947368421 + }, + { + "epoch": 2.6746910950149125, + "grad_norm": 547.4531653007602, + "learning_rate": 3.4052669636659917e-07, + "loss": 0.4401, + "step": 12555, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8763440860215054, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9104320337197049, + "success_rate.epoch.env.math": 0.9748743718592965, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8676948051948052, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8712880044419972, + "success_rate.epoch.global": 0.914583992407466, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8095238095238094, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999162198391421, + "tokens_p.mean_in_band": 0.4446614583333333, + "tokens_rate.above_band": 0.9920212765957447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007978723404255319 + }, + { + "epoch": 2.675756284618662, + "grad_norm": 88.24170323368735, + "learning_rate": 3.4049687557059014e-07, + "loss": 0.2505, + "step": 12560, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8770053475935828, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.9096638655462185, + "success_rate.epoch.env.math": 0.9748743718592965, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8680161943319838, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.87130750285434, + "success_rate.epoch.global": 0.9145609602021478, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962022569444444, + "tokens_p.mean_in_band": 0.6235119047619048, + "tokens_rate.above_band": 0.9320388349514563, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06796116504854369 + }, + { + "epoch": 2.6768214742224115, + "grad_norm": 67.47804442850637, + "learning_rate": 3.40467068988318e-07, + "loss": 0.1269, + "step": 12565, + "success_rate.epoch.env.abd": 0.9859649122807017, + "success_rate.epoch.env.agentgym:alfworld": 0.8776595744680851, + "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.9749303621169917, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8673139158576052, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713299339371077, + "success_rate.epoch.global": 0.9143803216650899, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0004937923250565, + "tokens_p.mean_in_band": 0.6892361111111112, + "tokens_rate.above_band": 0.9899441340782122, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01005586592178771 + }, + { + "epoch": 2.677886663826161, + "grad_norm": 92.06360362357528, + "learning_rate": 3.404372766415057e-07, + "loss": 0.3672, + "step": 12570, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8783068783068783, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.9749582637729549, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.867124394184168, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.8713917196730193, + "success_rate.epoch.global": 0.9143441977641317, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9979103343465046, + "tokens_p.mean_in_band": 0.1728515625, + "tokens_rate.above_band": 0.996969696969697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0030303030303030303 + }, + { + "epoch": 2.6789518534299104, + "grad_norm": 82.6772385835561, + "learning_rate": 3.4040749855186557e-07, + "loss": 0.3534, + "step": 12575, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8789473684210526, + "success_rate.epoch.env.agentgym:sciworld": 0.9707317073170731, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9525423728813559, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.9749861033907726, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8674456083803385, + "success_rate.epoch.env.webshop": 0.96875, + "success_rate.epoch.env_macro_mean": 0.871494721123121, + "success_rate.epoch.global": 0.9144788555258607, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974226804123711, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.9974293059125964, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002570694087403599 + }, + { + "epoch": 2.68001704303366, + "grad_norm": 134.60927404078097, + "learning_rate": 3.4037773474109964e-07, + "loss": 0.2746, + "step": 12580, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9527027027027027, + "success_rate.epoch.env.logic": 0.909853249475891, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8673100120627262, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8716548540605786, + "success_rate.epoch.global": 0.914456129336054, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9666666666666667, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9997229259024699, + "tokens_p.mean_in_band": 0.5791015625, + "tokens_rate.above_band": 0.9974731522425774, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002526847757422615 + }, + { + "epoch": 2.6810822326374093, + "grad_norm": 74.06099495629941, + "learning_rate": 3.403479852308997e-07, + "loss": 0.2647, + "step": 12585, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9528619528619529, + "success_rate.epoch.env.logic": 0.910135841170324, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8672282390693943, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8716875875933376, + "success_rate.epoch.global": 0.9144334743770569, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968398876404494, + "tokens_p.mean_in_band": 0.72412109375, + "tokens_rate.above_band": 0.956989247311828, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043010752688172046 + }, + { + "epoch": 2.682147422241159, + "grad_norm": 128.30272537127172, + "learning_rate": 3.4031825004294687e-07, + "loss": 0.2658, + "step": 12590, + "success_rate.epoch.env.abd": 0.986013986013986, + "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9530201342281879, + "success_rate.epoch.env.logic": 0.9102296450939458, + "success_rate.epoch.env.math": 0.9750415973377704, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.867440929114938, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8717336123818078, + "success_rate.epoch.global": 0.9145539906103286, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992045454545454, + "tokens_p.mean_in_band": 0.7760416666666666, + "tokens_rate.above_band": 0.9918845807033363, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008115419296663661 + }, + { + "epoch": 2.6832126118449082, + "grad_norm": 123.3337714347934, + "learning_rate": 3.402885291989119e-07, + "loss": 0.2349, + "step": 12595, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.8802083333333334, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9530201342281879, + "success_rate.epoch.env.logic": 0.9102296450939458, + "success_rate.epoch.env.math": 0.9750968456004427, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8672530987604958, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8717830061240943, + "success_rate.epoch.global": 0.91453125, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9964812332439679, + "tokens_p.mean_in_band": 0.729736328125, + "tokens_rate.above_band": 0.979002624671916, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02099737532808399 + }, + { + "epoch": 2.684277801448658, + "grad_norm": 873.5598063836308, + "learning_rate": 3.402588227204553e-07, + "loss": 0.1938, + "step": 12600, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.8808290155440415, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9094693028095734, + "success_rate.epoch.env.math": 0.9751106194690266, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8674650698602795, + "success_rate.epoch.env.webshop": 0.9696969696969697, + "success_rate.epoch.env_macro_mean": 0.8718051159596943, + "success_rate.epoch.global": 0.9145085803432137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9999003984063745, + "tokens_p.mean_in_band": 0.53125, + "tokens_rate.above_band": 0.9866352201257862, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013364779874213837 + }, + { + "epoch": 2.6853429910524076, + "grad_norm": 243.5140877420023, + "learning_rate": 3.402291306292268e-07, + "loss": 0.3984, + "step": 12605, + "success_rate.epoch.env.abd": 0.9860627177700348, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9095634095634095, + "success_rate.epoch.env.math": 0.9751518498067366, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.867277799920287, + "success_rate.epoch.env.webshop": 0.9705882352941176, + "success_rate.epoch.env_macro_mean": 0.8719372628845791, + "success_rate.epoch.global": 0.9144859813084112, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9973341232227488, + "tokens_p.mean_in_band": 0.5738636363636364, + "tokens_rate.above_band": 0.9829192546583851, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017080745341614908 + }, + { + "epoch": 2.686408180656157, + "grad_norm": 115.42695363407147, + "learning_rate": 3.4019945294686585e-07, + "loss": 0.1744, + "step": 12610, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.966183574879227, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9099378881987578, + "success_rate.epoch.env.math": 0.9751792608935466, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8673835125448028, + "success_rate.epoch.env.webshop": 0.9705882352941176, + "success_rate.epoch.env_macro_mean": 0.8715614250808709, + "success_rate.epoch.global": 0.9144634525660964, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984461325966851, + "tokens_p.mean_in_band": 0.1328125, + "tokens_rate.above_band": 0.9986206896551724, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001379310344827586 + }, + { + "epoch": 2.687473370259906, + "grad_norm": 68.83061612542053, + "learning_rate": 3.401697896950012e-07, + "loss": 0.2169, + "step": 12615, + "success_rate.epoch.env.abd": 0.9861111111111112, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9663461538461539, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9531772575250836, + "success_rate.epoch.env.logic": 0.9099378881987578, + "success_rate.epoch.env.math": 0.9752066115702479, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.867699642431466, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8716838246867568, + "success_rate.epoch.global": 0.9145962732919255, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983789625360231, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.994269340974212, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0057306590257879654 + }, + { + "epoch": 2.6885385598636558, + "grad_norm": 42.58555076893122, + "learning_rate": 3.401401408952514e-07, + "loss": 0.1896, + "step": 12620, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9099378881987578, + "success_rate.epoch.env.math": 0.9752339020363237, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8679619349722443, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8717433465450753, + "success_rate.epoch.global": 0.9147286821705426, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992732558139535, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9950413223140496, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0049586776859504135 + }, + { + "epoch": 2.6896037494674054, + "grad_norm": 128.33269913118167, + "learning_rate": 3.401105065692242e-07, + "loss": 0.3188, + "step": 12625, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9533333333333334, + "success_rate.epoch.env.logic": 0.9101239669421488, + "success_rate.epoch.env.math": 0.9752883031301482, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8681710213776722, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8717842162034976, + "success_rate.epoch.global": 0.9148606811145511, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9884510869565217, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.9387755102040817, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.061224489795918366 + }, + { + "epoch": 2.6906689390711547, + "grad_norm": 28.585383766342733, + "learning_rate": 3.400808867385169e-07, + "loss": 0.1609, + "step": 12630, + "success_rate.epoch.env.abd": 0.986159169550173, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.9722222222222222, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9103092783505154, + "success_rate.epoch.env.math": 0.975328947368421, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8683794466403162, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8718377998097678, + "success_rate.epoch.global": 0.9149922720247295, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9938322368421053, + "tokens_p.mean_in_band": 0.7607421875, + "tokens_rate.above_band": 0.9743589743589743, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02564102564102564 + }, + { + "epoch": 2.691734128674904, + "grad_norm": 227.12093536884058, + "learning_rate": 3.400512814247162e-07, + "loss": 0.1493, + "step": 12635, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9534883720930233, + "success_rate.epoch.env.logic": 0.9104938271604939, + "success_rate.epoch.env.math": 0.975355969331873, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8685353335965259, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8719582930020512, + "success_rate.epoch.global": 0.9151234567901234, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975458115183246, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.6927993182786536, + "grad_norm": 65.53678140940933, + "learning_rate": 3.400216906493982e-07, + "loss": 0.2631, + "step": 12640, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, + "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9536423841059603, + "success_rate.epoch.env.logic": 0.9106776180698152, + "success_rate.epoch.env.math": 0.9753963914707491, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8682965299684543, + "success_rate.epoch.env.webshop": 0.9714285714285714, + "success_rate.epoch.env_macro_mean": 0.8719853293035201, + "success_rate.epoch.global": 0.9151001540832049, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9991349480968859, + "tokens_p.mean_in_band": 0.6575520833333334, + "tokens_rate.above_band": 0.9796610169491525, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.020338983050847456 + }, + { + "epoch": 2.6938645078824033, + "grad_norm": 85.14168393718177, + "learning_rate": 3.399921144341285e-07, + "loss": 0.4298, + "step": 12645, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, + "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9537953795379538, + "success_rate.epoch.env.logic": 0.90992835209826, + "success_rate.epoch.env.math": 0.9748771163298744, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8684003152088259, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8720207723431379, + "success_rate.epoch.global": 0.914923076923077, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.861111111111111, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9985029940119761, + "tokens_p.mean_in_band": 0.68125, + "tokens_rate.above_band": 0.9852507374631269, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014749262536873156 + }, + { + "epoch": 2.6949296974861525, + "grad_norm": 162.84839387484718, + "learning_rate": 3.3996255280046204e-07, + "loss": 0.3466, + "step": 12650, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, + "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.954248366013072, + "success_rate.epoch.env.logic": 0.9101123595505618, + "success_rate.epoch.env.math": 0.9749045280960175, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8681621408894136, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8720595206498789, + "success_rate.epoch.global": 0.914900153609831, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999058734939759, + "tokens_p.mean_in_band": 0.19375, + "tokens_rate.above_band": 0.9900596421471173, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009940357852882704 + }, + { + "epoch": 2.6959948870899018, + "grad_norm": 165.0099130703682, + "learning_rate": 3.399330057699431e-07, + "loss": 0.1159, + "step": 12655, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.954248366013072, + "success_rate.epoch.env.logic": 0.9103869653767821, + "success_rate.epoch.env.math": 0.974959172563963, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8682658277624853, + "success_rate.epoch.env.webshop": 0.9722222222222222, + "success_rate.epoch.env_macro_mean": 0.8721535857717377, + "success_rate.epoch.global": 0.9150306748466258, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977678571428571, + "tokens_p.mean_in_band": 0.73046875, + "tokens_rate.above_band": 0.9967637540453075, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003236245954692557 + }, + { + "epoch": 2.6970600766936514, + "grad_norm": 98.05996758161302, + "learning_rate": 3.399034733641053e-07, + "loss": 0.2274, + "step": 12660, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.954248366013072, + "success_rate.epoch.env.logic": 0.9103869653767821, + "success_rate.epoch.env.math": 0.9750271444082519, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8680800942285041, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8722111302318339, + "success_rate.epoch.global": 0.9150076569678407, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988541666666667, + "tokens_p.mean_in_band": 0.625, + "tokens_rate.above_band": 0.974025974025974, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025974025974025976 + }, + { + "epoch": 2.698125266297401, + "grad_norm": 330.8750113513593, + "learning_rate": 3.398739556044717e-07, + "loss": 0.2343, + "step": 12665, + "success_rate.epoch.env.abd": 0.9862068965517241, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9543973941368078, + "success_rate.epoch.env.logic": 0.9098277608915907, + "success_rate.epoch.env.math": 0.9750406945198047, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8681318681318682, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8722080987171467, + "success_rate.epoch.global": 0.9149847094801223, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.8, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990974729241877, + "tokens_p.mean_in_band": 0.7963169642857143, + "tokens_rate.above_band": 0.9875222816399287, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012477718360071301 + }, + { + "epoch": 2.6991904559011504, + "grad_norm": 51.17089504857138, + "learning_rate": 3.398444525125547e-07, + "loss": 0.1257, + "step": 12670, + "success_rate.epoch.env.abd": 0.9862542955326461, + "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, + "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.910010111223458, + "success_rate.epoch.env.math": 0.9745533297238765, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8682352941176471, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8722075415272638, + "success_rate.epoch.global": 0.9149618320610687, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965415019762845, + "tokens_p.mean_below_band": 7.729977369308472e-08, + "tokens_p.mean_in_band": 0.7769097222222222, + "tokens_rate.above_band": 0.9806201550387597, + "tokens_rate.below_band": 0.001937984496124031, + "tokens_rate.in_band": 0.01744186046511628 + }, + { + "epoch": 2.7002556455048996, + "grad_norm": 430.729366643276, + "learning_rate": 3.398149641098558e-07, + "loss": 0.5388, + "step": 12675, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, + "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9101917255297679, + "success_rate.epoch.env.math": 0.9745945945945946, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8683385579937304, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8718900901587971, + "success_rate.epoch.global": 0.9149390243902439, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994070208728653, + "tokens_p.mean_in_band": 0.710546875, + "tokens_rate.above_band": 0.9906015037593985, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009398496240601503 + }, + { + "epoch": 2.7013208351086493, + "grad_norm": 6.7423208327181605, + "learning_rate": 3.39785490417866e-07, + "loss": 0.1175, + "step": 12680, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.9104627766599598, + "success_rate.epoch.env.math": 0.974622030237581, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8680501174628035, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8716724570814169, + "success_rate.epoch.global": 0.9147640791476408, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9939060642092746, + "tokens_p.mean_in_band": 0.7752700617283951, + "tokens_rate.above_band": 0.9121475054229935, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0878524945770065 + }, + { + "epoch": 2.702386024712399, + "grad_norm": 151.1336897590604, + "learning_rate": 3.397560314580656e-07, + "loss": 0.227, + "step": 12685, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.9105527638190954, + "success_rate.epoch.env.math": 0.9741379310344828, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8683079327862446, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8716600664704605, + "success_rate.epoch.global": 0.914741641337386, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.993844696969697, + "tokens_p.mean_in_band": 0.689453125, + "tokens_rate.above_band": 0.9705882352941176, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029411764705882353 + }, + { + "epoch": 2.703451214316148, + "grad_norm": 41.35779551554649, + "learning_rate": 3.3972658725192395e-07, + "loss": 0.2901, + "step": 12690, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, + "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.9106425702811245, + "success_rate.epoch.env.math": 0.9742074153680816, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8685134607881388, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8716932318156899, + "success_rate.epoch.global": 0.9148710166919575, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9885869565217391, + "tokens_p.mean_in_band": 0.85546875, + "tokens_rate.above_band": 0.9745762711864406, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025423728813559324 + }, + { + "epoch": 2.704516403919898, + "grad_norm": 253.11477489110263, + "learning_rate": 3.396971578208998e-07, + "loss": 0.1446, + "step": 12695, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9516129032258065, + "success_rate.epoch.env.logic": 0.9107321965897693, + "success_rate.epoch.env.math": 0.9742212674543501, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8687694704049844, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8718500662352632, + "success_rate.epoch.global": 0.915, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9982002617801047, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.705581593523647, + "grad_norm": 75.85965181853233, + "learning_rate": 3.3966774318644115e-07, + "loss": 0.2657, + "step": 12700, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.9099099099099099, + "success_rate.epoch.env.math": 0.9742489270386266, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8689735614307932, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8718245787468537, + "success_rate.epoch.global": 0.9149773071104387, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985342401500938, + "tokens_p.mean_in_band": 0.6216517857142857, + "tokens_rate.above_band": 0.987037037037037, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012962962962962963 + }, + { + "epoch": 2.706646783127397, + "grad_norm": 64.85725402361429, + "learning_rate": 3.3963834336998517e-07, + "loss": 0.168, + "step": 12705, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9519230769230769, + "success_rate.epoch.env.logic": 0.91, + "success_rate.epoch.env.math": 0.974304068522484, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8691770186335404, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8718701087119122, + "success_rate.epoch.global": 0.9151057401812689, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9983836206896551, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.997134670487106, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0028653295128939827 + }, + { + "epoch": 2.707711972731146, + "grad_norm": 102.2612461823331, + "learning_rate": 3.3960895839295816e-07, + "loss": 0.2986, + "step": 12710, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.952076677316294, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9743178170144462, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8690937257939582, + "success_rate.epoch.env.webshop": 0.972972972972973, + "success_rate.epoch.env_macro_mean": 0.8717993558155837, + "success_rate.epoch.global": 0.9149321266968325, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9979895931882686, + "tokens_p.mean_in_band": 0.5779854910714286, + "tokens_rate.above_band": 0.9741935483870968, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025806451612903226 + }, + { + "epoch": 2.7087771623348957, + "grad_norm": 76.78319581436322, + "learning_rate": 3.395795882767758e-07, + "loss": 0.2994, + "step": 12715, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.952076677316294, + "success_rate.epoch.env.logic": 0.9090909090909091, + "success_rate.epoch.env.math": 0.9743863393810032, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8692456479690522, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8718977571085353, + "success_rate.epoch.global": 0.9150602409638554, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966517857142857, + "tokens_p.mean_in_band": 0.8190104166666666, + "tokens_rate.above_band": 0.9837837837837838, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016216216216216217 + }, + { + "epoch": 2.709842351938645, + "grad_norm": 118.15155090805835, + "learning_rate": 3.395502330428427e-07, + "loss": 0.1937, + "step": 12720, + "success_rate.epoch.env.abd": 0.9863481228668942, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9522292993630573, + "success_rate.epoch.env.logic": 0.9093625498007968, + "success_rate.epoch.env.math": 0.9744408945686901, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8693467336683417, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719504756215923, + "success_rate.epoch.global": 0.915187969924812, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981904231625836, + "tokens_p.mean_in_band": 0.80234375, + "tokens_rate.above_band": 0.9889867841409692, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011013215859030838 + }, + { + "epoch": 2.7109075415423947, + "grad_norm": 139.73339175908865, + "learning_rate": 3.395208927125529e-07, + "loss": 0.1715, + "step": 12725, + "success_rate.epoch.env.abd": 0.9864406779661017, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9522292993630573, + "success_rate.epoch.env.logic": 0.9093625498007968, + "success_rate.epoch.env.math": 0.9744544970729111, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8696993060909792, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719921783512348, + "success_rate.epoch.global": 0.9153153153153153, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9954819277108434, + "tokens_p.mean_in_band": 0.8149857954545454, + "tokens_rate.above_band": 0.9378531073446328, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.062146892655367235 + }, + { + "epoch": 2.711972731146144, + "grad_norm": 83.42112473422877, + "learning_rate": 3.3949156730728926e-07, + "loss": 0.2264, + "step": 12730, + "success_rate.epoch.env.abd": 0.9865319865319865, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9093625498007968, + "success_rate.epoch.env.math": 0.9744680851063829, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8695652173913043, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8720168877884035, + "success_rate.epoch.global": 0.9152923538230885, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994783464566929, + "tokens_p.mean_in_band": 0.47098214285714285, + "tokens_rate.above_band": 0.9978001257071024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0021998742928975488 + }, + { + "epoch": 2.7130379207498936, + "grad_norm": 267.4328429181617, + "learning_rate": 3.3946225684842396e-07, + "loss": 0.2145, + "step": 12735, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.9087301587301587, + "success_rate.epoch.env.math": 0.9745087626128518, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8696153846153846, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8720176785921333, + "success_rate.epoch.global": 0.9152694610778443, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978309768637532, + "tokens_p.mean_in_band": 0.4552083333333333, + "tokens_rate.above_band": 0.9810844892812106, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018915510718789406 + }, + { + "epoch": 2.714103110353643, + "grad_norm": 105.2856814348994, + "learning_rate": 3.394329613573183e-07, + "loss": 0.2259, + "step": 12740, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9525316455696202, + "success_rate.epoch.env.logic": 0.908820614469772, + "success_rate.epoch.env.math": 0.9745627980922098, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.869431643625192, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8720278098119011, + "success_rate.epoch.global": 0.9152466367713005, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965277777777778, + "tokens_p.mean_in_band": 0.6986607142857143, + "tokens_rate.above_band": 0.9909326424870466, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009067357512953367 + }, + { + "epoch": 2.7151682999573925, + "grad_norm": 133.42069168673714, + "learning_rate": 3.3940368085532264e-07, + "loss": 0.287, + "step": 12745, + "success_rate.epoch.env.abd": 0.9865771812080537, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9525316455696202, + "success_rate.epoch.env.logic": 0.907920792079208, + "success_rate.epoch.env.math": 0.9746031746031746, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8693486590038314, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719421343118139, + "success_rate.epoch.global": 0.9150746268656716, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.6111111111111112, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992264851485149, + "tokens_p.mean_in_band": 0.6047894021739131, + "tokens_rate.above_band": 0.9634340222575517, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03656597774244833 + }, + { + "epoch": 2.7162334895611417, + "grad_norm": 13.830513736295519, + "learning_rate": 3.393744153637764e-07, + "loss": 0.1776, + "step": 12750, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.88, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9525316455696202, + "success_rate.epoch.env.logic": 0.9071146245059288, + "success_rate.epoch.env.math": 0.9746434231378764, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8694986605434366, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8718942768535995, + "success_rate.epoch.global": 0.9150521609538003, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980376766091051, + "tokens_p.mean_in_band": 0.47265625, + "tokens_rate.above_band": 0.9754977029096478, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02450229709035222 + }, + { + "epoch": 2.7172986791648914, + "grad_norm": 148.4499199044527, + "learning_rate": 3.393451649040079e-07, + "loss": 0.2425, + "step": 12755, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8756218905472637, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9525316455696202, + "success_rate.epoch.env.logic": 0.9072063178677197, + "success_rate.epoch.env.math": 0.9747235387045814, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8695984703632887, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8715209595168368, + "success_rate.epoch.global": 0.9150297619047619, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978153495440729, + "tokens_p.mean_below_band": 4.94765117764473e-09, + "tokens_p.mean_in_band": 0.65234375, + "tokens_rate.above_band": 0.987987987987988, + "tokens_rate.below_band": 0.003003003003003003, + "tokens_rate.in_band": 0.009009009009009009 + }, + { + "epoch": 2.7183638687686407, + "grad_norm": 122.44707007883592, + "learning_rate": 3.3931592949733487e-07, + "loss": 0.2001, + "step": 12760, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9525316455696202, + "success_rate.epoch.env.logic": 0.9072978303747534, + "success_rate.epoch.env.math": 0.9747899159663865, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8697478991596639, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8716048733150484, + "success_rate.epoch.global": 0.9151560178306092, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978582554517134, + "tokens_p.mean_in_band": 0.7252604166666666, + "tokens_rate.above_band": 0.963963963963964, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036036036036036036 + }, + { + "epoch": 2.7194290583723904, + "grad_norm": 110.02481614687227, + "learning_rate": 3.3928670916506373e-07, + "loss": 0.2534, + "step": 12765, + "success_rate.epoch.env.abd": 0.9867549668874173, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9526813880126183, + "success_rate.epoch.env.logic": 0.9075712881022615, + "success_rate.epoch.env.math": 0.974816369359916, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8698473282442748, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8716628172122665, + "success_rate.epoch.global": 0.9152818991097923, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993361928104575, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.99836867862969, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0016313213703099511 + }, + { + "epoch": 2.7204942479761396, + "grad_norm": 93.5136467802139, + "learning_rate": 3.392575039284902e-07, + "loss": 0.2452, + "step": 12770, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9526813880126183, + "success_rate.epoch.env.logic": 0.9076620825147348, + "success_rate.epoch.env.math": 0.9748427672955975, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8697638994668697, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8713698305321131, + "success_rate.epoch.global": 0.9151111111111111, + "success_rate.window.env.abd": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7083333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9911830357142857, + "tokens_p.mean_in_band": 0.245605762012012, + "tokens_rate.above_band": 0.14395886889460155, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.8560411311053985 + }, + { + "epoch": 2.7215594375798893, + "grad_norm": 29.63435732336514, + "learning_rate": 3.3922831380889865e-07, + "loss": 0.1235, + "step": 12775, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9526813880126183, + "success_rate.epoch.env.logic": 0.9069539666993144, + "success_rate.epoch.env.math": 0.974869109947644, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8700114025085519, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8713303514301413, + "success_rate.epoch.global": 0.9150887573964497, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.996309963099631, + "tokens_p.mean_in_band": 0.6375, + "tokens_rate.above_band": 0.9475524475524476, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05244755244755245 + }, + { + "epoch": 2.722624627183639, + "grad_norm": 584.5256637068097, + "learning_rate": 3.3919913882756275e-07, + "loss": 0.111, + "step": 12780, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9529780564263323, + "success_rate.epoch.env.logic": 0.9070450097847358, + "success_rate.epoch.env.math": 0.9748953974895398, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.870257966616085, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8713904026254653, + "success_rate.epoch.global": 0.9152141802067947, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965923172242875, + "tokens_p.mean_in_band": 0.6982421875, + "tokens_rate.above_band": 0.9901840490797545, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0098159509202454 + }, + { + "epoch": 2.723689816787388, + "grad_norm": 244.25106779476602, + "learning_rate": 3.39169979005745e-07, + "loss": 0.4608, + "step": 12785, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9529780564263323, + "success_rate.epoch.env.logic": 0.9070450097847358, + "success_rate.epoch.env.math": 0.974934725848564, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8705526116578349, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8714342159139911, + "success_rate.epoch.global": 0.9153392330383481, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9938271604938271, + "tokens_p.mean_in_band": 0.7083333333333334, + "tokens_rate.above_band": 0.9642857142857143, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03571428571428571 + }, + { + "epoch": 2.7247550063911374, + "grad_norm": 112.75010656510469, + "learning_rate": 3.391408343646969e-07, + "loss": 0.2757, + "step": 12790, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8768472906403941, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9529780564263323, + "success_rate.epoch.env.logic": 0.9070450097847358, + "success_rate.epoch.env.math": 0.9749739311783108, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8704682779456193, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8714855375954046, + "success_rate.epoch.global": 0.9153166421207658, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970518867924528, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9706959706959707, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029304029304029304 + }, + { + "epoch": 2.725820195994887, + "grad_norm": 87.0786076861541, + "learning_rate": 3.3911170492565874e-07, + "loss": 0.2456, + "step": 12795, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9070450097847358, + "success_rate.epoch.env.math": 0.9750260145681582, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8706148623161071, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8716261831573514, + "success_rate.epoch.global": 0.9154411764705882, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989804241435563, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9967479674796748, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0032520325203252032 + }, + { + "epoch": 2.726885385598637, + "grad_norm": 771.4129792867897, + "learning_rate": 3.3908259070985995e-07, + "loss": 0.3118, + "step": 12800, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.9070450097847358, + "success_rate.epoch.env.math": 0.9750649350649351, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8709556057185854, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8716606980572835, + "success_rate.epoch.global": 0.9155653450807636, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9923469387755102, + "tokens_p.mean_in_band": 0.8359375, + "tokens_rate.above_band": 0.9865771812080537, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013422818791946308 + }, + { + "epoch": 2.727950575202386, + "grad_norm": 49.76267568919681, + "learning_rate": 3.390534917385188e-07, + "loss": 0.2392, + "step": 12805, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, + "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9072265625, + "success_rate.epoch.env.math": 0.9750908147379346, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8711978971085242, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8717148573132459, + "success_rate.epoch.global": 0.9156891495601173, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981060606060606, + "tokens_p.mean_in_band": 0.8095703125, + "tokens_rate.above_band": 0.9867109634551495, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013289036544850499 + }, + { + "epoch": 2.7290157648061353, + "grad_norm": 155.5212812410368, + "learning_rate": 3.3902440803284225e-07, + "loss": 0.3043, + "step": 12810, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.972972972972973, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9072265625, + "success_rate.epoch.env.math": 0.9751295336787564, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8714392803598201, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8718074682065938, + "success_rate.epoch.global": 0.9158125915080527, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9963010204081633, + "tokens_p.mean_in_band": 0.8323863636363636, + "tokens_rate.above_band": 0.9780439121756487, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021956087824351298 + }, + { + "epoch": 2.730080954409885, + "grad_norm": 0.0, + "learning_rate": 3.3899533961402645e-07, + "loss": 0.2091, + "step": 12815, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9074074074074074, + "success_rate.epoch.env.math": 0.975168132436627, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8716317365269461, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719095716052979, + "success_rate.epoch.global": 0.9159356725146199, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9944029850746269, + "tokens_p.mean_in_band": 0.7353515625, + "tokens_rate.above_band": 0.9710144927536232, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.028985507246376812 + }, + { + "epoch": 2.7311461440136346, + "grad_norm": 21.88860198382124, + "learning_rate": 3.389662865032562e-07, + "loss": 0.1709, + "step": 12820, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9074074074074074, + "success_rate.epoch.env.math": 0.9752194114610222, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8719193427931292, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719403793589868, + "success_rate.epoch.global": 0.916058394160584, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9941964285714285, + "tokens_p.mean_in_band": 0.8153409090909091, + "tokens_rate.above_band": 0.9271523178807947, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0728476821192053 + }, + { + "epoch": 2.732211333617384, + "grad_norm": 681.6245002894009, + "learning_rate": 3.3893724872170517e-07, + "loss": 0.3349, + "step": 12825, + "success_rate.epoch.env.abd": 0.9834983498349835, + "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9075875486381323, + "success_rate.epoch.env.math": 0.9752704791344667, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8716896680343156, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719537268723332, + "success_rate.epoch.global": 0.9160349854227405, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967783505154639, + "tokens_p.mean_in_band": 0.5879720052083334, + "tokens_rate.above_band": 0.9938524590163934, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006147540983606557 + }, + { + "epoch": 2.733276523221133, + "grad_norm": 77.80873807809769, + "learning_rate": 3.389082262905359e-07, + "loss": 0.3259, + "step": 12830, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8792270531400966, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9067055393586005, + "success_rate.epoch.env.math": 0.975295934122491, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8711839166046166, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8718881134599535, + "success_rate.epoch.global": 0.9157205240174673, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.997957768187423, + "tokens_p.mean_in_band": 0.5323893229166666, + "tokens_rate.above_band": 0.9574970484061394, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04250295159386069 + }, + { + "epoch": 2.734341712824883, + "grad_norm": 61.20504537149271, + "learning_rate": 3.3887921923089975e-07, + "loss": 0.3251, + "step": 12835, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.9532710280373832, + "success_rate.epoch.env.logic": 0.9067961165048544, + "success_rate.epoch.env.math": 0.9753213367609255, + "success_rate.epoch.env.sat": 0.1282051282051282, + "success_rate.epoch.env.science": 0.8713754646840148, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8720650536082306, + "success_rate.epoch.global": 0.9158430232558139, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996016288951841, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.995768688293371, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004231311706629055 + }, + { + "epoch": 2.7354069024286325, + "grad_norm": 403.59498306493344, + "learning_rate": 3.388502275639368e-07, + "loss": 0.4661, + "step": 12840, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.953416149068323, + "success_rate.epoch.env.logic": 0.9067961165048544, + "success_rate.epoch.env.math": 0.9753846153846154, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8711474192350538, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8717718923355524, + "success_rate.epoch.global": 0.9156748911465893, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.6666666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9981534090909091, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.9606986899563319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039301310043668124 + }, + { + "epoch": 2.7364720920323817, + "grad_norm": 94.65129535578392, + "learning_rate": 3.388212513107761e-07, + "loss": 0.2215, + "step": 12845, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, + "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9736842105263158, + "success_rate.epoch.env.ded": 0.953416149068323, + "success_rate.epoch.env.logic": 0.906886517943744, + "success_rate.epoch.env.math": 0.9754350051177073, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8713862120088954, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8718063999669909, + "success_rate.epoch.global": 0.9157971014492754, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9919554455445545, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.9099099099099099, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09009009009009009 + }, + { + "epoch": 2.737537281636131, + "grad_norm": 19.6105710498484, + "learning_rate": 3.3879229049253523e-07, + "loss": 0.1336, + "step": 12850, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.9684684684684685, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.953416149068323, + "success_rate.epoch.env.logic": 0.9069767441860465, + "success_rate.epoch.env.math": 0.9754475703324809, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8715766099185789, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719722371777938, + "success_rate.epoch.global": 0.9159189580318379, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973834913112164, + "tokens_p.mean_in_band": 0.61328125, + "tokens_rate.above_band": 0.9968503937007874, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0031496062992125984 + }, + { + "epoch": 2.7386024712398807, + "grad_norm": 139.745001512881, + "learning_rate": 3.3876334513032063e-07, + "loss": 0.2752, + "step": 12855, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.9684684684684685, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.953416149068323, + "success_rate.epoch.env.logic": 0.9072463768115943, + "success_rate.epoch.env.math": 0.9755102040816327, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8716715976331361, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8720110784586353, + "success_rate.epoch.global": 0.9160404624277456, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965277777777778, + "tokens_p.mean_in_band": 0.875, + "tokens_rate.above_band": 0.9926470588235294, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007352941176470588 + }, + { + "epoch": 2.7396676608436303, + "grad_norm": 89.58953375412416, + "learning_rate": 3.387344152452275e-07, + "loss": 0.2905, + "step": 12860, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, + "success_rate.epoch.env.agentgym:sciworld": 0.9684684684684685, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9535603715170279, + "success_rate.epoch.env.logic": 0.9072463768115943, + "success_rate.epoch.env.math": 0.9750254841997962, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8712652157875322, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8719431803423867, + "success_rate.epoch.global": 0.9157287157287157, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.7142857142857143, + "success_rate.window.env_macro_mean": 0.7380952380952381, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9981203007518797, + "tokens_p.mean_in_band": 0.5301339285714286, + "tokens_rate.above_band": 0.9661016949152542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03389830508474576 + }, + { + "epoch": 2.7407328504473796, + "grad_norm": 194.36245017305367, + "learning_rate": 3.387055008583397e-07, + "loss": 0.2477, + "step": 12865, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9535603715170279, + "success_rate.epoch.env.logic": 0.9072463768115943, + "success_rate.epoch.env.math": 0.9750382068262863, + "success_rate.epoch.env.sat": 0.125, + "success_rate.epoch.env.science": 0.8715495031284505, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8720470665445944, + "success_rate.epoch.global": 0.915850144092219, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985974754558204, + "tokens_p.mean_in_band": 0.7728794642857143, + "tokens_rate.above_band": 0.9902777777777778, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009722222222222222 + }, + { + "epoch": 2.7417980400511293, + "grad_norm": 123.90996189365873, + "learning_rate": 3.386766019907299e-07, + "loss": 0.1662, + "step": 12870, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9535603715170279, + "success_rate.epoch.env.logic": 0.9074252651880425, + "success_rate.epoch.env.math": 0.9750889679715302, + "success_rate.epoch.env.sat": 0.12195121951219512, + "success_rate.epoch.env.science": 0.8716911764705882, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8718036613060511, + "success_rate.epoch.global": 0.9158273381294963, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9996050552922591, + "tokens_p.mean_in_band": 0.6830357142857143, + "tokens_rate.above_band": 0.9783616692426584, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021638330757341576 + }, + { + "epoch": 2.7428632296548785, + "grad_norm": 291.7152713621318, + "learning_rate": 3.3864771866345927e-07, + "loss": 0.3322, + "step": 12875, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9535603715170279, + "success_rate.epoch.env.logic": 0.9074252651880425, + "success_rate.epoch.env.math": 0.9751269035532995, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8719735876742479, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8737333222269945, + "success_rate.epoch.global": 0.915948275862069, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9923427152317881, + "tokens_p.mean_in_band": 0.818359375, + "tokens_rate.above_band": 0.9617834394904459, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03821656050955414 + }, + { + "epoch": 2.743928419258628, + "grad_norm": 69.54202925737356, + "learning_rate": 3.3861885089757785e-07, + "loss": 0.2093, + "step": 12880, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, + "success_rate.epoch.env.agentgym:sciworld": 0.9690265486725663, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9535603715170279, + "success_rate.epoch.env.logic": 0.9075144508670521, + "success_rate.epoch.env.math": 0.9751647237709072, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8717948717948718, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.873753762107886, + "success_rate.epoch.global": 0.9159253945480631, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9982755016722408, + "tokens_p.mean_in_band": 0.689453125, + "tokens_rate.above_band": 0.9867986798679867, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013201320132013201 + }, + { + "epoch": 2.7449936088623774, + "grad_norm": 289.29992965259237, + "learning_rate": 3.385899987141243e-07, + "loss": 0.2662, + "step": 12885, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9690265486725663, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9535603715170279, + "success_rate.epoch.env.logic": 0.9076034648700674, + "success_rate.epoch.env.math": 0.9746963562753036, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8720292504570384, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8737913902160649, + "success_rate.epoch.global": 0.9159025787965616, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966814159292036, + "tokens_p.mean_in_band": 0.575, + "tokens_rate.above_band": 0.9890590809628009, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010940919037199124 + }, + { + "epoch": 2.746058798466127, + "grad_norm": 14.151230101867, + "learning_rate": 3.385611621341258e-07, + "loss": 0.0535, + "step": 12890, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9539877300613497, + "success_rate.epoch.env.logic": 0.9077809798270894, + "success_rate.epoch.env.math": 0.9747091552857865, + "success_rate.epoch.env.sat": 0.14285714285714285, + "success_rate.epoch.env.science": 0.8721694667640614, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8738726934652464, + "success_rate.epoch.global": 0.9160228898426324, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9996811224489796, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.7471239880698763, + "grad_norm": 26.00191983956801, + "learning_rate": 3.3853234117859824e-07, + "loss": 0.2004, + "step": 12895, + "success_rate.epoch.env.abd": 0.9835526315789473, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9541284403669725, + "success_rate.epoch.env.logic": 0.9069097888675623, + "success_rate.epoch.env.math": 0.9747729566094854, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.871897810218978, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8755995315508817, + "success_rate.epoch.global": 0.9158571428571428, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9989251592356688, + "tokens_p.mean_in_band": 0.69296875, + "tokens_rate.above_band": 0.9751552795031055, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024844720496894408 + }, + { + "epoch": 2.748189177673626, + "grad_norm": 60.21817987872554, + "learning_rate": 3.385035358685462e-07, + "loss": 0.1821, + "step": 12900, + "success_rate.epoch.env.abd": 0.9836601307189542, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9541284403669725, + "success_rate.epoch.env.logic": 0.9069990412272292, + "success_rate.epoch.env.math": 0.9747983870967742, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8720845481049563, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8756787546929765, + "success_rate.epoch.global": 0.9159771754636234, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957865168539326, + "tokens_p.mean_in_band": 0.8645833333333334, + "tokens_rate.above_band": 0.9834254143646409, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016574585635359115 + }, + { + "epoch": 2.7492543672773753, + "grad_norm": 109.7369405967248, + "learning_rate": 3.384747462249627e-07, + "loss": 0.2379, + "step": 12905, + "success_rate.epoch.env.abd": 0.9837133550488599, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9541284403669725, + "success_rate.epoch.env.logic": 0.907177033492823, + "success_rate.epoch.env.math": 0.9748237663645518, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8723172062568206, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8757232323307167, + "success_rate.epoch.global": 0.9160968660968661, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9940476190476191, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9545454545454546, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045454545454545456 + }, + { + "epoch": 2.750319556881125, + "grad_norm": 111.7120308445304, + "learning_rate": 3.384459722688295e-07, + "loss": 0.3157, + "step": 12910, + "success_rate.epoch.env.abd": 0.9837662337662337, + "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9541284403669725, + "success_rate.epoch.env.logic": 0.907177033492823, + "success_rate.epoch.env.math": 0.9748743718592965, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8725490196078431, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8757537139273659, + "success_rate.epoch.global": 0.9162162162162162, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995345744680851, + "tokens_p.mean_in_band": 0.5693359375, + "tokens_rate.above_band": 0.9591836734693877, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04081632653061224 + }, + { + "epoch": 2.751384746484874, + "grad_norm": 106.38458675061695, + "learning_rate": 3.384172140211168e-07, + "loss": 0.1548, + "step": 12915, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, + "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9541284403669725, + "success_rate.epoch.env.logic": 0.9074427480916031, + "success_rate.epoch.env.math": 0.9748995983935743, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8723249909321726, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8753881002473701, + "success_rate.epoch.global": 0.9160511363636363, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9959546925566343, + "tokens_p.mean_in_band": 0.6899038461538461, + "tokens_rate.above_band": 0.9596273291925466, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.040372670807453416 + }, + { + "epoch": 2.752449936088624, + "grad_norm": 963.3624042076193, + "learning_rate": 3.383884715027834e-07, + "loss": 0.1965, + "step": 12920, + "success_rate.epoch.env.abd": 0.9838187702265372, + "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9541284403669725, + "success_rate.epoch.env.logic": 0.9076190476190477, + "success_rate.epoch.env.math": 0.974937343358396, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8721477725461789, + "success_rate.epoch.env.webshop": 0.9736842105263158, + "success_rate.epoch.env_macro_mean": 0.8754037435315151, + "success_rate.epoch.global": 0.9160283687943263, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962121212121212, + "tokens_p.mean_in_band": 0.5712890625, + "tokens_rate.above_band": 0.9763313609467456, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023668639053254437 + }, + { + "epoch": 2.753515125692373, + "grad_norm": 41.79778241882147, + "learning_rate": 3.383597447347767e-07, + "loss": 0.1631, + "step": 12925, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9542682926829268, + "success_rate.epoch.env.logic": 0.9078822412155746, + "success_rate.epoch.env.math": 0.9749498997995992, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8722403184943902, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.875556322809815, + "success_rate.epoch.global": 0.9161473087818697, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987999231950845, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9977011494252873, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0022988505747126436 + }, + { + "epoch": 2.754580315296123, + "grad_norm": 53.378081761554114, + "learning_rate": 3.3833103373803254e-07, + "loss": 0.1884, + "step": 12930, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8785046728971962, + "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9070208728652751, + "success_rate.epoch.env.math": 0.974974974974975, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.872471098265896, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8755657672955613, + "success_rate.epoch.global": 0.9161244695898161, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9977627020785219, + "tokens_p.mean_in_band": 0.7125538793103449, + "tokens_rate.above_band": 0.9675977653631285, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03240223463687151 + }, + { + "epoch": 2.755645504899872, + "grad_norm": 129.9964445162915, + "learning_rate": 3.3830233853347524e-07, + "loss": 0.3093, + "step": 12935, + "success_rate.epoch.env.abd": 0.9838709677419355, + "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, + "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9071969696969697, + "success_rate.epoch.env.math": 0.9750124937531235, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8726091663659329, + "success_rate.epoch.env.webshop": 0.9743589743589743, + "success_rate.epoch.env_macro_mean": 0.8756612988456512, + "success_rate.epoch.global": 0.9162429378531074, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9957842612419701, + "tokens_p.mean_in_band": 0.74375, + "tokens_rate.above_band": 0.989406779661017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01059322033898305 + }, + { + "epoch": 2.7567106945036217, + "grad_norm": 117.04270256510601, + "learning_rate": 3.382736591420177e-07, + "loss": 0.2535, + "step": 12940, + "success_rate.epoch.env.abd": 0.9839228295819936, + "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, + "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9074598677998111, + "success_rate.epoch.env.math": 0.9745254745254746, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8727469358327326, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8757285205550968, + "success_rate.epoch.global": 0.9162200282087447, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998007741347905, + "tokens_p.mean_in_band": 0.4586397058823529, + "tokens_rate.above_band": 0.9699646643109541, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.030035335689045935 + }, + { + "epoch": 2.757775884107371, + "grad_norm": 288.49917763886185, + "learning_rate": 3.382449955845613e-07, + "loss": 0.3495, + "step": 12945, + "success_rate.epoch.env.abd": 0.9839228295819936, + "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, + "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9066918001885014, + "success_rate.epoch.env.math": 0.9745508982035929, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8726618705035971, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8756532742585216, + "success_rate.epoch.global": 0.916056338028169, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.7777777777777778, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992827868852459, + "tokens_p.mean_in_band": 0.5480587121212122, + "tokens_rate.above_band": 0.9486780715396579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05132192846034215 + }, + { + "epoch": 2.7588410737111206, + "grad_norm": 73.63117908408128, + "learning_rate": 3.382163478819957e-07, + "loss": 0.1572, + "step": 12950, + "success_rate.epoch.env.abd": 0.9840255591054313, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9066918001885014, + "success_rate.epoch.env.math": 0.9745762711864406, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8728904847396768, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8757365996158064, + "success_rate.epoch.global": 0.9161744022503516, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978380503144654, + "tokens_p.mean_in_band": 0.8190104166666666, + "tokens_rate.above_band": 0.9906542056074766, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009345794392523364 + }, + { + "epoch": 2.7599062633148703, + "grad_norm": 36.279593757060574, + "learning_rate": 3.3818771605519924e-07, + "loss": 0.1955, + "step": 12955, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9067796610169492, + "success_rate.epoch.env.math": 0.9746394828443561, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8730272596843616, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8757673924745916, + "success_rate.epoch.global": 0.9162921348314607, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967648678414097, + "tokens_p.mean_in_band": 0.89453125, + "tokens_rate.above_band": 0.9978021978021978, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002197802197802198 + }, + { + "epoch": 2.7609714529186196, + "grad_norm": 498.2493770578576, + "learning_rate": 3.381591001250386e-07, + "loss": 0.3686, + "step": 12960, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9068673565380997, + "success_rate.epoch.env.math": 0.9746772591857001, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8728965270318654, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8757788917032883, + "success_rate.epoch.global": 0.9162692847124825, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961538461538462, + "tokens_p.mean_in_band": 0.6569010416666666, + "tokens_rate.above_band": 0.9558823529411765, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04411764705882353 + }, + { + "epoch": 2.762036642522369, + "grad_norm": 129.86885318783732, + "learning_rate": 3.3813050011236886e-07, + "loss": 0.1692, + "step": 12965, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, + "success_rate.epoch.env.agentgym:sciworld": 0.9698275862068966, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9061913696060038, + "success_rate.epoch.env.math": 0.9747023809523809, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8730782981766178, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.875748121020494, + "success_rate.epoch.global": 0.9162464985994397, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963959854014599, + "tokens_p.mean_in_band": 0.7291666666666666, + "tokens_rate.above_band": 0.9913169319826338, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008683068017366137 + }, + { + "epoch": 2.7631018321261185, + "grad_norm": 71.0039974069899, + "learning_rate": 3.381019160380334e-07, + "loss": 0.1114, + "step": 12970, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, + "success_rate.epoch.env.agentgym:sciworld": 0.9698275862068966, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9544072948328267, + "success_rate.epoch.env.logic": 0.9062792877225867, + "success_rate.epoch.env.math": 0.9747524752475247, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.8732595501606569, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8758275725262551, + "success_rate.epoch.global": 0.9163636363636364, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981897865853658, + "tokens_p.mean_in_band": 0.8203125, + "tokens_rate.above_band": 0.9939393939393939, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006060606060606061 + }, + { + "epoch": 2.764167021729868, + "grad_norm": 80.35148605776824, + "learning_rate": 3.3807334792286423e-07, + "loss": 0.2656, + "step": 12975, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, + "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9062792877225867, + "success_rate.epoch.env.math": 0.974308300395257, + "success_rate.epoch.env.sat": 0.16279069767441862, + "success_rate.epoch.env.science": 0.87339514978602, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8758355241957365, + "success_rate.epoch.global": 0.916340782122905, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993332147937412, + "tokens_p.mean_in_band": 0.5126953125, + "tokens_rate.above_band": 0.9971631205673759, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0028368794326241137 + }, + { + "epoch": 2.7652322113336174, + "grad_norm": 308.8120839830239, + "learning_rate": 3.380447957876815e-07, + "loss": 0.3048, + "step": 12980, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8807339449541285, + "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9548192771084337, + "success_rate.epoch.env.logic": 0.9063670411985019, + "success_rate.epoch.env.math": 0.9743463246176616, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8734853884533144, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.875593675423189, + "success_rate.epoch.global": 0.9163179916317992, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9980904817861339, + "tokens_p.mean_in_band": 0.7134046052631579, + "tokens_rate.above_band": 0.9781609195402299, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021839080459770115 + }, + { + "epoch": 2.7662974009373666, + "grad_norm": 20.06048388118293, + "learning_rate": 3.380162596532939e-07, + "loss": 0.5705, + "step": 12985, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8767123287671232, + "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.954954954954955, + "success_rate.epoch.env.logic": 0.9064546304957904, + "success_rate.epoch.env.math": 0.9744094488188977, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8732193732193733, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8752299263253799, + "success_rate.epoch.global": 0.9161559888579387, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992523923444976, + "tokens_p.mean_in_band": 0.4013671875, + "tokens_rate.above_band": 0.9858490566037735, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014150943396226415 + }, + { + "epoch": 2.7673625905411163, + "grad_norm": 69.499480929879, + "learning_rate": 3.379877395404982e-07, + "loss": 0.2843, + "step": 12990, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8767123287671232, + "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.954954954954955, + "success_rate.epoch.env.logic": 0.9058713886300093, + "success_rate.epoch.env.math": 0.9744597249508841, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8733096085409253, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8751896781060851, + "success_rate.epoch.global": 0.9161335187760778, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9999484323432343, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9742765273311897, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02572347266881029 + }, + { + "epoch": 2.768427780144866, + "grad_norm": 444.4493964196311, + "learning_rate": 3.3795923547007975e-07, + "loss": 0.3772, + "step": 12995, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8772727272727273, + "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9550898203592815, + "success_rate.epoch.env.logic": 0.9059590316573557, + "success_rate.epoch.env.math": 0.9744973025993134, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8734447209384998, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8752881229111084, + "success_rate.epoch.global": 0.91625, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9997165532879818, + "tokens_p.mean_in_band": 0.830078125, + "tokens_rate.above_band": 0.9954853273137697, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004514672686230248 + }, + { + "epoch": 2.7694929697486153, + "grad_norm": 811.1075631010086, + "learning_rate": 3.379307474628121e-07, + "loss": 0.2975, + "step": 13000, + "success_rate.epoch.env.abd": 0.9840764331210191, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9553571428571429, + "success_rate.epoch.env.logic": 0.9051162790697674, + "success_rate.epoch.env.math": 0.9744973025993134, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8736692689850958, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8749472378709281, + "success_rate.epoch.global": 0.9160887656033287, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9999617737003058, + "tokens_p.mean_in_band": 0.7431640625, + "tokens_rate.above_band": 0.9951308581862447, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004869141813755326 + }, + { + "epoch": 2.7705581593523645, + "grad_norm": 285.1482322404372, + "learning_rate": 3.379022755394571e-07, + "loss": 0.181, + "step": 13005, + "success_rate.epoch.env.abd": 0.9841269841269841, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9553571428571429, + "success_rate.epoch.env.logic": 0.9053803339517625, + "success_rate.epoch.env.math": 0.9745222929936306, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8738036157390996, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8750017978827174, + "success_rate.epoch.global": 0.9162049861495845, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972718253968254, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.771623348956114, + "grad_norm": 37.65000953425945, + "learning_rate": 3.3787381972076493e-07, + "loss": 0.3427, + "step": 13010, + "success_rate.epoch.env.abd": 0.9842271293375394, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9554896142433235, + "success_rate.epoch.env.logic": 0.9053803339517625, + "success_rate.epoch.env.math": 0.9745472344591287, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8740268931351733, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8750455101971091, + "success_rate.epoch.global": 0.9163208852005532, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9956191588785047, + "tokens_p.mean_in_band": 0.830078125, + "tokens_rate.above_band": 0.981651376146789, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01834862385321101 + }, + { + "epoch": 2.772688538559864, + "grad_norm": 63.40570206532535, + "learning_rate": 3.3784538002747393e-07, + "loss": 0.2462, + "step": 13015, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9554896142433235, + "success_rate.epoch.env.logic": 0.9057301293900185, + "success_rate.epoch.env.math": 0.9745969711773327, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8740714538379908, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8750903913863349, + "success_rate.epoch.global": 0.9164364640883977, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986111111111111, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9926470588235294, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007352941176470588 + }, + { + "epoch": 2.773753728163613, + "grad_norm": 332.560713721092, + "learning_rate": 3.3781695648031073e-07, + "loss": 0.3043, + "step": 13020, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.9057301293900185, + "success_rate.epoch.env.math": 0.9746465138956607, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8738961497703992, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8751295819545702, + "success_rate.epoch.global": 0.9164137931034483, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9984884332281808, + "tokens_p.mean_in_band": 0.7177734375, + "tokens_rate.above_band": 0.9958115183246073, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004188481675392671 + }, + { + "epoch": 2.7748189177673623, + "grad_norm": 140.19483916508162, + "learning_rate": 3.377885490999902e-07, + "loss": 0.47, + "step": 13025, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.9057301293900185, + "success_rate.epoch.env.math": 0.9747204666990763, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8740740740740741, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8751524798733965, + "success_rate.epoch.global": 0.9165289256198347, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9919181034482759, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9914529914529915, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008547008547008548 + }, + { + "epoch": 2.775884107371112, + "grad_norm": 131.39135791827394, + "learning_rate": 3.3776015790721555e-07, + "loss": 0.2402, + "step": 13030, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.9058171745152355, + "success_rate.epoch.env.math": 0.9747695293546822, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8742957746478873, + "success_rate.epoch.env.webshop": 0.975, + "success_rate.epoch.env_macro_mean": 0.8751850079056364, + "success_rate.epoch.global": 0.9166437414030262, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9921875, + "tokens_p.mean_in_band": 0.8658854166666666, + "tokens_rate.above_band": 0.9538461538461539, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.046153846153846156 + }, + { + "epoch": 2.7769492969748617, + "grad_norm": 101.11363122012816, + "learning_rate": 3.37731782922678e-07, + "loss": 0.1789, + "step": 13035, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.9059040590405905, + "success_rate.epoch.env.math": 0.974818401937046, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8744725738396625, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8752688544871866, + "success_rate.epoch.global": 0.9167582417582417, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981553819444444, + "tokens_p.mean_in_band": 0.798828125, + "tokens_rate.above_band": 0.993103448275862, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006896551724137931 + }, + { + "epoch": 2.778014486578611, + "grad_norm": 292.06019742006487, + "learning_rate": 3.3770342416705697e-07, + "loss": 0.214, + "step": 13040, + "success_rate.epoch.env.abd": 0.9842767295597484, + "success_rate.epoch.env.agentgym:alfworld": 0.8699551569506726, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.9743589743589743, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.906163753449862, + "success_rate.epoch.env.math": 0.9748670855485742, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8745607870695713, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8749486612442329, + "success_rate.epoch.global": 0.9167352537722908, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988155976676385, + "tokens_p.mean_in_band": 0.6432291666666666, + "tokens_rate.above_band": 0.9913294797687862, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008670520231213872 + }, + { + "epoch": 2.7790796761823606, + "grad_norm": 110.62272083868578, + "learning_rate": 3.376750816610202e-07, + "loss": 0.2493, + "step": 13045, + "success_rate.epoch.env.abd": 0.9843260188087775, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.90625, + "success_rate.epoch.env.math": 0.9749155812831645, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8746048472075869, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8751327586508908, + "success_rate.epoch.global": 0.9168493150684931, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986013427109974, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.78014486578611, + "grad_norm": 418.49247014166167, + "learning_rate": 3.3764675542522355e-07, + "loss": 0.4602, + "step": 13050, + "success_rate.epoch.env.abd": 0.9843260188087775, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9556213017751479, + "success_rate.epoch.env.logic": 0.9064220183486239, + "success_rate.epoch.env.math": 0.9749638902262879, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8744300245527885, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.875136895436068, + "success_rate.epoch.global": 0.9168262653898769, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9927591463414634, + "tokens_p.mean_in_band": 0.59765625, + "tokens_rate.above_band": 0.9213483146067416, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07865168539325842 + }, + { + "epoch": 2.7812100553898595, + "grad_norm": 107.4517307768208, + "learning_rate": 3.376184454803108e-07, + "loss": 0.2007, + "step": 13055, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9557522123893806, + "success_rate.epoch.env.logic": 0.9065077910174152, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8745620182200421, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8751807539749472, + "success_rate.epoch.global": 0.9169398907103825, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9978966346153846, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.782275244993609, + "grad_norm": 50.84041348277853, + "learning_rate": 3.375901518469142e-07, + "loss": 0.2162, + "step": 13060, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8716814159292036, + "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.9065934065934066, + "success_rate.epoch.env.math": 0.9745437079731027, + "success_rate.epoch.env.sat": 0.1590909090909091, + "success_rate.epoch.env.science": 0.8744316194473593, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8751988785337019, + "success_rate.epoch.global": 0.9167803547066848, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.86, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9967301324503312, + "tokens_p.mean_in_band": 0.5295973557692307, + "tokens_rate.above_band": 0.9830729166666666, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016927083333333332 + }, + { + "epoch": 2.7833404345973585, + "grad_norm": 43.417265940975334, + "learning_rate": 3.3756187454565395e-07, + "loss": 0.1949, + "step": 13065, + "success_rate.epoch.env.abd": 0.9844236760124611, + "success_rate.epoch.env.agentgym:alfworld": 0.8716814159292036, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.9066788655077768, + "success_rate.epoch.env.math": 0.9745803357314149, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8746070555361509, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8749159077333719, + "success_rate.epoch.global": 0.9167574931880109, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971931137724551, + "tokens_p.mean_in_band": 0.4967830882352941, + "tokens_rate.above_band": 0.907608695652174, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09239130434782608 + }, + { + "epoch": 2.7844056242011077, + "grad_norm": 53.617423870435076, + "learning_rate": 3.375336135971382e-07, + "loss": 0.2513, + "step": 13070, + "success_rate.epoch.env.abd": 0.984472049689441, + "success_rate.epoch.env.agentgym:alfworld": 0.8722466960352423, + "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.9068493150684932, + "success_rate.epoch.env.math": 0.9746168582375478, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.874738311235171, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8750024424196344, + "success_rate.epoch.global": 0.9168707482993197, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9967225609756097, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9951456310679612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0048543689320388345 + }, + { + "epoch": 2.7854708138048574, + "grad_norm": 101.20777957042894, + "learning_rate": 3.3750536902196345e-07, + "loss": 0.4592, + "step": 13075, + "success_rate.epoch.env.abd": 0.984472049689441, + "success_rate.epoch.env.agentgym:alfworld": 0.8728070175438597, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.9070191431175935, + "success_rate.epoch.env.math": 0.9746532759445241, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8745207389334263, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8750636328956317, + "success_rate.epoch.global": 0.9168478260869565, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9988386824324325, + "tokens_p.mean_in_band": 0.4890625, + "tokens_rate.above_band": 0.9833887043189369, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016611295681063124 + }, + { + "epoch": 2.7865360034086066, + "grad_norm": 57.014725508159245, + "learning_rate": 3.374771408407141e-07, + "loss": 0.2191, + "step": 13080, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, + "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9558823529411765, + "success_rate.epoch.env.logic": 0.907103825136612, + "success_rate.epoch.env.math": 0.9746895893027698, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8745644599303136, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8752174147410655, + "success_rate.epoch.global": 0.9169606512890095, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0, + "tokens_p.mean_in_band": 0.62109375, + "tokens_rate.above_band": 0.9983079526226735, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001692047377326565 + }, + { + "epoch": 2.7876011930123563, + "grad_norm": 73.00569698653771, + "learning_rate": 3.374489290739626e-07, + "loss": 0.2837, + "step": 13085, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, + "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9560117302052786, + "success_rate.epoch.env.logic": 0.9074410163339383, + "success_rate.epoch.env.math": 0.9747137404580153, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8739993038635573, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8752106477910577, + "success_rate.epoch.global": 0.9168021680216802, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.000561377245509, + "tokens_p.mean_in_band": 0.6723090277777778, + "tokens_rate.above_band": 0.9893364928909952, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01066350710900474 + }, + { + "epoch": 2.7886663826161056, + "grad_norm": 246.97176089096868, + "learning_rate": 3.3742073374226966e-07, + "loss": 0.2788, + "step": 13090, + "success_rate.epoch.env.abd": 0.9845201238390093, + "success_rate.epoch.env.agentgym:alfworld": 0.8739130434782608, + "success_rate.epoch.env.agentgym:sciworld": 0.9708333333333333, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9560117302052786, + "success_rate.epoch.env.logic": 0.9057971014492754, + "success_rate.epoch.env.math": 0.9747498808956646, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8737830319888734, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8751059739732323, + "success_rate.epoch.global": 0.9165087956698241, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9961476824457594, + "tokens_p.mean_in_band": 0.6041015625, + "tokens_rate.above_band": 0.9806576402321083, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019342359767891684 + }, + { + "epoch": 2.7897315722198552, + "grad_norm": 58.973753591352065, + "learning_rate": 3.373925548661836e-07, + "loss": 0.2696, + "step": 13095, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.8744588744588745, + "success_rate.epoch.env.agentgym:sciworld": 0.970954356846473, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9560117302052786, + "success_rate.epoch.env.logic": 0.9057971014492754, + "success_rate.epoch.env.math": 0.9747619047619047, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8736549809094064, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8751647092511327, + "success_rate.epoch.global": 0.9164864864864865, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993722098214286, + "tokens_p.mean_in_band": 0.6015625, + "tokens_rate.above_band": 0.9911504424778761, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008849557522123894 + }, + { + "epoch": 2.7907967618236045, + "grad_norm": 85.82667345074272, + "learning_rate": 3.3736439246624113e-07, + "loss": 0.1691, + "step": 13100, + "success_rate.epoch.env.abd": 0.9846153846153847, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.970954356846473, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.956140350877193, + "success_rate.epoch.env.logic": 0.9057971014492754, + "success_rate.epoch.env.math": 0.9747979077508322, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8738738738738738, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8752487676298994, + "success_rate.epoch.global": 0.9165991902834008, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974075112107623, + "tokens_p.mean_in_band": 0.4947916666666667, + "tokens_rate.above_band": 0.9933184855233853, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0066815144766146995 + }, + { + "epoch": 2.791861951427354, + "grad_norm": 0.0, + "learning_rate": 3.3733624656296675e-07, + "loss": 0.1779, + "step": 13105, + "success_rate.epoch.env.abd": 0.9847094801223242, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.956140350877193, + "success_rate.epoch.env.logic": 0.9059674502712477, + "success_rate.epoch.env.math": 0.9748218527315915, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8740048459674628, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8752978026046335, + "success_rate.epoch.global": 0.916711590296496, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986033519553073, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.7929271410311034, + "grad_norm": 148.755769224436, + "learning_rate": 3.3730811717687307e-07, + "loss": 0.2582, + "step": 13110, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.956268221574344, + "success_rate.epoch.env.logic": 0.9059674502712477, + "success_rate.epoch.env.math": 0.9748696064485538, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.8741790528862772, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8753338434020498, + "success_rate.epoch.global": 0.9168236877523553, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959177927927928, + "tokens_p.mean_in_band": 0.8236607142857143, + "tokens_rate.above_band": 0.9844789356984479, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015521064301552107 + }, + { + "epoch": 2.793992330634853, + "grad_norm": 39.723803455467966, + "learning_rate": 3.3728000432846045e-07, + "loss": 0.33, + "step": 13115, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9563953488372093, + "success_rate.epoch.env.logic": 0.9053201082055906, + "success_rate.epoch.env.math": 0.9749171793658306, + "success_rate.epoch.env.sat": 0.15555555555555556, + "success_rate.epoch.env.science": 0.87426597582038, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8752987780428305, + "success_rate.epoch.global": 0.9168010752688172, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987116490891659, + "tokens_p.mean_in_band": 0.571546052631579, + "tokens_rate.above_band": 0.9821092278719398, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.017890772128060263 + }, + { + "epoch": 2.7950575202386023, + "grad_norm": 67.07005303066418, + "learning_rate": 3.3725190803821746e-07, + "loss": 0.2349, + "step": 13120, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9563953488372093, + "success_rate.epoch.env.logic": 0.9053201082055906, + "success_rate.epoch.env.math": 0.9744922059518186, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8744827586206897, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8749724295768506, + "success_rate.epoch.global": 0.9166442953020134, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.5833333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9935661764705882, + "tokens_p.mean_below_band": 4.731118679046631e-07, + "tokens_p.mean_in_band": 0.6099175347222222, + "tokens_rate.above_band": 0.8774193548387097, + "tokens_rate.below_band": 0.0064516129032258064, + "tokens_rate.in_band": 0.11612903225806452 + }, + { + "epoch": 2.796122709842352, + "grad_norm": 221.11689550649683, + "learning_rate": 3.3722382832662037e-07, + "loss": 0.2318, + "step": 13125, + "success_rate.epoch.env.abd": 0.9847560975609756, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9045904590459046, + "success_rate.epoch.env.math": 0.9745403111739745, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.874267998622115, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8749024374274981, + "success_rate.epoch.global": 0.9164879356568365, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7916666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9992093373493975, + "tokens_p.mean_in_band": 0.5407608695652174, + "tokens_rate.above_band": 0.9730363423212193, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026963657678780773 + }, + { + "epoch": 2.7971878994461012, + "grad_norm": 263.2407026877318, + "learning_rate": 3.3719576521413355e-07, + "loss": 0.3875, + "step": 13130, + "success_rate.epoch.env.abd": 0.9848024316109423, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9046762589928058, + "success_rate.epoch.env.math": 0.9745882352941176, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8740970072239422, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8749032616746653, + "success_rate.epoch.global": 0.9164658634538153, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9927631578947368, + "tokens_p.mean_in_band": 0.64013671875, + "tokens_rate.above_band": 0.9223300970873787, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07766990291262135 + }, + { + "epoch": 2.798253089049851, + "grad_norm": 111.10386601497021, + "learning_rate": 3.3716771872120914e-07, + "loss": 0.2898, + "step": 13135, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9031390134529148, + "success_rate.epoch.env.math": 0.9746121297602257, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8742700103057368, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8747855985151699, + "success_rate.epoch.global": 0.9163101604278074, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.3333333333333333, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9973282442748092, + "tokens_p.mean_in_band": 0.487640380859375, + "tokens_rate.above_band": 0.9761549925484352, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02384500745156483 + }, + { + "epoch": 2.7993182786536, + "grad_norm": 68.74999086321412, + "learning_rate": 3.3713968886828727e-07, + "loss": 0.2222, + "step": 13140, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9031390134529148, + "success_rate.epoch.env.math": 0.974659784138902, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8744855967078189, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8748203509018363, + "success_rate.epoch.global": 0.9164218958611482, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948863636363636, + "tokens_p.mean_in_band": 0.8388671875, + "tokens_rate.above_band": 0.9763313609467456, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023668639053254437 + }, + { + "epoch": 2.80038346825735, + "grad_norm": 103.778267862368, + "learning_rate": 3.371116756757959e-07, + "loss": 0.1873, + "step": 13145, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9565217391304348, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9747191011235955, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8746145940390545, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8748560933642547, + "success_rate.epoch.global": 0.9165333333333333, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9948979591836735, + "tokens_p.mean_in_band": 0.7864583333333334, + "tokens_rate.above_band": 0.9607843137254902, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0392156862745098 + }, + { + "epoch": 2.8014486578610995, + "grad_norm": 124.46891115703816, + "learning_rate": 3.370836791641508e-07, + "loss": 0.3429, + "step": 13150, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9566473988439307, + "success_rate.epoch.env.logic": 0.9033989266547406, + "success_rate.epoch.env.math": 0.9747309312119794, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8744870041039672, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8749070109984736, + "success_rate.epoch.global": 0.9165113182423436, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9967401079136691, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.9893238434163701, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010676156583629894 + }, + { + "epoch": 2.8025138474648488, + "grad_norm": 523.9811714252888, + "learning_rate": 3.370556993537556e-07, + "loss": 0.299, + "step": 13155, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8680851063829788, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9566473988439307, + "success_rate.epoch.env.logic": 0.903485254691689, + "success_rate.epoch.env.math": 0.9747545582047685, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8743169398907104, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.874272919834788, + "success_rate.epoch.global": 0.9162234042553191, + "success_rate.window.env.agentgym:alfworld": 0.3333333333333333, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.7708333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9995170015455951, + "tokens_p.mean_in_band": 0.42578125, + "tokens_rate.above_band": 0.9817905918057663, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018209408194233688 + }, + { + "epoch": 2.803579037068598, + "grad_norm": 157.03777837965075, + "learning_rate": 3.3702773626500187e-07, + "loss": 0.2703, + "step": 13160, + "success_rate.epoch.env.abd": 0.9848484848484849, + "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9566473988439307, + "success_rate.epoch.env.logic": 0.9026785714285714, + "success_rate.epoch.env.math": 0.974766355140187, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8742759795570698, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8742477484486326, + "success_rate.epoch.global": 0.9160690571049137, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.7142857142857143, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0003367456896552, + "tokens_p.mean_in_band": 0.5755208333333334, + "tokens_rate.above_band": 0.9809725158562368, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019027484143763214 + }, + { + "epoch": 2.8046442266723477, + "grad_norm": 157.95892369019975, + "learning_rate": 3.369997899182689e-07, + "loss": 0.2342, + "step": 13165, + "success_rate.epoch.env.abd": 0.984984984984985, + "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9566473988439307, + "success_rate.epoch.env.logic": 0.9028520499108734, + "success_rate.epoch.env.math": 0.9747899159663865, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8744043567052416, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8742897408661939, + "success_rate.epoch.global": 0.9161803713527852, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995697463768116, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.8057094162760974, + "grad_norm": 81.05017243267453, + "learning_rate": 3.369718603339237e-07, + "loss": 0.1975, + "step": 13170, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, + "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9567723342939481, + "success_rate.epoch.env.logic": 0.9029385574354408, + "success_rate.epoch.env.math": 0.9748251748251748, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8742352141400408, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8743008785391893, + "success_rate.epoch.global": 0.916158940397351, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970024807056229, + "tokens_p.mean_in_band": 0.4124348958333333, + "tokens_rate.above_band": 0.9869423286180631, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013057671381936888 + }, + { + "epoch": 2.8067746058798466, + "grad_norm": 170.68941697109344, + "learning_rate": 3.369439475323212e-07, + "loss": 0.3088, + "step": 13175, + "success_rate.epoch.env.abd": 0.9850299401197605, + "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9030249110320284, + "success_rate.epoch.env.math": 0.9748720335039553, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8740237691001698, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8740544710361011, + "success_rate.epoch.global": 0.916005291005291, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.993224358049625, + "tokens_p.mean_in_band": 0.5604903796487604, + "tokens_rate.above_band": 0.8774683544303797, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12253164556962025 + }, + { + "epoch": 2.807839795483596, + "grad_norm": 97.06331843866808, + "learning_rate": 3.36916051533804e-07, + "loss": 0.2659, + "step": 13180, + "success_rate.epoch.env.abd": 0.9850746268656716, + "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9540229885057471, + "success_rate.epoch.env.logic": 0.9023957409050577, + "success_rate.epoch.env.math": 0.9748953974895398, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8741519674355496, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.874065500434245, + "success_rate.epoch.global": 0.9159841479524439, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9994340232858991, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9948519948519948, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005148005148005148 + }, + { + "epoch": 2.8089049850873455, + "grad_norm": 32.002691076004496, + "learning_rate": 3.368881723587027e-07, + "loss": 0.2187, + "step": 13185, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9541547277936963, + "success_rate.epoch.env.logic": 0.9025686448184234, + "success_rate.epoch.env.math": 0.9749187180678124, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8742799051169096, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8741149985563403, + "success_rate.epoch.global": 0.9160949868073879, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9991554054054054, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9966329966329966, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003367003367003367 + }, + { + "epoch": 2.8099701746910952, + "grad_norm": 54.994817616911064, + "learning_rate": 3.368603100273352e-07, + "loss": 0.1712, + "step": 13190, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, + "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9541547277936963, + "success_rate.epoch.env.logic": 0.9027409372236959, + "success_rate.epoch.env.math": 0.9749768303985171, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8744075829383886, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8741475515161092, + "success_rate.epoch.global": 0.916205533596838, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9898255813953488, + "tokens_p.mean_in_band": 0.8515625, + "tokens_rate.above_band": 0.9772727272727273, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022727272727272728 + }, + { + "epoch": 2.8110353642948445, + "grad_norm": 69.48728142410503, + "learning_rate": 3.368324645600075e-07, + "loss": 0.2032, + "step": 13195, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9541547277936963, + "success_rate.epoch.env.logic": 0.9028268551236749, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8746621621621622, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8741911706746379, + "success_rate.epoch.global": 0.9163157894736842, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955658783783784, + "tokens_p.mean_in_band": 0.845703125, + "tokens_rate.above_band": 0.9367088607594937, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06329113924050633 + }, + { + "epoch": 2.8121005538985937, + "grad_norm": 130.95813450468498, + "learning_rate": 3.3680463597701315e-07, + "loss": 0.2187, + "step": 13200, + "success_rate.epoch.env.abd": 0.9851632047477745, + "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9541547277936963, + "success_rate.epoch.env.logic": 0.9029126213592233, + "success_rate.epoch.env.math": 0.9750462107208873, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8741565452091767, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.873825194879598, + "success_rate.epoch.global": 0.9160315374507227, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.625, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9975490196078431, + "tokens_p.mean_in_band": 0.6214384191176471, + "tokens_rate.above_band": 0.9375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0625 + }, + { + "epoch": 2.8131657435023434, + "grad_norm": 145.91243768394122, + "learning_rate": 3.3677682429863337e-07, + "loss": 0.2175, + "step": 13205, + "success_rate.epoch.env.abd": 0.985207100591716, + "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9541547277936963, + "success_rate.epoch.env.logic": 0.9030837004405287, + "success_rate.epoch.env.math": 0.9750692520775623, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8743684742337487, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8738660989983703, + "success_rate.epoch.global": 0.9161417322834645, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9891098484848485, + "tokens_p.mean_in_band": 0.7740885416666666, + "tokens_rate.above_band": 0.9565217391304348, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.043478260869565216 + }, + { + "epoch": 2.814230933106093, + "grad_norm": 183.15006531114483, + "learning_rate": 3.367490295451372e-07, + "loss": 0.3595, + "step": 13210, + "success_rate.epoch.env.abd": 0.9852941176470589, + "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, + "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9541547277936963, + "success_rate.epoch.env.logic": 0.903169014084507, + "success_rate.epoch.env.math": 0.9751152073732718, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8744952893674294, + "success_rate.epoch.env.webshop": 0.975609756097561, + "success_rate.epoch.env_macro_mean": 0.8738974718282528, + "success_rate.epoch.global": 0.9162516382699869, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970238095238095, + "tokens_p.mean_in_band": 0.828125, + "tokens_rate.above_band": 0.963302752293578, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03669724770642202 + }, + { + "epoch": 2.8152961227098423, + "grad_norm": 265.81524979643154, + "learning_rate": 3.367212517367812e-07, + "loss": 0.2729, + "step": 13215, + "success_rate.epoch.env.abd": 0.9853372434017595, + "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9032541776605101, + "success_rate.epoch.env.math": 0.9751381215469613, + "success_rate.epoch.env.sat": 0.15217391304347827, + "success_rate.epoch.env.science": 0.8742857142857143, + "success_rate.epoch.env.webshop": 0.9761904761904762, + "success_rate.epoch.env_macro_mean": 0.8739673389726892, + "success_rate.epoch.global": 0.9162303664921466, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.9523809523809523, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9941052971576227, + "tokens_p.mean_in_band": 0.8032924107142857, + "tokens_rate.above_band": 0.9910371318822023, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008962868117797696 + }, + { + "epoch": 2.8163613123135915, + "grad_norm": 123.04272163621596, + "learning_rate": 3.3669349089380964e-07, + "loss": 0.3237, + "step": 13220, + "success_rate.epoch.env.abd": 0.9853801169590644, + "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9034240561896401, + "success_rate.epoch.env.math": 0.9751495628163829, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8744545149378986, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.875693298679784, + "success_rate.epoch.global": 0.9163398692810457, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997976618705036, + "tokens_p.mean_in_band": 0.796875, + "tokens_rate.above_band": 0.9952267303102625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00477326968973747 + }, + { + "epoch": 2.8174265019173412, + "grad_norm": 222.77855107590787, + "learning_rate": 3.3666574703645443e-07, + "loss": 0.1651, + "step": 13225, + "success_rate.epoch.env.abd": 0.9854651162790697, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9035933391761612, + "success_rate.epoch.env.math": 0.9751609935602575, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8745808182427901, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8758307954556535, + "success_rate.epoch.global": 0.9164490861618799, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970664928292047, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.818491691521091, + "grad_norm": 81.78920043921914, + "learning_rate": 3.366380201849351e-07, + "loss": 0.1587, + "step": 13230, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9037620297462817, + "success_rate.epoch.env.math": 0.9751952227836472, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8747068676716918, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8759199641299914, + "success_rate.epoch.global": 0.9165580182529335, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980244252873564, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.81955688112484, + "grad_norm": 252.80702348321032, + "learning_rate": 3.3661031035945877e-07, + "loss": 0.2815, + "step": 13235, + "success_rate.epoch.env.abd": 0.9855072463768116, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9038461538461539, + "success_rate.epoch.env.math": 0.9752407152682255, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8748745399799264, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.875979976275005, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9965494791666667, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9917355371900827, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008264462809917356 + }, + { + "epoch": 2.82062207072859, + "grad_norm": 49.47513756336224, + "learning_rate": 3.3658261758022014e-07, + "loss": 0.1779, + "step": 13240, + "success_rate.epoch.env.abd": 0.9855491329479769, + "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, + "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9039301310043668, + "success_rate.epoch.env.math": 0.9752633989922126, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8751252086811352, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8760162686526936, + "success_rate.epoch.global": 0.9167750325097529, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9909156976744186, + "tokens_p.mean_in_band": 0.8125, + "tokens_rate.above_band": 0.9608938547486033, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03910614525139665 + }, + { + "epoch": 2.821687260332339, + "grad_norm": 143.94319400678518, + "learning_rate": 3.365549418674016e-07, + "loss": 0.2713, + "step": 13245, + "success_rate.epoch.env.abd": 0.9855491329479769, + "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9542857142857143, + "success_rate.epoch.env.logic": 0.9040139616055847, + "success_rate.epoch.env.math": 0.9753086419753086, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8752085418752086, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8761461422593382, + "success_rate.epoch.global": 0.9168831168831169, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979762801204819, + "tokens_p.mean_in_band": 0.8697916666666666, + "tokens_rate.above_band": 0.9955022488755623, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004497751124437781 + }, + { + "epoch": 2.8227524499360888, + "grad_norm": 98.5871544151332, + "learning_rate": 3.365272832411729e-07, + "loss": 0.174, + "step": 13250, + "success_rate.epoch.env.abd": 0.9855907780979827, + "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9544159544159544, + "success_rate.epoch.env.logic": 0.9040139616055847, + "success_rate.epoch.env.math": 0.9753537197626655, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8753748750416528, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8761809873715241, + "success_rate.epoch.global": 0.9169909208819714, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981231231231231, + "tokens_p.mean_in_band": 0.279296875, + "tokens_rate.above_band": 0.9970059880239521, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0029940119760479044 + }, + { + "epoch": 2.823817639539838, + "grad_norm": 156.31417488362882, + "learning_rate": 3.364996417216915e-07, + "loss": 0.1741, + "step": 13255, + "success_rate.epoch.env.abd": 0.9855907780979827, + "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, + "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9544159544159544, + "success_rate.epoch.env.logic": 0.9040976460331299, + "success_rate.epoch.env.math": 0.9753761969904241, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8756235450615231, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8762450089792477, + "success_rate.epoch.global": 0.917098445595855, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9950911640953717, + "tokens_p.mean_in_band": 0.7578125, + "tokens_rate.above_band": 0.9944211994421199, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005578800557880056 + }, + { + "epoch": 2.8248828291435877, + "grad_norm": 172.39001359867987, + "learning_rate": 3.364720173291023e-07, + "loss": 0.1901, + "step": 13260, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9545454545454546, + "success_rate.epoch.env.logic": 0.9043478260869565, + "success_rate.epoch.env.math": 0.9754098360655737, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8756648936170213, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8763004117316524, + "success_rate.epoch.global": 0.9172056921086675, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968477584059776, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.825948018747337, + "grad_norm": 73.59427325440687, + "learning_rate": 3.3644441008353785e-07, + "loss": 0.1672, + "step": 13265, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9546742209631728, + "success_rate.epoch.env.logic": 0.9044309296264118, + "success_rate.epoch.env.math": 0.9754545454545455, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8758300132802125, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8763387480052285, + "success_rate.epoch.global": 0.917312661498708, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988195825049702, + "tokens_p.mean_in_band": 0.7200520833333334, + "tokens_rate.above_band": 0.9940711462450593, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005928853754940711 + }, + { + "epoch": 2.8270132083510866, + "grad_norm": 70.71758750163254, + "learning_rate": 3.364168200051181e-07, + "loss": 0.1778, + "step": 13270, + "success_rate.epoch.env.abd": 0.985632183908046, + "success_rate.epoch.env.agentgym:alfworld": 0.8683127572016461, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9546742209631728, + "success_rate.epoch.env.logic": 0.9045967042497832, + "success_rate.epoch.env.math": 0.9754879709487063, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8759946949602122, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764212974559428, + "success_rate.epoch.global": 0.9174193548387096, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979723282442748, + "tokens_p.mean_in_band": 0.7174479166666666, + "tokens_rate.above_band": 0.9886792452830189, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.011320754716981131 + }, + { + "epoch": 2.828078397954836, + "grad_norm": 53.306510520850956, + "learning_rate": 3.3638924711395037e-07, + "loss": 0.2671, + "step": 13275, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8688524590163934, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9546742209631728, + "success_rate.epoch.env.logic": 0.9045967042497832, + "success_rate.epoch.env.math": 0.9755213055303718, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8761999337967561, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764957923432557, + "success_rate.epoch.global": 0.9175257731958762, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987359550561797, + "tokens_p.mean_in_band": 0.78125, + "tokens_rate.above_band": 0.9933035714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006696428571428571 + }, + { + "epoch": 2.8291435875585855, + "grad_norm": 158.25389435637865, + "learning_rate": 3.363616914301297e-07, + "loss": 0.4428, + "step": 13280, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8688524590163934, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9548022598870056, + "success_rate.epoch.env.logic": 0.9045967042497832, + "success_rate.epoch.env.math": 0.9755434782608695, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8764861294583883, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8765354657356159, + "success_rate.epoch.global": 0.9176319176319176, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.993455497382199, + "tokens_p.mean_in_band": 0.7247242647058824, + "tokens_rate.above_band": 0.9182692307692307, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08173076923076923 + }, + { + "epoch": 2.8302087771623348, + "grad_norm": 95.88155796255984, + "learning_rate": 3.3633415297373846e-07, + "loss": 0.2301, + "step": 13285, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8688524590163934, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9549295774647887, + "success_rate.epoch.env.logic": 0.9046793760831889, + "success_rate.epoch.env.math": 0.9751356238698011, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8763192612137203, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8765023081697477, + "success_rate.epoch.global": 0.9174807197943444, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9969827586206896, + "tokens_p.mean_in_band": 0.6126302083333334, + "tokens_rate.above_band": 0.9797297297297297, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02027027027027027 + }, + { + "epoch": 2.8312739667660844, + "grad_norm": 52.86354257532786, + "learning_rate": 3.363066317648465e-07, + "loss": 0.2282, + "step": 13290, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9549295774647887, + "success_rate.epoch.env.logic": 0.9047619047619048, + "success_rate.epoch.env.math": 0.9751693002257337, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8761936121172209, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8765501128991839, + "success_rate.epoch.global": 0.9174582798459564, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9953758949880668, + "tokens_p.mean_in_band": 0.72578125, + "tokens_rate.above_band": 0.9766899766899767, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023310023310023312 + }, + { + "epoch": 2.8323391563698337, + "grad_norm": 54.79103491602769, + "learning_rate": 3.3627912782351095e-07, + "loss": 0.1924, + "step": 13295, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9549295774647887, + "success_rate.epoch.env.logic": 0.9049265341400173, + "success_rate.epoch.env.math": 0.975214060387562, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8760276224926011, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8765540583460312, + "success_rate.epoch.global": 0.9174358974358975, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9928728070175439, + "tokens_p.mean_in_band": 0.5283203125, + "tokens_rate.above_band": 0.9661016949152542, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03389830508474576 + }, + { + "epoch": 2.8334043459735834, + "grad_norm": 124.81086349279724, + "learning_rate": 3.362516411697766e-07, + "loss": 0.2107, + "step": 13300, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.905090595340811, + "success_rate.epoch.env.math": 0.9748088169140801, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.876149802890933, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8765547492947707, + "success_rate.epoch.global": 0.9174135723431498, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0002172653534183, + "tokens_p.mean_below_band": 5.029141902923584e-07, + "tokens_p.mean_in_band": 0.86328125, + "tokens_rate.above_band": 0.9953863898500577, + "tokens_rate.below_band": 0.0011534025374855825, + "tokens_rate.in_band": 0.0034602076124567475 + }, + { + "epoch": 2.8344695355773326, + "grad_norm": 89.37378216017451, + "learning_rate": 3.3622417182367556e-07, + "loss": 0.247, + "step": 13305, + "success_rate.epoch.env.abd": 0.9856733524355301, + "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9550561797752809, + "success_rate.epoch.env.logic": 0.9044750430292599, + "success_rate.epoch.env.math": 0.9748540637629097, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8759435510338037, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764841531747845, + "success_rate.epoch.global": 0.9172634271099744, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9954323444283647, + "tokens_p.mean_in_band": 0.6072048611111112, + "tokens_rate.above_band": 0.9504814305364512, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04951856946354883 + }, + { + "epoch": 2.8355347251810823, + "grad_norm": 121.04398174470911, + "learning_rate": 3.3619671980522723e-07, + "loss": 0.1841, + "step": 13310, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, + "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9551820728291317, + "success_rate.epoch.env.logic": 0.9037800687285223, + "success_rate.epoch.env.math": 0.9748878923766816, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8760655737704918, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764503080277236, + "success_rate.epoch.global": 0.9172413793103448, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9998471882640587, + "tokens_p.mean_in_band": 0.69375, + "tokens_rate.above_band": 0.9939246658566221, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006075334143377886 + }, + { + "epoch": 2.8365999147848315, + "grad_norm": 382.8600877425378, + "learning_rate": 3.361692851344384e-07, + "loss": 0.2076, + "step": 13315, + "success_rate.epoch.env.abd": 0.9857142857142858, + "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, + "success_rate.epoch.env.agentgym:sciworld": 0.972, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.975609756097561, + "success_rate.epoch.env.ded": 0.9553072625698324, + "success_rate.epoch.env.logic": 0.903862660944206, + "success_rate.epoch.env.math": 0.974910394265233, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8762684124386252, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764999055106565, + "success_rate.epoch.global": 0.9173469387755102, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964519650655022, + "tokens_p.mean_in_band": 0.76953125, + "tokens_rate.above_band": 0.9870689655172413, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01293103448275862 + }, + { + "epoch": 2.837665104388581, + "grad_norm": 29.056420800762854, + "learning_rate": 3.361418678313033e-07, + "loss": 0.3414, + "step": 13320, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8704453441295547, + "success_rate.epoch.env.agentgym:sciworld": 0.9721115537848606, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9553072625698324, + "success_rate.epoch.env.logic": 0.903862660944206, + "success_rate.epoch.env.math": 0.9749440715883669, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8763493621197253, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8766731045970407, + "success_rate.epoch.global": 0.9174522292993631, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998320895522388, + "tokens_p.mean_in_band": 0.75, + "tokens_rate.above_band": 0.9985096870342772, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0014903129657228018 + }, + { + "epoch": 2.838730293992331, + "grad_norm": 54.592933557964905, + "learning_rate": 3.361144679158035e-07, + "loss": 0.1603, + "step": 13325, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8704453441295547, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9553072625698324, + "success_rate.epoch.env.logic": 0.9040274207369323, + "success_rate.epoch.env.math": 0.9749664729548503, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8765512736773351, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8767185356110571, + "success_rate.epoch.global": 0.917557251908397, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984504132231405, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.83979548359608, + "grad_norm": 230.25757852382318, + "learning_rate": 3.3608708540790793e-07, + "loss": 0.628, + "step": 13330, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8669354838709677, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9554317548746518, + "success_rate.epoch.env.logic": 0.9032534246575342, + "success_rate.epoch.env.math": 0.9749776586237712, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8767926988265972, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.876363376227877, + "success_rate.epoch.global": 0.9174078780177891, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980432780847146, + "tokens_p.mean_in_band": 0.6573893229166666, + "tokens_rate.above_band": 0.9576719576719577, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.042328042328042326 + }, + { + "epoch": 2.8408606731998294, + "grad_norm": 68.79339207379915, + "learning_rate": 3.3605972032757257e-07, + "loss": 0.1025, + "step": 13335, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8674698795180723, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9033361847733106, + "success_rate.epoch.env.math": 0.9750223015165032, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8769130576359492, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764457360593195, + "success_rate.epoch.global": 0.9175126903553299, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 1.0006009615384615, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.841925862803579, + "grad_norm": 0.0, + "learning_rate": 3.3603237269474103e-07, + "loss": 0.3074, + "step": 13340, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.868, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9035012809564474, + "success_rate.epoch.env.math": 0.9750334373606777, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8765030874228145, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.87647267990442, + "success_rate.epoch.global": 0.9173637515842838, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971046325878594, + "tokens_p.mean_in_band": 0.6453683035714286, + "tokens_rate.above_band": 0.9178885630498533, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08211143695014662 + }, + { + "epoch": 2.8429910524073287, + "grad_norm": 74.57926843853564, + "learning_rate": 3.3600504252934405e-07, + "loss": 0.1891, + "step": 13345, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8690476190476191, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9555555555555556, + "success_rate.epoch.env.logic": 0.9027303754266212, + "success_rate.epoch.env.math": 0.97506678539626, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8763388510223953, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8764859367365069, + "success_rate.epoch.global": 0.9172151898734178, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.6875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9994127101879328, + "tokens_p.mean_below_band": 6.891787052154541e-07, + "tokens_p.mean_in_band": 0.5518092105263158, + "tokens_rate.above_band": 0.9806013579049466, + "tokens_rate.below_band": 0.0009699321047526673, + "tokens_rate.in_band": 0.01842870999030068 + }, + { + "epoch": 2.844056242011078, + "grad_norm": 1.1001882353835348, + "learning_rate": 3.359777298512996e-07, + "loss": 0.2088, + "step": 13350, + "success_rate.epoch.env.abd": 0.9857549857549858, + "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9556786703601108, + "success_rate.epoch.env.logic": 0.9028132992327366, + "success_rate.epoch.env.math": 0.9750778816199377, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8762151652624757, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8762127711533118, + "success_rate.epoch.global": 0.9170670037926675, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.7999999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9990633514986376, + "tokens_p.mean_below_band": 6.007030606269836e-08, + "tokens_p.mean_in_band": 0.7, + "tokens_rate.above_band": 0.9839142091152815, + "tokens_rate.below_band": 0.002680965147453083, + "tokens_rate.in_band": 0.013404825737265416 + }, + { + "epoch": 2.8451214316148272, + "grad_norm": 606.8623382270667, + "learning_rate": 3.35950434680513e-07, + "loss": 0.201, + "step": 13355, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9761904761904762, + "success_rate.epoch.env.ded": 0.9556786703601108, + "success_rate.epoch.env.logic": 0.9028132992327366, + "success_rate.epoch.env.math": 0.9751221679253665, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8764153995470721, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8762386792788113, + "success_rate.epoch.global": 0.9171717171717172, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9938668224299065, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.846186621218577, + "grad_norm": 84.20516969002915, + "learning_rate": 3.359231570368768e-07, + "loss": 0.1273, + "step": 13360, + "success_rate.epoch.env.abd": 0.9857954545454546, + "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9556786703601108, + "success_rate.epoch.env.logic": 0.9029787234042553, + "success_rate.epoch.env.math": 0.9751552795031055, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8765751211631664, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8763215853898465, + "success_rate.epoch.global": 0.9172761664564943, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975079744816587, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9874015748031496, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012598425196850394 + }, + { + "epoch": 2.8472518108223266, + "grad_norm": 62.607543112608894, + "learning_rate": 3.3589589694027076e-07, + "loss": 0.1702, + "step": 13365, + "success_rate.epoch.env.abd": 0.9858356940509915, + "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9558011049723757, + "success_rate.epoch.env.logic": 0.903143585386576, + "success_rate.epoch.env.math": 0.974756421612046, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8766548272521796, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8763223475987623, + "success_rate.epoch.global": 0.9172544080604534, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981132075471698, + "tokens_p.mean_below_band": 1.318767317570746e-10, + "tokens_rate.above_band": 0.9962406015037594, + "tokens_rate.below_band": 0.0037593984962406013, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.848317000426076, + "grad_norm": 68.99032333810347, + "learning_rate": 3.358686544105618e-07, + "loss": 0.349, + "step": 13370, + "success_rate.epoch.env.abd": 0.9858757062146892, + "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9559228650137741, + "success_rate.epoch.env.logic": 0.902376910016978, + "success_rate.epoch.env.math": 0.9747675962815405, + "success_rate.epoch.env.sat": 0.1702127659574468, + "success_rate.epoch.env.science": 0.8762088974854932, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8762759317624376, + "success_rate.epoch.global": 0.9169811320754717, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.7666666666666666, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9994916523972602, + "tokens_p.mean_in_band": 0.6077745225694444, + "tokens_rate.above_band": 0.984822934232715, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01517706576728499 + }, + { + "epoch": 2.849382190029825, + "grad_norm": 5.639069075887996, + "learning_rate": 3.358414294676041e-07, + "loss": 0.2665, + "step": 13375, + "success_rate.epoch.env.abd": 0.9859154929577465, + "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, + "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9532967032967034, + "success_rate.epoch.env.logic": 0.9017781541066893, + "success_rate.epoch.env.math": 0.9748010610079576, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8762487914921044, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8756706707222509, + "success_rate.epoch.global": 0.9167085427135678, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.611111111111111, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9930524800708592, + "tokens_p.mean_in_band": 0.7182348901098901, + "tokens_rate.above_band": 0.9254098360655738, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07459016393442623 + }, + { + "epoch": 2.8504473796335748, + "grad_norm": 152.46141799171193, + "learning_rate": 3.3581422213123897e-07, + "loss": 0.3786, + "step": 13380, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9532967032967034, + "success_rate.epoch.env.logic": 0.9017781541066893, + "success_rate.epoch.env.math": 0.9748344370860927, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8764478764478765, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8757053814353408, + "success_rate.epoch.global": 0.9168130489335006, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9959239130434783, + "tokens_p.mean_in_band": 0.8072916666666666, + "tokens_rate.above_band": 0.9787234042553191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02127659574468085 + }, + { + "epoch": 2.8515125692373244, + "grad_norm": 69.35748201714661, + "learning_rate": 3.357870324212949e-07, + "loss": 0.1599, + "step": 13385, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, + "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9534246575342465, + "success_rate.epoch.env.logic": 0.9018612521150592, + "success_rate.epoch.env.math": 0.9748566387295986, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8766859344894027, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8757482279745177, + "success_rate.epoch.global": 0.9169172932330827, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953775038520801, + "tokens_p.mean_in_band": 0.8331473214285714, + "tokens_rate.above_band": 0.989329268292683, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010670731707317074 + }, + { + "epoch": 2.8525777588410737, + "grad_norm": 44.45150645831097, + "learning_rate": 3.357598603575877e-07, + "loss": 0.2542, + "step": 13390, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.8671875, + "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.9010989010989011, + "success_rate.epoch.env.math": 0.9748566387295986, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.876883616543764, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8755650500129952, + "success_rate.epoch.global": 0.9167709637046307, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.6, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9989785992217899, + "tokens_p.mean_in_band": 0.5740831163194444, + "tokens_rate.above_band": 0.9727479182437547, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02725208175624527 + }, + { + "epoch": 2.853642948444823, + "grad_norm": 94.03469740590714, + "learning_rate": 3.3573270595992e-07, + "loss": 0.3534, + "step": 13395, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, + "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9508196721311475, + "success_rate.epoch.env.logic": 0.90042194092827, + "success_rate.epoch.env.math": 0.9749009247027741, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8770019218449712, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8755652692183946, + "success_rate.epoch.global": 0.91675, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9990327380952381, + "tokens_p.mean_in_band": 0.4375, + "tokens_rate.above_band": 0.975609756097561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024390243902439025 + }, + { + "epoch": 2.8547081380485726, + "grad_norm": 116.54053470997088, + "learning_rate": 3.3570556924808187e-07, + "loss": 0.1804, + "step": 13400, + "success_rate.epoch.env.abd": 0.9859550561797753, + "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9510869565217391, + "success_rate.epoch.env.logic": 0.9005059021922428, + "success_rate.epoch.env.math": 0.9749119718309859, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8771985929005437, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8756259090878765, + "success_rate.epoch.global": 0.9168539325842696, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9958558863328822, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9986486486486487, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0013513513513513514 + }, + { + "epoch": 2.8557733276523223, + "grad_norm": 183.97816327182832, + "learning_rate": 3.3567845024185023e-07, + "loss": 0.1388, + "step": 13405, + "success_rate.epoch.env.abd": 0.9859943977591037, + "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9510869565217391, + "success_rate.epoch.env.logic": 0.9005897219882055, + "success_rate.epoch.env.math": 0.9749560632688928, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8773554774832322, + "success_rate.epoch.env.webshop": 0.9767441860465116, + "success_rate.epoch.env_macro_mean": 0.8756553761238663, + "success_rate.epoch.global": 0.9169576059850374, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9919270833333333, + "tokens_p.mean_in_band": 0.87109375, + "tokens_rate.above_band": 0.9836065573770492, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01639344262295082 + }, + { + "epoch": 2.8568385172560715, + "grad_norm": 178.02878990784518, + "learning_rate": 3.3565134896098936e-07, + "loss": 0.2687, + "step": 13410, + "success_rate.epoch.env.abd": 0.9859943977591037, + "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9510869565217391, + "success_rate.epoch.env.logic": 0.9006734006734006, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.877511961722488, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8757292525676639, + "success_rate.epoch.global": 0.9170610211706102, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972174657534246, + "tokens_p.mean_in_band": 0.8522135416666666, + "tokens_rate.above_band": 0.9605263157894737, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039473684210526314 + }, + { + "epoch": 2.857903706859821, + "grad_norm": 43.520273841247246, + "learning_rate": 3.356242654252503e-07, + "loss": 0.2583, + "step": 13415, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.9006734006734006, + "success_rate.epoch.env.math": 0.9750328515111695, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8776680471487734, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8757740210732127, + "success_rate.epoch.global": 0.9171641791044776, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99737548828125, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9980506822612085, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001949317738791423 + }, + { + "epoch": 2.8589688964635704, + "grad_norm": 870.4958100045096, + "learning_rate": 3.3559719965437146e-07, + "loss": 0.372, + "step": 13420, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8682170542635659, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.9007569386038689, + "success_rate.epoch.env.math": 0.9746281714785652, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8778625954198473, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8758091283711091, + "success_rate.epoch.global": 0.9171428571428571, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970366379310345, + "tokens_p.mean_in_band": 0.6142578125, + "tokens_rate.above_band": 0.9775280898876404, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02247191011235955 + }, + { + "epoch": 2.86003408606732, + "grad_norm": 75.06795998869764, + "learning_rate": 3.355701516680781e-07, + "loss": 0.2515, + "step": 13425, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8682170542635659, + "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.9007569386038689, + "success_rate.epoch.env.math": 0.9746835443037974, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8774214036201969, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8757740539188892, + "success_rate.epoch.global": 0.9169975186104219, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9923913043478261, + "tokens_p.mean_in_band": 0.5133579799107143, + "tokens_rate.above_band": 0.9426229508196722, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.05737704918032787 + }, + { + "epoch": 2.8610992756710694, + "grad_norm": 308.01910168070776, + "learning_rate": 3.3554312148608277e-07, + "loss": 0.2097, + "step": 13430, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8687258687258688, + "success_rate.epoch.env.agentgym:sciworld": 0.9727626459143969, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.9008403361344538, + "success_rate.epoch.env.math": 0.974716652136007, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8775380710659898, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8758609278804751, + "success_rate.epoch.global": 0.9171003717472119, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9974729498861048, + "tokens_p.mean_in_band": 0.7608506944444444, + "tokens_rate.above_band": 0.9898534385569335, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010146561443066516 + }, + { + "epoch": 2.862164465274819, + "grad_norm": 92.63997390591346, + "learning_rate": 3.3551610912808467e-07, + "loss": 0.1854, + "step": 13435, + "success_rate.epoch.env.abd": 0.9860335195530726, + "success_rate.epoch.env.agentgym:alfworld": 0.8692307692307693, + "success_rate.epoch.env.agentgym:sciworld": 0.9727626459143969, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.9009235936188077, + "success_rate.epoch.env.math": 0.9747386759581882, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8774540848638379, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8759087638449554, + "success_rate.epoch.global": 0.9170792079207921, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961939102564102, + "tokens_p.mean_in_band": 0.37646484375, + "tokens_rate.above_band": 0.9957446808510638, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00425531914893617 + }, + { + "epoch": 2.8632296548785683, + "grad_norm": 311.7292311278598, + "learning_rate": 3.354891146137703e-07, + "loss": 0.2847, + "step": 13440, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8692307692307693, + "success_rate.epoch.env.agentgym:sciworld": 0.9689922480620154, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9513513513513514, + "success_rate.epoch.env.logic": 0.9010067114093959, + "success_rate.epoch.env.math": 0.9748045178105995, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8774928774928775, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8755866055054021, + "success_rate.epoch.global": 0.9170580964153275, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9987824675324676, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9914163090128756, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008583690987124463 + }, + { + "epoch": 2.864294844482318, + "grad_norm": 42.1001288752035, + "learning_rate": 3.35462137962813e-07, + "loss": 0.2739, + "step": 13445, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8692307692307693, + "success_rate.epoch.env.agentgym:sciworld": 0.9691119691119691, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.9011725293132329, + "success_rate.epoch.env.math": 0.9748373101952278, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8776091081593927, + "success_rate.epoch.env.webshop": 0.9772727272727273, + "success_rate.epoch.env_macro_mean": 0.8756380319088521, + "success_rate.epoch.global": 0.9171604938271605, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9966469957081545, + "tokens_p.mean_in_band": 0.83056640625, + "tokens_rate.above_band": 0.9831223628691983, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016877637130801686 + }, + { + "epoch": 2.865360034086067, + "grad_norm": 70.20439648481599, + "learning_rate": 3.3543517919487335e-07, + "loss": 0.1487, + "step": 13450, + "success_rate.epoch.env.abd": 0.9860724233983287, + "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.9013377926421404, + "success_rate.epoch.env.math": 0.9748482220294883, + "success_rate.epoch.env.sat": 0.16666666666666666, + "success_rate.epoch.env.science": 0.8774091627172196, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8754363523501735, + "success_rate.epoch.global": 0.9170160295930949, + "success_rate.window.env.agentgym:alfworld": 0.5, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.861111111111111, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9985567269076305, + "tokens_p.mean_in_band": 0.726318359375, + "tokens_rate.above_band": 0.9920318725099602, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00796812749003984 + }, + { + "epoch": 2.866425223689817, + "grad_norm": 211.89191238439255, + "learning_rate": 3.3540823832959844e-07, + "loss": 0.2181, + "step": 13455, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.9015025041736227, + "success_rate.epoch.env.math": 0.9748591244039878, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8775639002840012, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8751641843517873, + "success_rate.epoch.global": 0.9169950738916256, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9936548223350253, + "tokens_p.mean_in_band": 0.716796875, + "tokens_rate.above_band": 0.9248826291079812, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07511737089201878 + }, + { + "epoch": 2.867490413293566, + "grad_norm": 54.87609346706096, + "learning_rate": 3.353813153866228e-07, + "loss": 0.1973, + "step": 13460, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.9016666666666666, + "success_rate.epoch.env.math": 0.9748917748917749, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8774417139256458, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.875170968590194, + "success_rate.epoch.global": 0.9169741697416974, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992953431372549, + "tokens_p.mean_in_band": 0.672607421875, + "tokens_rate.above_band": 0.864406779661017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.13559322033898305 + }, + { + "epoch": 2.868555602897316, + "grad_norm": 673.8028373010998, + "learning_rate": 3.353544103855676e-07, + "loss": 0.1918, + "step": 13465, + "success_rate.epoch.env.abd": 0.9861495844875346, + "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.9016666666666666, + "success_rate.epoch.env.math": 0.9749460043196544, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8776344762503933, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8751934223858875, + "success_rate.epoch.global": 0.9170761670761671, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9953703703703703, + "tokens_p.mean_in_band": 0.7896205357142857, + "tokens_rate.above_band": 0.9391304347826087, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06086956521739131 + }, + { + "epoch": 2.869620792501065, + "grad_norm": 72.82181013389518, + "learning_rate": 3.3532752334604095e-07, + "loss": 0.2742, + "step": 13470, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8669201520912547, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9514824797843666, + "success_rate.epoch.env.logic": 0.9012448132780083, + "success_rate.epoch.env.math": 0.9749460043196544, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8774355751099937, + "success_rate.epoch.env.webshop": 0.9777777777777777, + "success_rate.epoch.env_macro_mean": 0.8751866446257122, + "success_rate.epoch.global": 0.9169325153374233, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.8, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8666666666666667, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.999751655629139, + "tokens_p.mean_in_band": 0.5293817934782609, + "tokens_rate.above_band": 0.9704370179948586, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02956298200514139 + }, + { + "epoch": 2.8706859821048147, + "grad_norm": 64.28836460039312, + "learning_rate": 3.3530065428763797e-07, + "loss": 0.2252, + "step": 13475, + "success_rate.epoch.env.abd": 0.9861878453038674, + "success_rate.epoch.env.agentgym:alfworld": 0.8674242424242424, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9517426273458445, + "success_rate.epoch.env.logic": 0.9012448132780083, + "success_rate.epoch.env.math": 0.97498921949116, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8775125628140703, + "success_rate.epoch.env.webshop": 0.9782608695652174, + "success_rate.epoch.env_macro_mean": 0.8753109657673019, + "success_rate.epoch.global": 0.9170343137254902, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979608482871125, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9975589910496339, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0024410089503661514 + }, + { + "epoch": 2.871751171708564, + "grad_norm": 35.587853025261694, + "learning_rate": 3.3527380322994074e-07, + "loss": 0.1936, + "step": 13480, + "success_rate.epoch.env.abd": 0.9862258953168044, + "success_rate.epoch.env.agentgym:alfworld": 0.8674242424242424, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9518716577540107, + "success_rate.epoch.env.logic": 0.9012448132780083, + "success_rate.epoch.env.math": 0.9750322858372794, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8773525721455459, + "success_rate.epoch.env.webshop": 0.9782608695652174, + "success_rate.epoch.env_macro_mean": 0.8753155254126382, + "success_rate.epoch.global": 0.9170134638922889, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9969857283464567, + "tokens_p.mean_in_band": 0.767578125, + "tokens_rate.above_band": 0.9921875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0078125 + }, + { + "epoch": 2.8728163613123137, + "grad_norm": 129.34319327766767, + "learning_rate": 3.35246970192518e-07, + "loss": 0.142, + "step": 13485, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8674242424242424, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.952, + "success_rate.epoch.env.logic": 0.900497512437811, + "success_rate.epoch.env.math": 0.9750322858372794, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8772314437832759, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752971547108049, + "success_rate.epoch.global": 0.9168704156479217, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.76, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.000129132231405, + "tokens_p.mean_below_band": 5.617039278149605e-09, + "tokens_p.mean_in_band": 0.5811011904761905, + "tokens_rate.above_band": 0.9850746268656716, + "tokens_rate.below_band": 0.0006784260515603799, + "tokens_rate.in_band": 0.014246947082767978 + }, + { + "epoch": 2.873881550916063, + "grad_norm": 144.89967409792152, + "learning_rate": 3.3522015519492546e-07, + "loss": 0.248, + "step": 13490, + "success_rate.epoch.env.abd": 0.9863013698630136, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.900497512437811, + "success_rate.epoch.env.math": 0.9750859106529209, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8770337922403004, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8753411473221483, + "success_rate.epoch.global": 0.9168498168498168, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983811936936937, + "tokens_p.mean_in_band": 0.43861607142857145, + "tokens_rate.above_band": 0.9921787709497206, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00782122905027933 + }, + { + "epoch": 2.8749467405198126, + "grad_norm": 122.80513310926591, + "learning_rate": 3.3519335825670575e-07, + "loss": 0.2702, + "step": 13495, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8999172870140613, + "success_rate.epoch.env.math": 0.9751499571550986, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8770337922403004, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752976245066594, + "success_rate.epoch.global": 0.916829268292683, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9993372756933115, + "tokens_p.mean_in_band": 0.5691636029411765, + "tokens_rate.above_band": 0.973015873015873, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026984126984126985 + }, + { + "epoch": 2.8760119301235623, + "grad_norm": 69.21511294194717, + "learning_rate": 3.3516657939738824e-07, + "loss": 0.2618, + "step": 13500, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8993399339933993, + "success_rate.epoch.env.math": 0.9751605995717345, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8766396002498439, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752102697253428, + "success_rate.epoch.global": 0.9165651644336176, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.7777777777777777, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9969991721854304, + "tokens_p.mean_in_band": 0.5604248046875, + "tokens_rate.above_band": 0.949685534591195, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.050314465408805034 + }, + { + "epoch": 2.8770771197273115, + "grad_norm": 228.1119840975371, + "learning_rate": 3.351398186364893e-07, + "loss": 0.1488, + "step": 13505, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9767441860465116, + "success_rate.epoch.env.ded": 0.9521276595744681, + "success_rate.epoch.env.logic": 0.8988486842105263, + "success_rate.epoch.env.math": 0.9752030782385634, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8766781142678739, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752181122009499, + "success_rate.epoch.global": 0.9165450121654501, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983571029082774, + "tokens_p.mean_in_band": 0.5720703125, + "tokens_rate.above_band": 0.9781181619256017, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02188183807439825 + }, + { + "epoch": 2.8781423093310607, + "grad_norm": 40.752070478562615, + "learning_rate": 3.351130759935118e-07, + "loss": 0.2747, + "step": 13510, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8988486842105263, + "success_rate.epoch.env.math": 0.9748186086214256, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8765201122544434, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.875228389649222, + "success_rate.epoch.global": 0.9164034021871202, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9974880790190735, + "tokens_p.mean_in_band": 0.53369140625, + "tokens_rate.above_band": 0.9786666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021333333333333333 + }, + { + "epoch": 2.8792074989348104, + "grad_norm": 641.8474036380121, + "learning_rate": 3.3508635148794573e-07, + "loss": 0.2009, + "step": 13515, + "success_rate.epoch.env.abd": 0.9863387978142076, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.898360655737705, + "success_rate.epoch.env.math": 0.9748400852878465, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8766739333540953, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8751999595849721, + "success_rate.epoch.global": 0.9163834951456311, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9930037313432836, + "tokens_p.mean_in_band": 0.6909722222222222, + "tokens_rate.above_band": 0.9370629370629371, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06293706293706294 + }, + { + "epoch": 2.88027268853856, + "grad_norm": 142.9246755014505, + "learning_rate": 3.350596451392677e-07, + "loss": 0.1623, + "step": 13520, + "success_rate.epoch.env.abd": 0.9864130434782609, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.898360655737705, + "success_rate.epoch.env.math": 0.9748829289059174, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8768273716951789, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752245530052637, + "success_rate.epoch.global": 0.9164848484848485, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9968487394957983, + "tokens_p.mean_in_band": 0.81640625, + "tokens_rate.above_band": 0.9916666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008333333333333333 + }, + { + "epoch": 2.8813378781423093, + "grad_norm": 248.38681228361213, + "learning_rate": 3.3503295696694103e-07, + "loss": 0.3536, + "step": 13525, + "success_rate.epoch.env.abd": 0.986449864498645, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9522546419098143, + "success_rate.epoch.env.logic": 0.8984438984438985, + "success_rate.epoch.env.math": 0.9748829289059174, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8762022959975179, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8751786428260744, + "success_rate.epoch.global": 0.9162227602905569, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.625, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9890350877192983, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.8837209302325582, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11627906976744186 + }, + { + "epoch": 2.8824030677460586, + "grad_norm": 782.1273231612497, + "learning_rate": 3.3500628699041583e-07, + "loss": 0.2404, + "step": 13530, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8985270049099836, + "success_rate.epoch.env.math": 0.9749149659863946, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8763557483731019, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.875217872678904, + "success_rate.epoch.global": 0.9163240628778718, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988484087102177, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.8834682573498083, + "grad_norm": 66.71063310634881, + "learning_rate": 3.3497963522912913e-07, + "loss": 0.1643, + "step": 13535, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9523809523809523, + "success_rate.epoch.env.logic": 0.8986099754701553, + "success_rate.epoch.env.math": 0.9749469214437367, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8765470297029703, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752564269625807, + "success_rate.epoch.global": 0.9164251207729469, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9972209618874773, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.884533446953558, + "grad_norm": 122.24913476510302, + "learning_rate": 3.3495300170250446e-07, + "loss": 0.1606, + "step": 13540, + "success_rate.epoch.env.abd": 0.9864864864864865, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8986099754701553, + "success_rate.epoch.env.math": 0.9749787955894826, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8764669549104386, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752634672606107, + "success_rate.epoch.global": 0.9164053075995174, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0001237623762376, + "tokens_p.mean_in_band": 0.6186079545454546, + "tokens_rate.above_band": 0.9892262487757101, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010773751224289911 + }, + { + "epoch": 2.885598636557307, + "grad_norm": 67.38008404636768, + "learning_rate": 3.349263864299522e-07, + "loss": 0.2854, + "step": 13545, + "success_rate.epoch.env.abd": 0.9865591397849462, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8986928104575164, + "success_rate.epoch.env.math": 0.9749894022891056, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.876040703052729, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752504520768712, + "success_rate.epoch.global": 0.9162650602409639, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9968039772727273, + "tokens_p.mean_in_band": 0.66015625, + "tokens_rate.above_band": 0.9606986899563319, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.039301310043668124 + }, + { + "epoch": 2.8866638261610564, + "grad_norm": 159.066285551253, + "learning_rate": 3.3489978943086924e-07, + "loss": 0.3446, + "step": 13550, + "success_rate.epoch.env.abd": 0.9865591397849462, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8988580750407831, + "success_rate.epoch.env.math": 0.9750528541226215, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8761171032357473, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752781899495804, + "success_rate.epoch.global": 0.9163658243080626, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955357142857143, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9859154929577465, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014084507042253521 + }, + { + "epoch": 2.887729015764806, + "grad_norm": 114.94302786061347, + "learning_rate": 3.348732107246394e-07, + "loss": 0.3266, + "step": 13555, + "success_rate.epoch.env.abd": 0.9865951742627346, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8991049633848658, + "success_rate.epoch.env.math": 0.9750739332488382, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8759618344105878, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752917111425739, + "success_rate.epoch.global": 0.9163461538461538, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9952083333333334, + "tokens_p.mean_in_band": 0.66494140625, + "tokens_rate.above_band": 0.967741935483871, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03225806451612903 + }, + { + "epoch": 2.888794205368556, + "grad_norm": 164.51186914673178, + "learning_rate": 3.348466503306331e-07, + "loss": 0.1185, + "step": 13560, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9525065963060686, + "success_rate.epoch.env.logic": 0.8992688870836718, + "success_rate.epoch.env.math": 0.9746728577458843, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8761143559790963, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8752905167942369, + "success_rate.epoch.global": 0.9163265306122449, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.5, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9911858974358975, + "tokens_p.mean_in_band": 0.7340198863636364, + "tokens_rate.above_band": 0.9140625, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0859375 + }, + { + "epoch": 2.889859394972305, + "grad_norm": 67.36719506297227, + "learning_rate": 3.3482010826820716e-07, + "loss": 0.3898, + "step": 13565, + "success_rate.epoch.env.abd": 0.9866666666666667, + "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, + "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9526315789473684, + "success_rate.epoch.env.logic": 0.8992688870836718, + "success_rate.epoch.env.math": 0.9747048903878583, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8763424363301626, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8753255254882677, + "success_rate.epoch.global": 0.9164268585131895, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9962311557788944, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9851485148514851, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.01485148514851485 + }, + { + "epoch": 2.8909245845760543, + "grad_norm": 16.71514285348692, + "learning_rate": 3.347935845567054e-07, + "loss": 0.1771, + "step": 13570, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.8992688870836718, + "success_rate.epoch.env.math": 0.9747368421052631, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8764561618638872, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8754086500382016, + "success_rate.epoch.global": 0.9165269461077844, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992088607594937, + "tokens_p.mean_in_band": 0.8828125, + "tokens_rate.above_band": 0.9978947368421053, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002105263157894737 + }, + { + "epoch": 2.891989774179804, + "grad_norm": 240.080774308402, + "learning_rate": 3.3476707921545815e-07, + "loss": 0.2157, + "step": 13575, + "success_rate.epoch.env.abd": 0.9867021276595744, + "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.952755905511811, + "success_rate.epoch.env.logic": 0.898538961038961, + "success_rate.epoch.env.math": 0.9747793190416141, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8760330578512396, + "success_rate.epoch.env.webshop": 0.9787234042553191, + "success_rate.epoch.env_macro_mean": 0.8753076906635645, + "success_rate.epoch.global": 0.916267942583732, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.5333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9937106918238994, + "tokens_p.mean_in_band": 0.596484375, + "tokens_rate.above_band": 0.888268156424581, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11173184357541899 + }, + { + "epoch": 2.8930549637835536, + "grad_norm": 133.50116436488105, + "learning_rate": 3.347405922637822e-07, + "loss": 0.2272, + "step": 13580, + "success_rate.epoch.env.abd": 0.9867724867724867, + "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.898538961038961, + "success_rate.epoch.env.math": 0.9747899159663865, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8762224938875306, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8753838115840504, + "success_rate.epoch.global": 0.9163679808841099, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.998761655011655, + "tokens_p.mean_in_band": 0.7135416666666666, + "tokens_rate.above_band": 0.9930555555555556, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006944444444444444 + }, + { + "epoch": 2.894120153387303, + "grad_norm": 2.4929023943807134, + "learning_rate": 3.3471412372098104e-07, + "loss": 0.3044, + "step": 13585, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.8987034035656402, + "success_rate.epoch.env.math": 0.9748322147651006, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8763358778625954, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8754160867104838, + "success_rate.epoch.global": 0.9164677804295943, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9952586206896552, + "tokens_p.mean_in_band": 0.6494140625, + "tokens_rate.above_band": 0.9863945578231292, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.013605442176870748 + }, + { + "epoch": 2.8951853429910526, + "grad_norm": 165.6439172562267, + "learning_rate": 3.3468767360634485e-07, + "loss": 0.1351, + "step": 13590, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8694029850746269, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9528795811518325, + "success_rate.epoch.env.logic": 0.8987854251012146, + "success_rate.epoch.env.math": 0.9748533109807209, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8765620237732399, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8754904858976258, + "success_rate.epoch.global": 0.9165673420738975, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.996905193236715, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.896250532594802, + "grad_norm": 58.68080529966273, + "learning_rate": 3.346612419391502e-07, + "loss": 0.1506, + "step": 13595, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8698884758364313, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.9530026109660574, + "success_rate.epoch.env.logic": 0.8988673139158576, + "success_rate.epoch.env.math": 0.9748848890749268, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8767123287671232, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.875569785213877, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980978260869565, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.8973157221985515, + "grad_norm": 66.77644335245357, + "learning_rate": 3.3463482873866034e-07, + "loss": 0.1027, + "step": 13600, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8988673139158576, + "success_rate.epoch.env.math": 0.9749058971141782, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.8769371011850501, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8756470637616096, + "success_rate.epoch.global": 0.9167657550535078, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9977014010507881, + "tokens_p.mean_in_band": 0.751953125, + "tokens_rate.above_band": 0.9896013864818024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.010398613518197574 + }, + { + "epoch": 2.8983809118023007, + "grad_norm": 0.0, + "learning_rate": 3.346084340241251e-07, + "loss": 0.1737, + "step": 13605, + "success_rate.epoch.env.abd": 0.9868073878627969, + "success_rate.epoch.env.agentgym:alfworld": 0.8708487084870848, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9772727272727273, + "success_rate.epoch.env.ded": 0.953125, + "success_rate.epoch.env.logic": 0.8988673139158576, + "success_rate.epoch.env.math": 0.9749582637729549, + "success_rate.epoch.env.sat": 0.16326530612244897, + "success_rate.epoch.env.science": 0.876783004552352, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8756813008654999, + "success_rate.epoch.global": 0.9167458432304038, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981617647058824, + "tokens_p.mean_in_band": 0.46707589285714285, + "tokens_rate.above_band": 0.9807692307692307, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.019230769230769232 + }, + { + "epoch": 2.8994461014060504, + "grad_norm": 376.0435289587504, + "learning_rate": 3.3458205781478055e-07, + "loss": 0.2483, + "step": 13610, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8708487084870848, + "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, + "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.898949070331447, + "success_rate.epoch.env.math": 0.9749791492910759, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.87689508793208, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8752279861151138, + "success_rate.epoch.global": 0.9166073546856465, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7142857142857143, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9905625, + "tokens_p.mean_in_band": 0.5939903846153847, + "tokens_rate.above_band": 0.8368200836820083, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.16317991631799164 + }, + { + "epoch": 2.9005112910097997, + "grad_norm": 72.80282515141374, + "learning_rate": 3.345557001298497e-07, + "loss": 0.2528, + "step": 13615, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.898949070331447, + "success_rate.epoch.env.math": 0.975, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.8764759309718437, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.874940707938509, + "success_rate.epoch.global": 0.9163507109004739, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9976624015748031, + "tokens_p.mean_in_band": 0.5286458333333334, + "tokens_rate.above_band": 0.9548872180451128, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045112781954887216 + }, + { + "epoch": 2.9015764806135493, + "grad_norm": 719.3361544846681, + "learning_rate": 3.3452936098854174e-07, + "loss": 0.2242, + "step": 13620, + "success_rate.epoch.env.abd": 0.9868421052631579, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9699248120300752, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.8991121872477804, + "success_rate.epoch.env.math": 0.9750208159866778, + "success_rate.epoch.env.sat": 0.16, + "success_rate.epoch.env.science": 0.8766253401874811, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749917247068503, + "success_rate.epoch.global": 0.9164497041420119, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976635514018691, + "tokens_p.mean_in_band": 0.890625, + "tokens_rate.above_band": 0.9953488372093023, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004651162790697674 + }, + { + "epoch": 2.9026416702172986, + "grad_norm": 481.86247014032307, + "learning_rate": 3.345030404100524e-07, + "loss": 0.183, + "step": 13625, + "success_rate.epoch.env.abd": 0.9868766404199475, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9700374531835206, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9506493506493506, + "success_rate.epoch.env.logic": 0.8993558776167472, + "success_rate.epoch.env.math": 0.9750415973377704, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8766999093381687, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8747507212776704, + "success_rate.epoch.global": 0.9164302600472813, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9966216216216216, + "tokens_p.mean_in_band": 0.6927083333333334, + "tokens_rate.above_band": 0.9390862944162437, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.06091370558375635 + }, + { + "epoch": 2.9037068598210483, + "grad_norm": 80.50282392334165, + "learning_rate": 3.34476738413564e-07, + "loss": 0.1404, + "step": 13630, + "success_rate.epoch.env.abd": 0.9868766404199475, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9700374531835206, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9483204134366925, + "success_rate.epoch.env.logic": 0.8994368463395012, + "success_rate.epoch.env.math": 0.9750727046115496, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8768487775430124, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8745627219130084, + "success_rate.epoch.global": 0.9164108618654073, + "success_rate.window.env.ded": 0.5, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9957186873029521, + "tokens_p.mean_below_band": 1.257285475730896e-07, + "tokens_p.mean_in_band": 0.5504064498933902, + "tokens_rate.above_band": 0.8810606060606061, + "tokens_rate.below_band": 0.000505050505050505, + "tokens_rate.in_band": 0.11843434343434343 + }, + { + "epoch": 2.9047720494247975, + "grad_norm": 129.60142394429622, + "learning_rate": 3.344504550182453e-07, + "loss": 0.2027, + "step": 13635, + "success_rate.epoch.env.abd": 0.9869451697127938, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9483204134366925, + "success_rate.epoch.env.logic": 0.8996789727126806, + "success_rate.epoch.env.math": 0.975093399750934, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8766214177978884, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8745823393319274, + "success_rate.epoch.global": 0.9163915094339623, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.7135416666666666, + "tokens_rate.above_band": 0.9794520547945206, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02054794520547945 + }, + { + "epoch": 2.905837239028547, + "grad_norm": 0.0, + "learning_rate": 3.344241902432513e-07, + "loss": 0.2155, + "step": 13640, + "success_rate.epoch.env.abd": 0.9869451697127938, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9485861182519281, + "success_rate.epoch.env.logic": 0.8996789727126806, + "success_rate.epoch.env.math": 0.9751449875724938, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8767329716696806, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8746213253781626, + "success_rate.epoch.global": 0.916489988221437, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979437108325873, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9991055456171736, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0008944543828264759 + }, + { + "epoch": 2.9069024286322964, + "grad_norm": 131.81684788258252, + "learning_rate": 3.343979441077237e-07, + "loss": 0.1793, + "step": 13645, + "success_rate.epoch.env.abd": 0.9869791666666666, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9485861182519281, + "success_rate.epoch.env.logic": 0.8997594226142742, + "success_rate.epoch.env.math": 0.9751963621331129, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8765432098765432, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8746191489802488, + "success_rate.epoch.global": 0.9164705882352941, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9913877952755905, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9548872180451128, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.045112781954887216 + }, + { + "epoch": 2.907967618236046, + "grad_norm": 26.348733399631833, + "learning_rate": 3.343717166307904e-07, + "loss": 0.211, + "step": 13650, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, + "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, + "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9485861182519281, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9752168525402726, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8766175142943123, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8746588134259908, + "success_rate.epoch.global": 0.9165687426556992, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9960056390977443, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.9090328078397953, + "grad_norm": 38.51936733672637, + "learning_rate": 3.343455078315659e-07, + "loss": 0.1117, + "step": 13655, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.9487179487179487, + "success_rate.epoch.env.logic": 0.9, + "success_rate.epoch.env.math": 0.9752270850536746, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8768028846153846, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8747712220745215, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9975183823529412, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9985315712187959, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0014684287812041115 + }, + { + "epoch": 2.910097997443545, + "grad_norm": 103.53290918560029, + "learning_rate": 3.343193177291509e-07, + "loss": 0.2268, + "step": 13660, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9777777777777777, + "success_rate.epoch.env.ded": 0.948849104859335, + "success_rate.epoch.env.logic": 0.9000799360511591, + "success_rate.epoch.env.math": 0.9752475247524752, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8766876687668766, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748255481288034, + "success_rate.epoch.global": 0.9166471277842907, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986959761549925, + "tokens_p.mean_in_band": 0.5340711805555556, + "tokens_rate.above_band": 0.9738751814223512, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026124818577648767 + }, + { + "epoch": 2.9111631870472943, + "grad_norm": 85.17111379979617, + "learning_rate": 3.342931463426326e-07, + "loss": 0.1473, + "step": 13665, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.948849104859335, + "success_rate.epoch.env.logic": 0.9001597444089456, + "success_rate.epoch.env.math": 0.9752883031301482, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8765358106083309, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748666227073811, + "success_rate.epoch.global": 0.9166276346604215, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9935546875, + "tokens_p.mean_in_band": 0.6376953125, + "tokens_rate.above_band": 0.975609756097561, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.024390243902439025 + }, + { + "epoch": 2.912228376651044, + "grad_norm": 28.24366223984647, + "learning_rate": 3.3426699369108453e-07, + "loss": 0.2943, + "step": 13670, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9002394253790902, + "success_rate.epoch.env.math": 0.9753187988482106, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8764213046080191, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748780915860442, + "success_rate.epoch.global": 0.9166081871345029, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970982142857143, + "tokens_p.mean_in_band": 0.67578125, + "tokens_rate.above_band": 0.963302752293578, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03669724770642202 + }, + { + "epoch": 2.913293566254793, + "grad_norm": 48.55987565337608, + "learning_rate": 3.3424085979356653e-07, + "loss": 0.2038, + "step": 13675, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9003189792663477, + "success_rate.epoch.env.math": 0.9753593429158111, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8763071407230355, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748786310469417, + "success_rate.epoch.global": 0.916588785046729, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9857954545454546, + "tokens_p.mean_in_band": 0.69921875, + "tokens_rate.above_band": 0.952755905511811, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.047244094488188976 + }, + { + "epoch": 2.914358755858543, + "grad_norm": 91.68625530196564, + "learning_rate": 3.342147446691248e-07, + "loss": 0.2306, + "step": 13680, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9004777070063694, + "success_rate.epoch.env.math": 0.9753896636587367, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8758579528498955, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.874864995404383, + "success_rate.epoch.global": 0.9164527421236873, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9952362804878049, + "tokens_p.mean_in_band": 0.726318359375, + "tokens_rate.above_band": 0.9111111111111111, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08888888888888889 + }, + { + "epoch": 2.915423945462292, + "grad_norm": 311.17938717189907, + "learning_rate": 3.3418864833679186e-07, + "loss": 0.3119, + "step": 13685, + "success_rate.epoch.env.abd": 0.9870801033591732, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9007148530579825, + "success_rate.epoch.env.math": 0.9754299754299754, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8759320011929616, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749403841835369, + "success_rate.epoch.global": 0.9165501165501165, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970386533665836, + "tokens_p.mean_in_band": 0.8984375, + "tokens_rate.above_band": 0.9975124378109452, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0024875621890547263 + }, + { + "epoch": 2.916489135066042, + "grad_norm": 38.684225393630584, + "learning_rate": 3.341625708155865e-07, + "loss": 0.1845, + "step": 13690, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9007936507936508, + "success_rate.epoch.env.math": 0.9754601226993865, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8761167361524718, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749701095169253, + "success_rate.epoch.global": 0.9166472642607684, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9949596774193549, + "tokens_p.mean_in_band": 0.83984375, + "tokens_rate.above_band": 0.9789473684210527, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.021052631578947368 + }, + { + "epoch": 2.9175543246697915, + "grad_norm": 60.52852500756108, + "learning_rate": 3.341365121245139e-07, + "loss": 0.2775, + "step": 13695, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9782608695652174, + "success_rate.epoch.env.ded": 0.9489795918367347, + "success_rate.epoch.env.logic": 0.9007936507936508, + "success_rate.epoch.env.math": 0.9755102040816327, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8762273132996132, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750045207399375, + "success_rate.epoch.global": 0.9167441860465116, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969512195121951, + "tokens_p.mean_in_band": 0.84765625, + "tokens_rate.above_band": 0.9951456310679612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0048543689320388345 + }, + { + "epoch": 2.9186195142735407, + "grad_norm": 105.9988224563135, + "learning_rate": 3.341104722825654e-07, + "loss": 0.4084, + "step": 13700, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9465648854961832, + "success_rate.epoch.env.logic": 0.9002375296912114, + "success_rate.epoch.env.math": 0.9755102040816327, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8761140819964349, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8747662003712043, + "success_rate.epoch.global": 0.9164924506387921, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.6166666666666667, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9938056792873051, + "tokens_p.mean_in_band": 0.7248507724719101, + "tokens_rate.above_band": 0.9098277608915907, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.09017223910840932 + }, + { + "epoch": 2.91968470387729, + "grad_norm": 191.71952869911075, + "learning_rate": 3.340844513087186e-07, + "loss": 0.2535, + "step": 13705, + "success_rate.epoch.env.abd": 0.9871134020618557, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.9004739336492891, + "success_rate.epoch.env.math": 0.9755301794453507, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8762611275964391, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748152046475525, + "success_rate.epoch.global": 0.9165893271461717, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9976946721311475, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.9207498934810396, + "grad_norm": 49.45679574806478, + "learning_rate": 3.3405844922193746e-07, + "loss": 0.2592, + "step": 13710, + "success_rate.epoch.env.abd": 0.9871794871794872, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.9006309148264984, + "success_rate.epoch.env.math": 0.9755501222493888, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8761114404267931, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748236884593008, + "success_rate.epoch.global": 0.9165701042873696, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9968171296296297, + "tokens_p.mean_in_band": 0.6241319444444444, + "tokens_rate.above_band": 0.96, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04 + }, + { + "epoch": 2.9218150830847893, + "grad_norm": 37.68974011765153, + "learning_rate": 3.3403246604117213e-07, + "loss": 0.098, + "step": 13715, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.9007874015748032, + "success_rate.epoch.env.math": 0.9755700325732899, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8762214983713354, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.874872227410756, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.99933638996139, + "tokens_p.mean_in_band": 0.716796875, + "tokens_rate.above_band": 0.9923371647509579, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007662835249042145 + }, + { + "epoch": 2.9228802726885386, + "grad_norm": 1063.2302774167858, + "learning_rate": 3.3400650178535897e-07, + "loss": 0.2805, + "step": 13720, + "success_rate.epoch.env.abd": 0.9872122762148338, + "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, + "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9467005076142132, + "success_rate.epoch.env.logic": 0.9008654602675059, + "success_rate.epoch.env.math": 0.975609756097561, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8761088113542282, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748726906107437, + "success_rate.epoch.global": 0.9166473988439306, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9925, + "tokens_p.mean_in_band": 0.6784855769230769, + "tokens_rate.above_band": 0.8849557522123894, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11504424778761062 + }, + { + "epoch": 2.923945462292288, + "grad_norm": 51.36229935927613, + "learning_rate": 3.339805564734205e-07, + "loss": 0.2539, + "step": 13725, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, + "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9468354430379747, + "success_rate.epoch.env.logic": 0.9008654602675059, + "success_rate.epoch.env.math": 0.975609756097561, + "success_rate.epoch.env.sat": 0.1568627450980392, + "success_rate.epoch.env.science": 0.8763282172373081, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749539385272865, + "success_rate.epoch.global": 0.916743648960739, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9970525568181818, + "tokens_p.mean_in_band": 0.6328125, + "tokens_rate.above_band": 0.9977324263038548, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0022675736961451248 + }, + { + "epoch": 2.9250106518960375, + "grad_norm": 99.77297555224303, + "learning_rate": 3.3395463012426546e-07, + "loss": 0.2581, + "step": 13730, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8700361010830325, + "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9468354430379747, + "success_rate.epoch.env.logic": 0.9009433962264151, + "success_rate.epoch.env.math": 0.9756295694557271, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8759210138520483, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8746943783792556, + "success_rate.epoch.global": 0.9164936562860438, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.72, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.996195652173913, + "tokens_p.mean_in_band": 0.5609756097560976, + "tokens_rate.above_band": 0.8937823834196891, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.10621761658031088 + }, + { + "epoch": 2.926075841499787, + "grad_norm": 167.65796785631733, + "learning_rate": 3.3392872275678884e-07, + "loss": 0.2252, + "step": 13735, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8700361010830325, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9787234042553191, + "success_rate.epoch.env.ded": 0.9468354430379747, + "success_rate.epoch.env.logic": 0.9010989010989011, + "success_rate.epoch.env.math": 0.9756394640682094, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8758458370108856, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8747122323651525, + "success_rate.epoch.global": 0.9164746543778802, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9583333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9961222627737226, + "tokens_p.mean_in_band": 0.45390625, + "tokens_rate.above_band": 0.9647887323943662, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.035211267605633804 + }, + { + "epoch": 2.9271410311035364, + "grad_norm": 65.92616260040826, + "learning_rate": 3.3390283438987174e-07, + "loss": 0.1537, + "step": 13740, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.946969696969697, + "success_rate.epoch.env.logic": 0.9012539184952978, + "success_rate.epoch.env.math": 0.9756493506493507, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8759553203997649, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.874874372779874, + "success_rate.epoch.global": 0.9165707710011507, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9995591692789969, + "tokens_p.mean_in_band": 0.6796875, + "tokens_rate.above_band": 0.9984350547730829, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.001564945226917058 + }, + { + "epoch": 2.9282062207072856, + "grad_norm": 50.79706360580214, + "learning_rate": 3.338769650423813e-07, + "loss": 0.3547, + "step": 13745, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8714285714285714, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.946969696969697, + "success_rate.epoch.env.logic": 0.9013312451057165, + "success_rate.epoch.env.math": 0.9756986634264885, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.876064610866373, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749377145387153, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9973312672176309, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9972527472527473, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0027472527472527475 + }, + { + "epoch": 2.9292714103110353, + "grad_norm": 52.56713490968818, + "learning_rate": 3.3385111473317113e-07, + "loss": 0.2486, + "step": 13750, + "success_rate.epoch.env.abd": 0.9872773536895675, + "success_rate.epoch.env.agentgym:alfworld": 0.8718861209964412, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.947103274559194, + "success_rate.epoch.env.logic": 0.9014084507042254, + "success_rate.epoch.env.math": 0.9757281553398058, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8762100322675271, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750143732723831, + "success_rate.epoch.global": 0.9167623421354765, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9985214194373402, + "tokens_p.mean_in_band": 0.763671875, + "tokens_rate.above_band": 0.9974489795918368, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002551020408163265 + }, + { + "epoch": 2.930336599914785, + "grad_norm": 84.65153791720067, + "learning_rate": 3.3382528348108063e-07, + "loss": 0.2564, + "step": 13755, + "success_rate.epoch.env.abd": 0.9873096446700508, + "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.947103274559194, + "success_rate.epoch.env.logic": 0.9015625, + "success_rate.epoch.env.math": 0.9757575757575757, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8760257913247362, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750585391166296, + "success_rate.epoch.global": 0.916743119266055, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9976190476190476, + "tokens_p.mean_in_band": 0.6766493055555556, + "tokens_rate.above_band": 0.9790209790209791, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02097902097902098 + }, + { + "epoch": 2.9314017895185343, + "grad_norm": 180.65384693970705, + "learning_rate": 3.337994713049354e-07, + "loss": 0.1197, + "step": 13760, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, + "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.9017160686427457, + "success_rate.epoch.env.math": 0.9757771497779572, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8761709601873536, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8751024796032588, + "success_rate.epoch.global": 0.9168384879725086, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990808823529411, + "tokens_p.mean_in_band": 0.83203125, + "tokens_rate.above_band": 0.9920424403183024, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007957559681697613 + }, + { + "epoch": 2.9324669791222835, + "grad_norm": 213.7760159704575, + "learning_rate": 3.337736782235472e-07, + "loss": 0.1954, + "step": 13765, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, + "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, + "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.9018691588785047, + "success_rate.epoch.env.math": 0.9758064516129032, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8762796139221994, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748091395345575, + "success_rate.epoch.global": 0.9168097036274173, + "success_rate.window.env.agentgym:sciworld": 0.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.8888888888888888, + "tokens_p.mean_above_band": 0.9968525179856115, + "tokens_p.mean_in_band": 0.435546875, + "tokens_rate.above_band": 0.9928571428571429, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007142857142857143 + }, + { + "epoch": 2.933532168726033, + "grad_norm": 179.1763456015147, + "learning_rate": 3.337479042557138e-07, + "loss": 0.1132, + "step": 13770, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.9019455252918288, + "success_rate.epoch.env.math": 0.9754131398629585, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764602803738317, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748759591963245, + "success_rate.epoch.global": 0.9167904903417533, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99875, + "tokens_p.mean_in_band": 0.6302083333333334, + "tokens_rate.above_band": 0.9749303621169917, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.025069637883008356 + }, + { + "epoch": 2.934597358329783, + "grad_norm": 44.670808139520055, + "learning_rate": 3.337221494202192e-07, + "loss": 0.1466, + "step": 13775, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9472361809045227, + "success_rate.epoch.env.logic": 0.9021739130434783, + "success_rate.epoch.env.math": 0.9754428341384863, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8763127187864644, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8748860065090345, + "success_rate.epoch.global": 0.9167713209270465, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.992445054945055, + "tokens_p.mean_in_band": 0.5966796875, + "tokens_rate.above_band": 0.9191919191919192, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.08080808080808081 + }, + { + "epoch": 2.935662547933532, + "grad_norm": 0.0, + "learning_rate": 3.3369641373583323e-07, + "loss": 0.3327, + "step": 13780, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9024012393493416, + "success_rate.epoch.env.math": 0.9754823151125402, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8763848396501458, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749288399900987, + "success_rate.epoch.global": 0.9168662333219295, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9986489661654135, + "tokens_p.mean_in_band": 0.8645833333333334, + "tokens_rate.above_band": 0.994392523364486, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005607476635514018 + }, + { + "epoch": 2.936727737537282, + "grad_norm": 73.69747390381193, + "learning_rate": 3.3367069722131185e-07, + "loss": 0.1716, + "step": 13785, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9791666666666666, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9025522041763341, + "success_rate.epoch.env.math": 0.9755020080321285, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8763096623981373, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8749375200350599, + "success_rate.epoch.global": 0.9168470213008315, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9915081521739131, + "tokens_p.mean_in_band": 0.51171875, + "tokens_rate.above_band": 0.9787234042553191, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02127659574468085 + }, + { + "epoch": 2.937792927141031, + "grad_norm": 116.86833715647312, + "learning_rate": 3.3364499989539725e-07, + "loss": 0.2378, + "step": 13790, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, + "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9026275115919629, + "success_rate.epoch.env.math": 0.9755314881668672, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764893926184248, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750020371113038, + "success_rate.epoch.global": 0.9169416315849357, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9958333333333333, + "tokens_p.mean_in_band": 0.4609375, + "tokens_rate.above_band": 0.9905660377358491, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009433962264150943 + }, + { + "epoch": 2.9388581167447807, + "grad_norm": 89.84531203150478, + "learning_rate": 3.336193217768172e-07, + "loss": 0.1752, + "step": 13795, + "success_rate.epoch.env.abd": 0.9873417721518988, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9026275115919629, + "success_rate.epoch.env.math": 0.9755804643714971, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8765969802555168, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750676148419063, + "success_rate.epoch.global": 0.9170360268212296, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9964398734177216, + "tokens_p.mean_in_band": 0.703125, + "tokens_rate.above_band": 0.9974747474747475, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0025252525252525255 + }, + { + "epoch": 2.93992330634853, + "grad_norm": 581.6091059065415, + "learning_rate": 3.3359366288428594e-07, + "loss": 0.1664, + "step": 13800, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9475, + "success_rate.epoch.env.logic": 0.9028527370855821, + "success_rate.epoch.env.math": 0.9755804643714971, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764501160092807, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750924975332304, + "success_rate.epoch.global": 0.9170166874787149, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9978813559322034, + "tokens_p.mean_in_band": 0.6555989583333334, + "tokens_rate.above_band": 0.9874476987447699, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012552301255230125 + }, + { + "epoch": 2.9409884959522796, + "grad_norm": 361.8161057373051, + "learning_rate": 3.335680232365034e-07, + "loss": 0.1848, + "step": 13805, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9476309226932669, + "success_rate.epoch.env.logic": 0.9023827824750192, + "success_rate.epoch.env.math": 0.975609756097561, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.876231884057971, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750445000657265, + "success_rate.epoch.global": 0.9168840004535662, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9980908360128617, + "tokens_p.mean_in_band": 0.6748621323529411, + "tokens_rate.above_band": 0.97339593114241, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.026604068857589983 + }, + { + "epoch": 2.942053685556029, + "grad_norm": 23.612774643333683, + "learning_rate": 3.335424028521556e-07, + "loss": 0.1494, + "step": 13810, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9476309226932669, + "success_rate.epoch.env.logic": 0.9026073619631901, + "success_rate.epoch.env.math": 0.9756389776357828, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8763752171395484, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8750806031664511, + "success_rate.epoch.global": 0.9169781402197305, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9912050898203593, + "tokens_p.mean_in_band": 0.7924107142857143, + "tokens_rate.above_band": 0.9226519337016574, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.07734806629834254 + }, + { + "epoch": 2.9431188751597785, + "grad_norm": 253.6609624716674, + "learning_rate": 3.3351680174991445e-07, + "loss": 0.1918, + "step": 13815, + "success_rate.epoch.env.abd": 0.9874055415617129, + "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9478908188585607, + "success_rate.epoch.env.logic": 0.9026819923371647, + "success_rate.epoch.env.math": 0.9756778309409888, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764467592592593, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8751614846092495, + "success_rate.epoch.global": 0.9170720669759023, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999875745526839, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.944184064763528, + "grad_norm": 118.37897388650002, + "learning_rate": 3.3349121994843786e-07, + "loss": 0.3028, + "step": 13820, + "success_rate.epoch.env.abd": 0.9874371859296482, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9478908188585607, + "success_rate.epoch.env.logic": 0.9029051987767585, + "success_rate.epoch.env.math": 0.9757068896853843, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.876229034123771, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8752076526107037, + "success_rate.epoch.global": 0.9170527743247825, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9962837837837838, + "tokens_p.mean_in_band": 0.486083984375, + "tokens_rate.above_band": 0.9585492227979274, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04145077720207254 + }, + { + "epoch": 2.9452492543672775, + "grad_norm": 81.81235166389844, + "learning_rate": 3.3346565746636967e-07, + "loss": 0.1684, + "step": 13825, + "success_rate.epoch.env.abd": 0.9874371859296482, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9478908188585607, + "success_rate.epoch.env.logic": 0.9029051987767585, + "success_rate.epoch.env.math": 0.9757358790771679, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764790764790765, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8752330191331663, + "success_rate.epoch.global": 0.9171464047860932, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9902522935779816, + "tokens_p.mean_in_band": 0.880859375, + "tokens_rate.above_band": 0.9819819819819819, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.018018018018018018 + }, + { + "epoch": 2.9463144439710267, + "grad_norm": 61.626123211632304, + "learning_rate": 3.3344011432233965e-07, + "loss": 0.255, + "step": 13830, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948019801980198, + "success_rate.epoch.env.logic": 0.9029793735676088, + "success_rate.epoch.env.math": 0.9757647993643226, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8762975778546713, + "success_rate.epoch.env.webshop": 0.9791666666666666, + "success_rate.epoch.env_macro_mean": 0.8752510283629921, + "success_rate.epoch.global": 0.9171270718232044, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9444444444444443, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0001511487303507, + "tokens_p.mean_in_band": 0.5615234375, + "tokens_rate.above_band": 0.9975874547647768, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0024125452352231603 + }, + { + "epoch": 2.9473796335747764, + "grad_norm": 70.16975958078042, + "learning_rate": 3.3341459053496345e-07, + "loss": 0.2641, + "step": 13835, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9481481481481482, + "success_rate.epoch.env.logic": 0.9031273836765827, + "success_rate.epoch.env.math": 0.9758032526775089, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8763688760806917, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8753247808979154, + "success_rate.epoch.global": 0.9172204077035703, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.999366224648986, + "tokens_p.mean_in_band": 0.7734375, + "tokens_rate.above_band": 0.9968895800933126, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003110419906687403 + }, + { + "epoch": 2.9484448231785256, + "grad_norm": 136.18511815266748, + "learning_rate": 3.3338908612284266e-07, + "loss": 0.3768, + "step": 13840, + "success_rate.epoch.env.abd": 0.9874686716791979, + "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, + "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9484029484029484, + "success_rate.epoch.env.logic": 0.9025875190258752, + "success_rate.epoch.env.math": 0.9758224336107808, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8761877339475957, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8752841422073945, + "success_rate.epoch.global": 0.9170885363932951, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333333, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9996675531914894, + "tokens_p.mean_in_band": 0.6350740131578947, + "tokens_rate.above_band": 0.983435047951177, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.016564952048823016 + }, + { + "epoch": 2.9495100127822753, + "grad_norm": 199.43719178929103, + "learning_rate": 3.3336360110456463e-07, + "loss": 0.1873, + "step": 13845, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, + "success_rate.epoch.env.agentgym:sciworld": 0.9678571428571429, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9484029484029484, + "success_rate.epoch.env.logic": 0.9027355623100304, + "success_rate.epoch.env.math": 0.9758320126782885, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8762945914844649, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8753642125216554, + "success_rate.epoch.global": 0.9171817058096415, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9992579155672823, + "tokens_p.mean_in_band": 0.74609375, + "tokens_rate.above_band": 0.9973684210526316, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.002631578947368421 + }, + { + "epoch": 2.9505752023860246, + "grad_norm": 172.9738618836053, + "learning_rate": 3.3333813549870267e-07, + "loss": 0.1522, + "step": 13850, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, + "success_rate.epoch.env.agentgym:sciworld": 0.9679715302491103, + "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9484029484029484, + "success_rate.epoch.env.logic": 0.9028094153378892, + "success_rate.epoch.env.math": 0.9758797943851325, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764012647312446, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8753953666465144, + "success_rate.epoch.global": 0.9172746660680211, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.997362012987013, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.9516403919897742, + "grad_norm": 125.43237331738564, + "learning_rate": 3.33312689323816e-07, + "loss": 0.1716, + "step": 13855, + "success_rate.epoch.env.abd": 0.9875311720698254, + "success_rate.epoch.env.agentgym:alfworld": 0.875, + "success_rate.epoch.env.agentgym:sciworld": 0.9679715302491103, + "success_rate.epoch.env.agentgym:textcraft": 0.9830508474576272, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9485294117647058, + "success_rate.epoch.env.logic": 0.9028094153378892, + "success_rate.epoch.env.math": 0.975898854207823, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8765432098765432, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8754876607526423, + "success_rate.epoch.global": 0.9173581520520296, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9980612998522895, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.9527055815935235, + "grad_norm": 52.94560375595468, + "learning_rate": 3.332872625984496e-07, + "loss": 0.1934, + "step": 13860, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, + "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948780487804878, + "success_rate.epoch.env.logic": 0.9028094153378892, + "success_rate.epoch.env.math": 0.9759178839320963, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8765786452353617, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.875618421672007, + "success_rate.epoch.global": 0.9174507168458781, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9987322515212982, + "tokens_p.mean_in_band": 0.8606770833333334, + "tokens_rate.above_band": 0.9979757085020243, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0020242914979757085 + }, + { + "epoch": 2.953770771197273, + "grad_norm": 189.57117936015476, + "learning_rate": 3.332618553411342e-07, + "loss": 0.3331, + "step": 13865, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, + "success_rate.epoch.env.agentgym:sciworld": 0.9683098591549296, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.9028831562974203, + "success_rate.epoch.env.math": 0.9759368836291913, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8764334862385321, + "success_rate.epoch.env.webshop": 0.9795918367346939, + "success_rate.epoch.env_macro_mean": 0.8756454176825451, + "success_rate.epoch.global": 0.9174311926605505, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.95, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 1.0002334578441836, + "tokens_p.mean_in_band": 0.6434151785714286, + "tokens_rate.above_band": 0.9852786540483701, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014721345951629864 + }, + { + "epoch": 2.954835960801023, + "grad_norm": 80.79133231660975, + "learning_rate": 3.3323646757038646e-07, + "loss": 0.1763, + "step": 13870, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, + "success_rate.epoch.env.agentgym:sciworld": 0.9685314685314685, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.9029567854435178, + "success_rate.epoch.env.math": 0.9759747932256794, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8765042979942693, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8757192470589241, + "success_rate.epoch.global": 0.9175234689316049, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9990808823529411, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.955901150404772, + "grad_norm": 141.87967115918724, + "learning_rate": 3.332110993047089e-07, + "loss": 0.1314, + "step": 13875, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, + "success_rate.epoch.env.agentgym:sciworld": 0.9686411149825784, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.903177004538578, + "success_rate.epoch.env.math": 0.9760125835627212, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.876539673446004, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8754814560122729, + "success_rate.epoch.global": 0.9175039071221255, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9971649484536083, + "tokens_p.mean_in_band": 0.466796875, + "tokens_rate.above_band": 0.9979423868312757, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00205761316872428 + }, + { + "epoch": 2.9569663400085213, + "grad_norm": 0.0, + "learning_rate": 3.331857505625896e-07, + "loss": 0.1862, + "step": 13880, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.9025679758308157, + "success_rate.epoch.env.math": 0.9760408483896308, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8766809728183119, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8754514033312618, + "success_rate.epoch.global": 0.9174843889384479, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.875, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999406067251462, + "tokens_p.mean_in_band": 0.44047619047619047, + "tokens_rate.above_band": 0.9702127659574468, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.029787234042553193 + }, + { + "epoch": 2.958031529612271, + "grad_norm": 0.0, + "learning_rate": 3.331604213625026e-07, + "loss": 0.2228, + "step": 13885, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.9027882441597589, + "success_rate.epoch.env.math": 0.9760690466849745, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8767867352773013, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8755234642091531, + "success_rate.epoch.global": 0.9175762976163956, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9993640988372093, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.9590967192160207, + "grad_norm": 387.4544287097135, + "learning_rate": 3.331351117229077e-07, + "loss": 0.3024, + "step": 13890, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.9028614457831325, + "success_rate.epoch.env.math": 0.9760971786833855, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8761415525114156, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8754740233778712, + "success_rate.epoch.global": 0.9173342234089897, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9846938775510204, + "tokens_p.mean_in_band": 0.491455078125, + "tokens_rate.above_band": 0.8596491228070176, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.14035087719298245 + }, + { + "epoch": 2.96016190881977, + "grad_norm": 71.45807436402674, + "learning_rate": 3.331098216622503e-07, + "loss": 0.1819, + "step": 13895, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.96875, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.948905109489051, + "success_rate.epoch.env.logic": 0.9029345372460497, + "success_rate.epoch.env.math": 0.9761252446183953, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8760683760683761, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8754765671010428, + "success_rate.epoch.global": 0.917314958879751, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9921875, + "tokens_p.mean_in_band": 0.640625, + "tokens_rate.above_band": 0.9629629629629629, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.037037037037037035 + }, + { + "epoch": 2.961227098423519, + "grad_norm": 135.40746736484348, + "learning_rate": 3.3308455119896164e-07, + "loss": 0.2931, + "step": 13900, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9490291262135923, + "success_rate.epoch.env.logic": 0.9030075187969925, + "success_rate.epoch.env.math": 0.9761439186546734, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8759601706970128, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8754961669581598, + "success_rate.epoch.global": 0.9172957371225577, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.96, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9986906424581006, + "tokens_p.mean_in_band": 0.7060546875, + "tokens_rate.above_band": 0.9675675675675676, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.032432432432432434 + }, + { + "epoch": 2.962292288027269, + "grad_norm": 38.92787264919413, + "learning_rate": 3.3305930035145863e-07, + "loss": 0.1717, + "step": 13905, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9490291262135923, + "success_rate.epoch.env.logic": 0.9031531531531531, + "success_rate.epoch.env.math": 0.976171875, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8761363636363636, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8755279654709632, + "success_rate.epoch.global": 0.917387447327567, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9936079545454546, + "tokens_p.mean_in_band": 0.732421875, + "tokens_rate.above_band": 0.9777777777777777, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.022222222222222223 + }, + { + "epoch": 2.9633574776310185, + "grad_norm": 54.696034723488836, + "learning_rate": 3.330340691381439e-07, + "loss": 0.2109, + "step": 13910, + "success_rate.epoch.env.abd": 0.9875621890547264, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.9032258064516129, + "success_rate.epoch.env.math": 0.9762090483619345, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8759931895573212, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8755361535382943, + "success_rate.epoch.global": 0.9173681878599912, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.998857973421927, + "tokens_p.mean_in_band": 0.3356119791666667, + "tokens_rate.above_band": 0.9901315789473685, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.009868421052631578 + }, + { + "epoch": 2.9644226672347678, + "grad_norm": 165.36439275421932, + "learning_rate": 3.330088575774058e-07, + "loss": 0.244, + "step": 13915, + "success_rate.epoch.env.abd": 0.9875930521091811, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9491525423728814, + "success_rate.epoch.env.logic": 0.9032983508245878, + "success_rate.epoch.env.math": 0.9762461059190031, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8758503401360545, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8755359367713154, + "success_rate.epoch.global": 0.9173489710112857, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9983504398826979, + "tokens_p.mean_in_band": 0.4367897727272727, + "tokens_rate.above_band": 0.96875, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03125 + }, + { + "epoch": 2.965487856838517, + "grad_norm": 49.75839838282585, + "learning_rate": 3.3298366568761834e-07, + "loss": 0.17, + "step": 13920, + "success_rate.epoch.env.abd": 0.9875930521091811, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.9034431137724551, + "success_rate.epoch.env.math": 0.9762645914396887, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.876026040192471, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.875577915723218, + "success_rate.epoch.global": 0.9174403183023873, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9955956375838926, + "tokens_p.mean_in_band": 0.763671875, + "tokens_rate.above_band": 0.9966555183946488, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0033444816053511705 + }, + { + "epoch": 2.9665530464422667, + "grad_norm": 264.4967429866051, + "learning_rate": 3.3295849348714117e-07, + "loss": 0.2432, + "step": 13925, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.9035153328347045, + "success_rate.epoch.env.math": 0.9762830482115086, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8756359525155455, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8755632506484626, + "success_rate.epoch.global": 0.9173106646058733, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.9199999999999999, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9942857142857143, + "tokens_p.mean_below_band": 1.3869794202037156e-11, + "tokens_p.mean_in_band": 0.7005208333333334, + "tokens_rate.above_band": 0.9615384615384616, + "tokens_rate.below_band": 0.005494505494505495, + "tokens_rate.in_band": 0.03296703296703297 + }, + { + "epoch": 2.9676182360460164, + "grad_norm": 113.52741488629844, + "learning_rate": 3.329333409943197e-07, + "loss": 0.1583, + "step": 13930, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, + "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.9030574198359433, + "success_rate.epoch.env.math": 0.9763106796116505, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8754237288135593, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8755145363217779, + "success_rate.epoch.global": 0.9171812968681077, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.75, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.8125, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 1.0012608069164266, + "tokens_p.mean_in_band": 0.59765625, + "tokens_rate.above_band": 0.9914285714285714, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008571428571428572 + }, + { + "epoch": 2.9686834256497656, + "grad_norm": 282.68784744982264, + "learning_rate": 3.329082082274847e-07, + "loss": 0.1042, + "step": 13935, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.8732876712328768, + "success_rate.epoch.env.agentgym:sciworld": 0.9691780821917808, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9492753623188406, + "success_rate.epoch.env.logic": 0.9030574198359433, + "success_rate.epoch.env.math": 0.9763657497094149, + "success_rate.epoch.env.sat": 0.15384615384615385, + "success_rate.epoch.env.science": 0.8754940711462451, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.875575151524476, + "success_rate.epoch.global": 0.9172725269883234, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9981008287292817, + "tokens_p.mean_in_band": 0.84375, + "tokens_rate.above_band": 0.9945054945054945, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.005494505494505495 + }, + { + "epoch": 2.969748615253515, + "grad_norm": 155.6263380918352, + "learning_rate": 3.328830952049529e-07, + "loss": 0.2128, + "step": 13940, + "success_rate.epoch.env.abd": 0.9876237623762376, + "success_rate.epoch.env.agentgym:alfworld": 0.8737201365187713, + "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.9024571854058079, + "success_rate.epoch.env.math": 0.9763931888544891, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8755292125317528, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8753223766215613, + "success_rate.epoch.global": 0.917143485915493, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.5, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.sat": 0.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.7857142857142857, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9988258670520231, + "tokens_p.mean_in_band": 0.5027173913043478, + "tokens_rate.above_band": 0.9575645756457565, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.042435424354243544 + }, + { + "epoch": 2.9708138048572645, + "grad_norm": 180.59142436740416, + "learning_rate": 3.328580019450265e-07, + "loss": 0.4607, + "step": 13945, + "success_rate.epoch.env.abd": 0.9876543209876543, + "success_rate.epoch.env.agentgym:alfworld": 0.8741496598639455, + "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.9025297619047619, + "success_rate.epoch.env.math": 0.9764296754250387, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8756345177664975, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8753836902816375, + "success_rate.epoch.global": 0.9172345570455045, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.995697463768116, + "tokens_p.mean_in_band": 0.8020833333333334, + "tokens_rate.above_band": 0.9928057553956835, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007194244604316547 + }, + { + "epoch": 2.9718789944610142, + "grad_norm": 25.457645429553807, + "learning_rate": 3.3283292846599314e-07, + "loss": 0.2456, + "step": 13950, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.9025297619047619, + "success_rate.epoch.env.math": 0.9764569664222308, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8758096310898339, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8751354714826316, + "success_rate.epoch.global": 0.917215634606939, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974206349206349, + "tokens_p.mean_in_band": 0.79609375, + "tokens_rate.above_band": 0.984375, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.015625 + }, + { + "epoch": 2.9729441840647635, + "grad_norm": 168.49700040947678, + "learning_rate": 3.3280787478612635e-07, + "loss": 0.272, + "step": 13955, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9493975903614458, + "success_rate.epoch.env.logic": 0.9026745913818722, + "success_rate.epoch.env.math": 0.976475125337447, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8756680731364276, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8751563514291895, + "success_rate.epoch.global": 0.9171967536740513, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9985504314994607, + "tokens_p.mean_in_band": 0.58046875, + "tokens_rate.above_band": 0.9946351931330472, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.00536480686695279 + }, + { + "epoch": 2.974009373668513, + "grad_norm": 52.453416326424474, + "learning_rate": 3.32782840923685e-07, + "loss": 0.183, + "step": 13960, + "success_rate.epoch.env.abd": 0.9876847290640394, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9495192307692307, + "success_rate.epoch.env.logic": 0.9021497405485545, + "success_rate.epoch.env.math": 0.976502311248074, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8752107925801011, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8750805964227137, + "success_rate.epoch.global": 0.9169588080631026, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.3333333333333333, + "success_rate.window.env_macro_mean": 0.75, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9976728723404256, + "tokens_p.mean_below_band": 5.3085386753082275e-08, + "tokens_p.mean_in_band": 0.6656494140625, + "tokens_rate.above_band": 0.9343936381709742, + "tokens_rate.below_band": 0.0019880715705765406, + "tokens_rate.in_band": 0.0636182902584493 + }, + { + "epoch": 2.9750745632722624, + "grad_norm": 66.7103815478627, + "learning_rate": 3.327578268969136e-07, + "loss": 0.1388, + "step": 13965, + "success_rate.epoch.env.abd": 0.9877149877149877, + "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9495192307692307, + "success_rate.epoch.env.logic": 0.9021497405485545, + "success_rate.epoch.env.math": 0.9761538461538461, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8753859107493686, + "success_rate.epoch.env.webshop": 0.98, + "success_rate.epoch.env_macro_mean": 0.8750675883978036, + "success_rate.epoch.global": 0.9169402495075509, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 0.75, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9946209016393442, + "tokens_p.mean_in_band": 0.2890625, + "tokens_rate.above_band": 0.991869918699187, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.008130081300813009 + }, + { + "epoch": 2.976139752876012, + "grad_norm": 140.60219152502108, + "learning_rate": 3.327328327240421e-07, + "loss": 0.2018, + "step": 13970, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9023668639053254, + "success_rate.epoch.env.math": 0.9761721752498078, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8751402918069585, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8751556191833949, + "success_rate.epoch.global": 0.9169217315260166, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 0.8571428571428571, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9970695020746888, + "tokens_p.mean_in_band": 0.57421875, + "tokens_rate.above_band": 0.9934047815333883, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.006595218466611707 + }, + { + "epoch": 2.9772049424797613, + "grad_norm": 110.16844687673188, + "learning_rate": 3.327078584232863e-07, + "loss": 0.347, + "step": 13975, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9496402877697842, + "success_rate.epoch.env.logic": 0.9025110782865583, + "success_rate.epoch.env.math": 0.9761813292354975, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8751050126015122, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.875166354561711, + "success_rate.epoch.global": 0.9169032539855864, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8571428571428571, + "success_rate.window.env_macro_mean": 0.9523809523809524, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9901620370370371, + "tokens_p.mean_in_band": 0.65625, + "tokens_rate.above_band": 0.9152542372881356, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0847457627118644 + }, + { + "epoch": 2.978270132083511, + "grad_norm": 55.896515646269314, + "learning_rate": 3.326829040128471e-07, + "loss": 0.311, + "step": 13980, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9025110782865583, + "success_rate.epoch.env.math": 0.9762178749520521, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8747203579418344, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8749281749562314, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.ded": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6, + "success_rate.window.env_macro_mean": 0.5333333333333333, + "success_rate.window.global": 0.7, + "tokens_p.mean_above_band": 0.9951923076923077, + "tokens_p.mean_below_band": 8.307397365570068e-07, + "tokens_p.mean_in_band": 0.47934027777777777, + "tokens_rate.above_band": 0.9534225424601133, + "tokens_rate.below_band": 0.0002573340195573855, + "tokens_rate.in_band": 0.04632012352032939 + }, + { + "epoch": 2.9793353216872602, + "grad_norm": 68.14596240797243, + "learning_rate": 3.3265796951091117e-07, + "loss": 0.1403, + "step": 13985, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9473684210526315, + "success_rate.epoch.env.logic": 0.9027982326951399, + "success_rate.epoch.env.math": 0.9762269938650306, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.874616029042167, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8749456244491306, + "success_rate.epoch.global": 0.9166485073000654, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9924568965517241, + "tokens_p.mean_in_band": 0.6927083333333334, + "tokens_rate.above_band": 0.8787878787878788, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.12121212121212122 + }, + { + "epoch": 2.98040051129101, + "grad_norm": 97.65722158025633, + "learning_rate": 3.326330549356505e-07, + "loss": 0.329, + "step": 13990, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.9027982326951399, + "success_rate.epoch.env.math": 0.9762633996937213, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8747209821428571, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8750482218259058, + "success_rate.epoch.global": 0.9167392250761863, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9984509056244042, + "tokens_p.mean_in_band": 0.7783203125, + "tokens_rate.above_band": 0.9924314096499527, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.007568590350047304 + }, + { + "epoch": 2.981465700894759, + "grad_norm": 40.76710571834433, + "learning_rate": 3.3260816030522277e-07, + "loss": 0.1395, + "step": 13995, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.9030837004405287, + "success_rate.epoch.env.math": 0.9762815608263198, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8745819397993311, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8750631842381295, + "success_rate.epoch.global": 0.9167210263100674, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9960443037974683, + "tokens_p.mean_in_band": 0.7552083333333334, + "tokens_rate.above_band": 0.9634146341463414, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.036585365853658534 + }, + { + "epoch": 2.982530890498509, + "grad_norm": 0.0, + "learning_rate": 3.32583285637771e-07, + "loss": 0.1259, + "step": 14000, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9795918367346939, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.9031548055759354, + "success_rate.epoch.env.math": 0.9763358778625955, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8746518105849582, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8750903080679635, + "success_rate.epoch.global": 0.9168114682884448, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9947615606936416, + "tokens_p.mean_in_band": 0.78173828125, + "tokens_rate.above_band": 0.9558011049723757, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04419889502762431 + }, + { + "epoch": 2.983596080102258, + "grad_norm": 64.45447562635663, + "learning_rate": 3.325584309514236e-07, + "loss": 0.2547, + "step": 14005, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9474940334128878, + "success_rate.epoch.env.logic": 0.9031548055759354, + "success_rate.epoch.env.math": 0.9763719512195121, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8745130773511408, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8751273879307621, + "success_rate.epoch.global": 0.9167932306357127, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.75, + "success_rate.window.env_macro_mean": 0.9375, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972067039106145, + "tokens_p.mean_in_band": 0.421875, + "tokens_rate.above_band": 0.9728260869565217, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02717391304347826 + }, + { + "epoch": 2.9846612697060078, + "grad_norm": 222.73755438116066, + "learning_rate": 3.3253359626429455e-07, + "loss": 0.2687, + "step": 14010, + "success_rate.epoch.env.abd": 0.9877450980392157, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.9031548055759354, + "success_rate.epoch.env.math": 0.9760456273764259, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8743397275507367, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8750933279819137, + "success_rate.epoch.global": 0.9166666666666666, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.math": 0.8333333333333334, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8333333333333334, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9971590909090909, + "tokens_p.mean_in_band": 0.5094401041666666, + "tokens_rate.above_band": 0.9540816326530612, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04591836734693878 + }, + { + "epoch": 2.985726459309757, + "grad_norm": 32.947884218776494, + "learning_rate": 3.3250878159448317e-07, + "loss": 0.4146, + "step": 14015, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.9034381858083395, + "success_rate.epoch.env.math": 0.976063829787234, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8741666666666666, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8751077356950611, + "success_rate.epoch.global": 0.9166486252435592, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9972826086956522, + "tokens_p.mean_in_band": 0.6493055555555556, + "tokens_rate.above_band": 0.9470588235294117, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.052941176470588235 + }, + { + "epoch": 2.9867916489135067, + "grad_norm": 399.25992896654066, + "learning_rate": 3.324839869600742e-07, + "loss": 0.3641, + "step": 14020, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9698996655518395, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.9034381858083395, + "success_rate.epoch.env.math": 0.9761092150170648, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8739938939772411, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8751145819128133, + "success_rate.epoch.global": 0.9166306228373703, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9981374172185431, + "tokens_p.mean_in_band": 0.6109375, + "tokens_rate.above_band": 0.967948717948718, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.03205128205128205 + }, + { + "epoch": 2.987856838517256, + "grad_norm": 60.66138842400301, + "learning_rate": 3.324592123791377e-07, + "loss": 0.2089, + "step": 14025, + "success_rate.epoch.env.abd": 0.9877750611246944, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9698996655518395, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9476190476190476, + "success_rate.epoch.env.logic": 0.9035792549306063, + "success_rate.epoch.env.math": 0.9761273209549072, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8739262953726794, + "success_rate.epoch.env.webshop": 0.9803921568627451, + "success_rate.epoch.env_macro_mean": 0.8751229070451358, + "success_rate.epoch.global": 0.9166126593216678, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8333333333333334, + "success_rate.window.env_macro_mean": 0.9444444444444445, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9963474025974026, + "tokens_p.mean_in_band": 0.64453125, + "tokens_rate.above_band": 0.9746835443037974, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02531645569620253 + }, + { + "epoch": 2.9889220281210056, + "grad_norm": 315.94223566805266, + "learning_rate": 3.3243445786972945e-07, + "loss": 0.204, + "step": 14030, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9478672985781991, + "success_rate.epoch.env.logic": 0.9036496350364963, + "success_rate.epoch.env.math": 0.9761363636363637, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.874031007751938, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8752083262637256, + "success_rate.epoch.global": 0.9167026327147173, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9979328793774319, + "tokens_p.mean_in_band": 0.8671875, + "tokens_rate.above_band": 0.9961240310077519, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.003875968992248062 + }, + { + "epoch": 2.989987217724755, + "grad_norm": 129.66468493805965, + "learning_rate": 3.324097234498901e-07, + "loss": 0.1164, + "step": 14035, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.97, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9481132075471698, + "success_rate.epoch.env.logic": 0.9037199124726477, + "success_rate.epoch.env.math": 0.9761634506242906, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8741703539823009, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8752522007749448, + "success_rate.epoch.global": 0.9167924121577926, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9988924050632911, + "tokens_rate.above_band": 1.0, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.0 + }, + { + "epoch": 2.9910524073285045, + "grad_norm": 137.2455317126776, + "learning_rate": 3.323850091376461e-07, + "loss": 0.2647, + "step": 14040, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.98, + "success_rate.epoch.env.ded": 0.9482352941176471, + "success_rate.epoch.env.logic": 0.9037900874635568, + "success_rate.epoch.env.math": 0.9761904761904762, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8743093922651933, + "success_rate.epoch.env.webshop": 0.9807692307692307, + "success_rate.epoch.env_macro_mean": 0.8752938365189951, + "success_rate.epoch.global": 0.9168819982773471, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9969875145180023, + "tokens_p.mean_in_band": 0.6640625, + "tokens_rate.above_band": 0.9953757225433526, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.004624277456647399 + }, + { + "epoch": 2.992117596932254, + "grad_norm": 142.3021337214023, + "learning_rate": 3.3236031495100896e-07, + "loss": 0.2017, + "step": 14045, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9803921568627451, + "success_rate.epoch.env.ded": 0.9482352941176471, + "success_rate.epoch.env.logic": 0.9039301310043668, + "success_rate.epoch.env.math": 0.9762264150943396, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8743787962451685, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8753847808817096, + "success_rate.epoch.global": 0.9169713916971391, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env.webshop": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9989820846905537, + "tokens_p.mean_in_band": 0.6611328125, + "tokens_rate.above_band": 0.9871382636655949, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.012861736334405145 + }, + { + "epoch": 2.9931827865360034, + "grad_norm": 106.325240203674, + "learning_rate": 3.323356409079757e-07, + "loss": 0.3575, + "step": 14050, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9803921568627451, + "success_rate.epoch.env.ded": 0.9483568075117371, + "success_rate.epoch.env.logic": 0.9034132171387074, + "success_rate.epoch.env.math": 0.9762264150943396, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8745865490628445, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8753677220040829, + "success_rate.epoch.global": 0.9169531585732703, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 0.6666666666666666, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8888888888888888, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9965596330275229, + "tokens_p.mean_in_band": 0.7589285714285714, + "tokens_rate.above_band": 0.9589442815249267, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.04105571847507331 + }, + { + "epoch": 2.9942479761397527, + "grad_norm": 113.30351148050276, + "learning_rate": 3.323109870265285e-07, + "loss": 0.336, + "step": 14055, + "success_rate.epoch.env.abd": 0.9878048780487805, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9807692307692307, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.9034833091436865, + "success_rate.epoch.env.math": 0.9762622456669179, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8744147617736161, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8754070086804027, + "success_rate.epoch.global": 0.9169349645846748, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.6666666666666666, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.999198717948718, + "tokens_p.mean_in_band": 0.4060329861111111, + "tokens_rate.above_band": 0.985781990521327, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.014218009478672985 + }, + { + "epoch": 2.9953131657435024, + "grad_norm": 48.07466739262598, + "learning_rate": 3.32286353324635e-07, + "loss": 0.249, + "step": 14060, + "success_rate.epoch.env.abd": 0.9878345498783455, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.9036231884057971, + "success_rate.epoch.env.math": 0.9762801204819277, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8745529573590096, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8754695965162708, + "success_rate.epoch.global": 0.9170240137221269, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.babyai": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 1.0, + "success_rate.window.global": 1.0, + "tokens_p.mean_above_band": 0.9939298561151079, + "tokens_p.mean_in_band": 0.83984375, + "tokens_rate.above_band": 0.972027972027972, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.027972027972027972 + }, + { + "epoch": 2.996378355347252, + "grad_norm": 244.59089699719746, + "learning_rate": 3.32261739820248e-07, + "loss": 0.2891, + "step": 14065, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, + "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.9036231884057971, + "success_rate.epoch.env.math": 0.9759669545625235, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8743469892768766, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8754250868608179, + "success_rate.epoch.global": 0.9168986935103877, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.math": 0.8571428571428571, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.7857142857142857, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9958677685950413, + "tokens_p.mean_in_band": 0.42083333333333334, + "tokens_rate.above_band": 0.8897058823529411, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.11029411764705882 + }, + { + "epoch": 2.9974435449510013, + "grad_norm": 189.0541955962222, + "learning_rate": 3.3223714653130574e-07, + "loss": 0.4225, + "step": 14070, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8729096989966555, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.9029688631426502, + "success_rate.epoch.env.math": 0.9759939984996249, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8744850315847295, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8754283819721411, + "success_rate.epoch.global": 0.9168806161745828, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.agentgym:sciworld": 1.0, + "success_rate.window.env.logic": 0.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 1.0, + "success_rate.window.env_macro_mean": 0.8, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.9974188790560472, + "tokens_p.mean_in_band": 0.4103732638888889, + "tokens_rate.above_band": 0.9741379310344828, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.02586206896551724 + }, + { + "epoch": 2.9985087345547505, + "grad_norm": 103.4420317712618, + "learning_rate": 3.322125734757316e-07, + "loss": 0.4346, + "step": 14075, + "success_rate.epoch.env.abd": 0.9878640776699029, + "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9484777517564403, + "success_rate.epoch.env.logic": 0.9029688631426502, + "success_rate.epoch.env.math": 0.9760299625468165, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.8743828853538124, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8754608776224095, + "success_rate.epoch.global": 0.9168625774738192, + "success_rate.window.env.agentgym:alfworld": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.8, + "success_rate.window.env_macro_mean": 0.9333333333333332, + "success_rate.window.global": 0.9, + "tokens_p.mean_above_band": 0.99797197640118, + "tokens_p.mean_in_band": 0.5888671875, + "tokens_rate.above_band": 0.9769452449567724, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023054755043227664 + }, + { + "epoch": 2.9995739241585, + "grad_norm": 178.74036372896242, + "learning_rate": 3.3218802067143415e-07, + "loss": 0.2448, + "step": 14080, + "success_rate.epoch.env.abd": 0.9878934624697336, + "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, + "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, + "success_rate.epoch.env.agentgym:textcraft": 0.9838709677419355, + "success_rate.epoch.env.babyai": 0.9811320754716981, + "success_rate.epoch.env.ded": 0.9485981308411215, + "success_rate.epoch.env.logic": 0.9030390738060782, + "success_rate.epoch.env.math": 0.9760479041916168, + "success_rate.epoch.env.sat": 0.1509433962264151, + "success_rate.epoch.env.science": 0.873972602739726, + "success_rate.epoch.env.webshop": 0.9811320754716981, + "success_rate.epoch.env_macro_mean": 0.8754692452536404, + "success_rate.epoch.global": 0.9167378309137489, + "success_rate.window.env.abd": 1.0, + "success_rate.window.env.agentgym:textcraft": 1.0, + "success_rate.window.env.ded": 1.0, + "success_rate.window.env.logic": 1.0, + "success_rate.window.env.math": 1.0, + "success_rate.window.env.science": 0.5, + "success_rate.window.env_macro_mean": 0.9166666666666666, + "success_rate.window.global": 0.8, + "tokens_p.mean_above_band": 0.9976535836177475, + "tokens_p.mean_in_band": 0.6021205357142857, + "tokens_rate.above_band": 0.9766666666666667, + "tokens_rate.below_band": 0.0, + "tokens_rate.in_band": 0.023333333333333334 + } + ], + "logging_steps": 5, + "max_steps": 18776, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 351041909738496.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}