| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.11074197120708748, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 73.265625, | |
| "epoch": 0.0011074197120708748, | |
| "grad_norm": 0.47520893812179565, | |
| "kl": 0.0, | |
| "learning_rate": 9.99375e-07, | |
| "loss": 0.000854941550642252, | |
| "reward": 2.2648561000823975, | |
| "reward_std": 0.32521533221006393, | |
| "rewards/GDino": 0.84943026304245, | |
| "rewards/GIT": 0.5776679813861847, | |
| "rewards/HPSv2": 0.2639656066894531, | |
| "rewards/ORM": 0.5737921893596649, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -22.0, | |
| "step": 1 | |
| }, | |
| { | |
| "completion_length": 57.359375, | |
| "epoch": 0.0022148394241417496, | |
| "grad_norm": 0.7006784677505493, | |
| "kl": 0.00151824951171875, | |
| "learning_rate": 9.9875e-07, | |
| "loss": 0.0010380030144006014, | |
| "reward": 1.6890186071395874, | |
| "reward_std": 0.5064275413751602, | |
| "rewards/GDino": 0.7000000476837158, | |
| "rewards/GIT": 0.161313958466053, | |
| "rewards/HPSv2": 0.2509632110595703, | |
| "rewards/ORM": 0.5767413973808289, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -20.5625, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 54.640625, | |
| "epoch": 0.0033222591362126247, | |
| "grad_norm": 0.5812113285064697, | |
| "kl": 0.001556396484375, | |
| "learning_rate": 9.98125e-07, | |
| "loss": -0.0055133504793047905, | |
| "reward": 1.5832943320274353, | |
| "reward_std": 0.3882431983947754, | |
| "rewards/GDino": 0.6165956258773804, | |
| "rewards/GIT": 0.3970412313938141, | |
| "rewards/HPSv2": 0.24474143981933594, | |
| "rewards/ORM": 0.3249160535633564, | |
| "self_certainty_semantic": -25.1875, | |
| "self_certainty_token": -20.8125, | |
| "step": 3 | |
| }, | |
| { | |
| "completion_length": 63.578125, | |
| "epoch": 0.004429678848283499, | |
| "grad_norm": 0.6130731105804443, | |
| "kl": 0.001605987548828125, | |
| "learning_rate": 9.975e-07, | |
| "loss": -0.005623435601592064, | |
| "reward": 2.1563462018966675, | |
| "reward_std": 0.3505118489265442, | |
| "rewards/GDino": 0.8188963234424591, | |
| "rewards/GIT": 0.4581628292798996, | |
| "rewards/HPSv2": 0.24955368041992188, | |
| "rewards/ORM": 0.6297334432601929, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -22.0, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 57.65625, | |
| "epoch": 0.005537098560354375, | |
| "grad_norm": 0.8068524599075317, | |
| "kl": 0.00165557861328125, | |
| "learning_rate": 9.968749999999999e-07, | |
| "loss": -0.0018901200965046883, | |
| "reward": 1.6294466853141785, | |
| "reward_std": 0.3914882242679596, | |
| "rewards/GDino": 0.6075743436813354, | |
| "rewards/GIT": 0.2503758817911148, | |
| "rewards/HPSv2": 0.2523918151855469, | |
| "rewards/ORM": 0.5191046893596649, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.8125, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 65.8125, | |
| "epoch": 0.006644518272425249, | |
| "grad_norm": 74728.3515625, | |
| "kl": 228.00085067749023, | |
| "learning_rate": 9.9625e-07, | |
| "loss": 2.2879227567464113, | |
| "reward": 2.15460866689682, | |
| "reward_std": 0.18937285244464874, | |
| "rewards/GDino": 0.7502027153968811, | |
| "rewards/GIT": 0.4551280438899994, | |
| "rewards/HPSv2": 0.2774028778076172, | |
| "rewards/ORM": 0.671875, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -22.625, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 65.640625, | |
| "epoch": 0.007751937984496124, | |
| "grad_norm": 0.9850716590881348, | |
| "kl": 0.001739501953125, | |
| "learning_rate": 9.956249999999999e-07, | |
| "loss": -0.009785129223018885, | |
| "reward": 1.6486687660217285, | |
| "reward_std": 0.55589759349823, | |
| "rewards/GDino": 0.5765624940395355, | |
| "rewards/GIT": 0.15754839032888412, | |
| "rewards/HPSv2": 0.2522296905517578, | |
| "rewards/ORM": 0.6623281538486481, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -22.1875, | |
| "step": 7 | |
| }, | |
| { | |
| "completion_length": 65.796875, | |
| "epoch": 0.008859357696566999, | |
| "grad_norm": 0.8074976801872253, | |
| "kl": 0.001628875732421875, | |
| "learning_rate": 9.95e-07, | |
| "loss": 0.0002866658614948392, | |
| "reward": 1.7531355023384094, | |
| "reward_std": 0.3834189176559448, | |
| "rewards/GDino": 0.7171875536441803, | |
| "rewards/GIT": 0.3904750794172287, | |
| "rewards/HPSv2": 0.2441272735595703, | |
| "rewards/ORM": 0.4013456404209137, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -20.5, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 61.53125, | |
| "epoch": 0.009966777408637873, | |
| "grad_norm": 0.5135362148284912, | |
| "kl": 0.001628875732421875, | |
| "learning_rate": 9.94375e-07, | |
| "loss": -0.002820038120262325, | |
| "reward": 2.1886491775512695, | |
| "reward_std": 0.5042529106140137, | |
| "rewards/GDino": 0.800000011920929, | |
| "rewards/GIT": 0.3224633187055588, | |
| "rewards/HPSv2": 0.2661018371582031, | |
| "rewards/ORM": 0.8000838756561279, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.5, | |
| "step": 9 | |
| }, | |
| { | |
| "completion_length": 66.9375, | |
| "epoch": 0.01107419712070875, | |
| "grad_norm": 1.035406231880188, | |
| "kl": 0.001590728759765625, | |
| "learning_rate": 9.9375e-07, | |
| "loss": 0.010037540923804045, | |
| "reward": 1.8388126492500305, | |
| "reward_std": 0.385573148727417, | |
| "rewards/GDino": 0.729426920413971, | |
| "rewards/GIT": 0.47063055634498596, | |
| "rewards/HPSv2": 0.25093841552734375, | |
| "rewards/ORM": 0.3878167122602463, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -20.75, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 54.65625, | |
| "epoch": 0.012181616832779624, | |
| "grad_norm": 0.6659172773361206, | |
| "kl": 0.00159454345703125, | |
| "learning_rate": 9.93125e-07, | |
| "loss": -0.010986692272126675, | |
| "reward": 2.312160015106201, | |
| "reward_std": 0.3424924612045288, | |
| "rewards/GDino": 0.7864583432674408, | |
| "rewards/GIT": 0.5519254580140114, | |
| "rewards/HPSv2": 0.2634601593017578, | |
| "rewards/ORM": 0.710316002368927, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -20.9375, | |
| "step": 11 | |
| }, | |
| { | |
| "completion_length": 65.6875, | |
| "epoch": 0.013289036544850499, | |
| "grad_norm": 0.4100457727909088, | |
| "kl": 0.00152587890625, | |
| "learning_rate": 9.925e-07, | |
| "loss": -0.0020649502985179424, | |
| "reward": 1.831676721572876, | |
| "reward_std": 0.37266574054956436, | |
| "rewards/GDino": 0.6748343408107758, | |
| "rewards/GIT": 0.3966377377510071, | |
| "rewards/HPSv2": 0.2431049346923828, | |
| "rewards/ORM": 0.5170995742082596, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.5, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 62.15625, | |
| "epoch": 0.014396456256921373, | |
| "grad_norm": 1.1354421377182007, | |
| "kl": 0.0016326904296875, | |
| "learning_rate": 9.91875e-07, | |
| "loss": -0.0013978920178487897, | |
| "reward": 1.7478299736976624, | |
| "reward_std": 0.3111024349927902, | |
| "rewards/GDino": 0.7122170925140381, | |
| "rewards/GIT": 0.28808362782001495, | |
| "rewards/HPSv2": 0.2510089874267578, | |
| "rewards/ORM": 0.4965202957391739, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.6875, | |
| "step": 13 | |
| }, | |
| { | |
| "completion_length": 63.734375, | |
| "epoch": 0.015503875968992248, | |
| "grad_norm": 171.63954162597656, | |
| "kl": 11.750831604003906, | |
| "learning_rate": 9.912499999999998e-07, | |
| "loss": 0.11320369923487306, | |
| "reward": 1.820958137512207, | |
| "reward_std": 0.6430586874485016, | |
| "rewards/GDino": 0.7286913394927979, | |
| "rewards/GIT": 0.39159613847732544, | |
| "rewards/HPSv2": 0.222503662109375, | |
| "rewards/ORM": 0.47816696763038635, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.875, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 64.796875, | |
| "epoch": 0.016611295681063124, | |
| "grad_norm": 1.790418267250061, | |
| "kl": 0.001697540283203125, | |
| "learning_rate": 9.90625e-07, | |
| "loss": -0.0012796747614629567, | |
| "reward": 2.4724700450897217, | |
| "reward_std": 0.361017182469368, | |
| "rewards/GDino": 0.8982033133506775, | |
| "rewards/GIT": 0.5411243438720703, | |
| "rewards/HPSv2": 0.2581005096435547, | |
| "rewards/ORM": 0.7750419676303864, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -21.8125, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 65.078125, | |
| "epoch": 0.017718715393133997, | |
| "grad_norm": 0.38361120223999023, | |
| "kl": 0.0015869140625, | |
| "learning_rate": 9.9e-07, | |
| "loss": 0.006866331794299185, | |
| "reward": 1.5055131912231445, | |
| "reward_std": 0.40322621166706085, | |
| "rewards/GDino": 0.651562511920929, | |
| "rewards/GIT": 0.2843637466430664, | |
| "rewards/HPSv2": 0.24664592742919922, | |
| "rewards/ORM": 0.32294100522994995, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.125, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 70.25, | |
| "epoch": 0.018826135105204873, | |
| "grad_norm": 1.0185045003890991, | |
| "kl": 0.001552581787109375, | |
| "learning_rate": 9.89375e-07, | |
| "loss": -0.010323233203962445, | |
| "reward": 1.5897727608680725, | |
| "reward_std": 0.530043363571167, | |
| "rewards/GDino": 0.5529386103153229, | |
| "rewards/GIT": 0.2131059616804123, | |
| "rewards/HPSv2": 0.2552909851074219, | |
| "rewards/ORM": 0.5684372782707214, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -20.5, | |
| "step": 17 | |
| }, | |
| { | |
| "completion_length": 66.34375, | |
| "epoch": 0.019933554817275746, | |
| "grad_norm": 0.4375481605529785, | |
| "kl": 0.00156402587890625, | |
| "learning_rate": 9.8875e-07, | |
| "loss": -0.00136462040245533, | |
| "reward": 2.063610315322876, | |
| "reward_std": 0.42642320692539215, | |
| "rewards/GDino": 0.7955474257469177, | |
| "rewards/GIT": 0.5150393098592758, | |
| "rewards/HPSv2": 0.22445201873779297, | |
| "rewards/ORM": 0.528571605682373, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -20.75, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 59.21875, | |
| "epoch": 0.021040974529346623, | |
| "grad_norm": 0.3959902226924896, | |
| "kl": 0.00164031982421875, | |
| "learning_rate": 9.88125e-07, | |
| "loss": -0.0053134458139538765, | |
| "reward": 1.5237417221069336, | |
| "reward_std": 0.4693976193666458, | |
| "rewards/GDino": 0.701702356338501, | |
| "rewards/GIT": 0.2579326629638672, | |
| "rewards/HPSv2": 0.24812698364257812, | |
| "rewards/ORM": 0.3159796893596649, | |
| "self_certainty_semantic": -25.1875, | |
| "self_certainty_token": -21.5625, | |
| "step": 19 | |
| }, | |
| { | |
| "completion_length": 61.484375, | |
| "epoch": 0.0221483942414175, | |
| "grad_norm": 0.5081169605255127, | |
| "kl": 0.001689910888671875, | |
| "learning_rate": 9.875e-07, | |
| "loss": 0.0003520832397043705, | |
| "reward": 1.9516127109527588, | |
| "reward_std": 0.2731045335531235, | |
| "rewards/GDino": 0.6437798738479614, | |
| "rewards/GIT": 0.4635310173034668, | |
| "rewards/HPSv2": 0.24121475219726562, | |
| "rewards/ORM": 0.6030870825052261, | |
| "self_certainty_semantic": -25.1875, | |
| "self_certainty_token": -21.5625, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 55.546875, | |
| "epoch": 0.023255813953488372, | |
| "grad_norm": 0.4565694034099579, | |
| "kl": 0.001667022705078125, | |
| "learning_rate": 9.86875e-07, | |
| "loss": 0.0016932454891502857, | |
| "reward": 2.180082321166992, | |
| "reward_std": 0.5037369430065155, | |
| "rewards/GDino": 0.7953125238418579, | |
| "rewards/GIT": 0.45517681539058685, | |
| "rewards/HPSv2": 0.2586212158203125, | |
| "rewards/ORM": 0.6709719300270081, | |
| "self_certainty_semantic": -25.25, | |
| "self_certainty_token": -21.9375, | |
| "step": 21 | |
| }, | |
| { | |
| "completion_length": 68.75, | |
| "epoch": 0.024363233665559248, | |
| "grad_norm": 0.45827633142471313, | |
| "kl": 0.001712799072265625, | |
| "learning_rate": 9.862499999999999e-07, | |
| "loss": 0.0007174527272582054, | |
| "reward": 1.8721013069152832, | |
| "reward_std": 0.4303991347551346, | |
| "rewards/GDino": 0.6911458671092987, | |
| "rewards/GIT": 0.36048486828804016, | |
| "rewards/HPSv2": 0.2603263854980469, | |
| "rewards/ORM": 0.5601442009210587, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -22.4375, | |
| "step": 22 | |
| }, | |
| { | |
| "completion_length": 58.609375, | |
| "epoch": 0.02547065337763012, | |
| "grad_norm": 0.6875389218330383, | |
| "kl": 0.00162506103515625, | |
| "learning_rate": 9.85625e-07, | |
| "loss": -0.004631380317732692, | |
| "reward": 1.9805514812469482, | |
| "reward_std": 0.5138447731733322, | |
| "rewards/GDino": 0.706105500459671, | |
| "rewards/GIT": 0.4199465811252594, | |
| "rewards/HPSv2": 0.26941490173339844, | |
| "rewards/ORM": 0.5850843787193298, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -20.9375, | |
| "step": 23 | |
| }, | |
| { | |
| "completion_length": 60.859375, | |
| "epoch": 0.026578073089700997, | |
| "grad_norm": 0.5052416324615479, | |
| "kl": 0.001667022705078125, | |
| "learning_rate": 9.849999999999999e-07, | |
| "loss": -0.0046843914315104485, | |
| "reward": 2.368114173412323, | |
| "reward_std": 0.4367552697658539, | |
| "rewards/GDino": 0.815625011920929, | |
| "rewards/GIT": 0.633857935667038, | |
| "rewards/HPSv2": 0.25930213928222656, | |
| "rewards/ORM": 0.6593290567398071, | |
| "self_certainty_semantic": -25.75, | |
| "self_certainty_token": -21.875, | |
| "step": 24 | |
| }, | |
| { | |
| "completion_length": 61.078125, | |
| "epoch": 0.02768549280177187, | |
| "grad_norm": 0.6162320971488953, | |
| "kl": 0.001617431640625, | |
| "learning_rate": 9.84375e-07, | |
| "loss": -0.005464642075821757, | |
| "reward": 1.9494624137878418, | |
| "reward_std": 0.40468768775463104, | |
| "rewards/GDino": 0.6967671811580658, | |
| "rewards/GIT": 0.40975040197372437, | |
| "rewards/HPSv2": 0.26043701171875, | |
| "rewards/ORM": 0.5825077295303345, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.3125, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 50.734375, | |
| "epoch": 0.028792912513842746, | |
| "grad_norm": 2.8454437255859375, | |
| "kl": 0.001804351806640625, | |
| "learning_rate": 9.8375e-07, | |
| "loss": -0.006305628921836615, | |
| "reward": 2.190965175628662, | |
| "reward_std": 0.44982025027275085, | |
| "rewards/GDino": 0.7243013381958008, | |
| "rewards/GIT": 0.5294483602046967, | |
| "rewards/HPSv2": 0.2750282287597656, | |
| "rewards/ORM": 0.6621872782707214, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -22.375, | |
| "step": 26 | |
| }, | |
| { | |
| "completion_length": 62.484375, | |
| "epoch": 0.029900332225913623, | |
| "grad_norm": 0.4033506512641907, | |
| "kl": 0.0016021728515625, | |
| "learning_rate": 9.83125e-07, | |
| "loss": -0.0016465974040329456, | |
| "reward": 1.9733637571334839, | |
| "reward_std": 0.44280076026916504, | |
| "rewards/GDino": 0.7363362908363342, | |
| "rewards/GIT": 0.4528593420982361, | |
| "rewards/HPSv2": 0.24550628662109375, | |
| "rewards/ORM": 0.5386618673801422, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -22.375, | |
| "step": 27 | |
| }, | |
| { | |
| "completion_length": 65.046875, | |
| "epoch": 0.031007751937984496, | |
| "grad_norm": 0.559298574924469, | |
| "kl": 0.00167083740234375, | |
| "learning_rate": 9.825e-07, | |
| "loss": 0.004501585033722222, | |
| "reward": 1.4280173778533936, | |
| "reward_std": 0.27060839533805847, | |
| "rewards/GDino": 0.5987553596496582, | |
| "rewards/GIT": 0.10973574221134186, | |
| "rewards/HPSv2": 0.2664012908935547, | |
| "rewards/ORM": 0.453125, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -20.9375, | |
| "step": 28 | |
| }, | |
| { | |
| "completion_length": 55.5625, | |
| "epoch": 0.03211517165005537, | |
| "grad_norm": 0.42233753204345703, | |
| "kl": 0.00168609619140625, | |
| "learning_rate": 9.81875e-07, | |
| "loss": -0.005473613273352385, | |
| "reward": 2.4506709575653076, | |
| "reward_std": 0.20222720131278038, | |
| "rewards/GDino": 0.8296874761581421, | |
| "rewards/GIT": 0.605083167552948, | |
| "rewards/HPSv2": 0.285858154296875, | |
| "rewards/ORM": 0.7300421893596649, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -20.9375, | |
| "step": 29 | |
| }, | |
| { | |
| "completion_length": 57.640625, | |
| "epoch": 0.03322259136212625, | |
| "grad_norm": 0.5650274157524109, | |
| "kl": 0.0016326904296875, | |
| "learning_rate": 9.8125e-07, | |
| "loss": 0.0003150699194520712, | |
| "reward": 2.489137649536133, | |
| "reward_std": 0.4210814982652664, | |
| "rewards/GDino": 0.8948009014129639, | |
| "rewards/GIT": 0.586266428232193, | |
| "rewards/HPSv2": 0.24865341186523438, | |
| "rewards/ORM": 0.7594169676303864, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -21.0625, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 78.78125, | |
| "epoch": 0.03433001107419712, | |
| "grad_norm": 0.6762183308601379, | |
| "kl": 0.001613616943359375, | |
| "learning_rate": 9.806249999999998e-07, | |
| "loss": 0.007568572706077248, | |
| "reward": 1.8555968403816223, | |
| "reward_std": 0.2906922847032547, | |
| "rewards/GDino": 0.5989583432674408, | |
| "rewards/GIT": 0.38505683839321136, | |
| "rewards/HPSv2": 0.2403736114501953, | |
| "rewards/ORM": 0.6312080323696136, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -20.5625, | |
| "step": 31 | |
| }, | |
| { | |
| "completion_length": 62.5, | |
| "epoch": 0.035437430786267994, | |
| "grad_norm": 0.4184902012348175, | |
| "kl": 0.001628875732421875, | |
| "learning_rate": 9.8e-07, | |
| "loss": 0.007896744413301349, | |
| "reward": 1.495099127292633, | |
| "reward_std": 0.3622882664203644, | |
| "rewards/GDino": 0.6791666448116302, | |
| "rewards/GIT": 0.25104063749313354, | |
| "rewards/HPSv2": 0.23050880432128906, | |
| "rewards/ORM": 0.3343829959630966, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -22.0625, | |
| "step": 32 | |
| }, | |
| { | |
| "completion_length": 70.109375, | |
| "epoch": 0.036544850498338874, | |
| "grad_norm": 0.47143352031707764, | |
| "kl": 0.0016937255859375, | |
| "learning_rate": 9.79375e-07, | |
| "loss": 0.00709247519262135, | |
| "reward": 2.3964842557907104, | |
| "reward_std": 0.5415211468935013, | |
| "rewards/GDino": 0.897656261920929, | |
| "rewards/GIT": 0.6205766499042511, | |
| "rewards/HPSv2": 0.2254810333251953, | |
| "rewards/ORM": 0.6527703106403351, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -21.625, | |
| "step": 33 | |
| }, | |
| { | |
| "completion_length": 55.53125, | |
| "epoch": 0.03765227021040975, | |
| "grad_norm": 0.45762747526168823, | |
| "kl": 0.001678466796875, | |
| "learning_rate": 9.7875e-07, | |
| "loss": 0.020488019566982985, | |
| "reward": 1.9143174886703491, | |
| "reward_std": 0.2841227799654007, | |
| "rewards/GDino": 0.6593749821186066, | |
| "rewards/GIT": 0.4214262217283249, | |
| "rewards/HPSv2": 0.2424945831298828, | |
| "rewards/ORM": 0.5910216569900513, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -21.75, | |
| "step": 34 | |
| }, | |
| { | |
| "completion_length": 55.828125, | |
| "epoch": 0.03875968992248062, | |
| "grad_norm": 0.3845841884613037, | |
| "kl": 0.00167083740234375, | |
| "learning_rate": 9.78125e-07, | |
| "loss": 0.01862273830920458, | |
| "reward": 2.274049997329712, | |
| "reward_std": 0.28603486716747284, | |
| "rewards/GDino": 0.7786458432674408, | |
| "rewards/GIT": 0.5405041128396988, | |
| "rewards/HPSv2": 0.23740386962890625, | |
| "rewards/ORM": 0.7174962311983109, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -21.25, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 63.234375, | |
| "epoch": 0.03986710963455149, | |
| "grad_norm": 0.5729533433914185, | |
| "kl": 0.001678466796875, | |
| "learning_rate": 9.775e-07, | |
| "loss": -0.002963901497423649, | |
| "reward": 1.8639960289001465, | |
| "reward_std": 0.3890039473772049, | |
| "rewards/GDino": 0.6255208253860474, | |
| "rewards/GIT": 0.42713797092437744, | |
| "rewards/HPSv2": 0.24535751342773438, | |
| "rewards/ORM": 0.5659796744585037, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -21.3125, | |
| "step": 36 | |
| }, | |
| { | |
| "completion_length": 63.09375, | |
| "epoch": 0.04097452934662237, | |
| "grad_norm": 0.47338196635246277, | |
| "kl": 0.001888275146484375, | |
| "learning_rate": 9.76875e-07, | |
| "loss": 0.008916446007788181, | |
| "reward": 1.9735829830169678, | |
| "reward_std": 0.5416238605976105, | |
| "rewards/GDino": 0.7008762061595917, | |
| "rewards/GIT": 0.3141380175948143, | |
| "rewards/HPSv2": 0.2595968246459961, | |
| "rewards/ORM": 0.6989719867706299, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -23.125, | |
| "step": 37 | |
| }, | |
| { | |
| "completion_length": 58.640625, | |
| "epoch": 0.042081949058693245, | |
| "grad_norm": 1.639336347579956, | |
| "kl": 0.001651763916015625, | |
| "learning_rate": 9.7625e-07, | |
| "loss": -0.0003745388239622116, | |
| "reward": 1.8843677639961243, | |
| "reward_std": 0.27646802365779877, | |
| "rewards/GDino": 0.7309310734272003, | |
| "rewards/GIT": 0.2879854440689087, | |
| "rewards/HPSv2": 0.25732994079589844, | |
| "rewards/ORM": 0.6081212311983109, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.0625, | |
| "step": 38 | |
| }, | |
| { | |
| "completion_length": 54.453125, | |
| "epoch": 0.04318936877076412, | |
| "grad_norm": 0.4438176453113556, | |
| "kl": 0.00176239013671875, | |
| "learning_rate": 9.756249999999999e-07, | |
| "loss": -0.004410726949572563, | |
| "reward": 2.3740460872650146, | |
| "reward_std": 0.26216618716716766, | |
| "rewards/GDino": 0.8794216811656952, | |
| "rewards/GIT": 0.480433389544487, | |
| "rewards/HPSv2": 0.2703990936279297, | |
| "rewards/ORM": 0.7437919676303864, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.0, | |
| "step": 39 | |
| }, | |
| { | |
| "completion_length": 64.65625, | |
| "epoch": 0.044296788482835, | |
| "grad_norm": 0.9789016246795654, | |
| "kl": 0.0017242431640625, | |
| "learning_rate": 9.75e-07, | |
| "loss": -0.0008055282523855567, | |
| "reward": 2.2535433769226074, | |
| "reward_std": 0.46909773349761963, | |
| "rewards/GDino": 0.8751652538776398, | |
| "rewards/GIT": 0.4070926010608673, | |
| "rewards/HPSv2": 0.2731647491455078, | |
| "rewards/ORM": 0.6981207877397537, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -20.625, | |
| "step": 40 | |
| }, | |
| { | |
| "completion_length": 60.3125, | |
| "epoch": 0.04540420819490587, | |
| "grad_norm": 0.39339736104011536, | |
| "kl": 0.001697540283203125, | |
| "learning_rate": 9.743749999999999e-07, | |
| "loss": -0.0026839073980227113, | |
| "reward": 1.926289677619934, | |
| "reward_std": 0.21494604647159576, | |
| "rewards/GDino": 0.6536072194576263, | |
| "rewards/GIT": 0.38067150115966797, | |
| "rewards/HPSv2": 0.2470531463623047, | |
| "rewards/ORM": 0.6449578106403351, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.0, | |
| "step": 41 | |
| }, | |
| { | |
| "completion_length": 55.734375, | |
| "epoch": 0.046511627906976744, | |
| "grad_norm": 0.43325623869895935, | |
| "kl": 0.001575469970703125, | |
| "learning_rate": 9.7375e-07, | |
| "loss": 0.01566000678576529, | |
| "reward": 2.2492642402648926, | |
| "reward_std": 0.545527771115303, | |
| "rewards/GDino": 0.8451037406921387, | |
| "rewards/GIT": 0.4486817270517349, | |
| "rewards/HPSv2": 0.2523536682128906, | |
| "rewards/ORM": 0.703125, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.125, | |
| "step": 42 | |
| }, | |
| { | |
| "completion_length": 77.859375, | |
| "epoch": 0.047619047619047616, | |
| "grad_norm": 0.6008194088935852, | |
| "kl": 0.00209808349609375, | |
| "learning_rate": 9.73125e-07, | |
| "loss": 0.009053934598341584, | |
| "reward": 1.752554178237915, | |
| "reward_std": 0.3711804449558258, | |
| "rewards/GDino": 0.6425288617610931, | |
| "rewards/GIT": 0.38656318187713623, | |
| "rewards/HPSv2": 0.23595809936523438, | |
| "rewards/ORM": 0.4875040054321289, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -21.8125, | |
| "step": 43 | |
| }, | |
| { | |
| "completion_length": 64.859375, | |
| "epoch": 0.048726467331118496, | |
| "grad_norm": 0.4626310169696808, | |
| "kl": 0.001750946044921875, | |
| "learning_rate": 9.725e-07, | |
| "loss": 0.00038470514118671417, | |
| "reward": 2.837794542312622, | |
| "reward_std": 0.3451881557703018, | |
| "rewards/GDino": 0.9479166865348816, | |
| "rewards/GIT": 0.7795328795909882, | |
| "rewards/HPSv2": 0.26932334899902344, | |
| "rewards/ORM": 0.8410216569900513, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.5625, | |
| "step": 44 | |
| }, | |
| { | |
| "completion_length": 66.921875, | |
| "epoch": 0.04983388704318937, | |
| "grad_norm": 1.3941670656204224, | |
| "kl": 0.001880645751953125, | |
| "learning_rate": 9.71875e-07, | |
| "loss": -0.012070931028574705, | |
| "reward": 2.561403751373291, | |
| "reward_std": 0.48213036358356476, | |
| "rewards/GDino": 0.9039532244205475, | |
| "rewards/GIT": 0.5467919409275055, | |
| "rewards/HPSv2": 0.2617225646972656, | |
| "rewards/ORM": 0.8489359319210052, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.9375, | |
| "step": 45 | |
| }, | |
| { | |
| "completion_length": 59.625, | |
| "epoch": 0.05094130675526024, | |
| "grad_norm": 0.5365378260612488, | |
| "kl": 0.001949310302734375, | |
| "learning_rate": 9.712499999999998e-07, | |
| "loss": 0.01103684725239873, | |
| "reward": 2.0622146129608154, | |
| "reward_std": 0.40072987973690033, | |
| "rewards/GDino": 0.645312488079071, | |
| "rewards/GIT": 0.33725525438785553, | |
| "rewards/HPSv2": 0.2619609832763672, | |
| "rewards/ORM": 0.8176859617233276, | |
| "self_certainty_semantic": -25.25, | |
| "self_certainty_token": -22.3125, | |
| "step": 46 | |
| }, | |
| { | |
| "completion_length": 64.6875, | |
| "epoch": 0.05204872646733112, | |
| "grad_norm": 0.5151812434196472, | |
| "kl": 0.001766204833984375, | |
| "learning_rate": 9.70625e-07, | |
| "loss": -0.004148014355450869, | |
| "reward": 1.7916635870933533, | |
| "reward_std": 0.31147970259189606, | |
| "rewards/GDino": 0.7293796539306641, | |
| "rewards/GIT": 0.20818163454532623, | |
| "rewards/HPSv2": 0.27945709228515625, | |
| "rewards/ORM": 0.5746453106403351, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -21.5625, | |
| "step": 47 | |
| }, | |
| { | |
| "completion_length": 56.25, | |
| "epoch": 0.053156146179401995, | |
| "grad_norm": 0.7559373378753662, | |
| "kl": 0.001861572265625, | |
| "learning_rate": 9.7e-07, | |
| "loss": -0.002030523493885994, | |
| "reward": 1.4302473068237305, | |
| "reward_std": 0.4484506845474243, | |
| "rewards/GDino": 0.6244329512119293, | |
| "rewards/GIT": 0.0, | |
| "rewards/HPSv2": 0.2752876281738281, | |
| "rewards/ORM": 0.5305267572402954, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -20.875, | |
| "step": 48 | |
| }, | |
| { | |
| "completion_length": 61.21875, | |
| "epoch": 0.05426356589147287, | |
| "grad_norm": 0.46310731768608093, | |
| "kl": 0.00177764892578125, | |
| "learning_rate": 9.69375e-07, | |
| "loss": 0.0054672048427164555, | |
| "reward": 1.9361683130264282, | |
| "reward_std": 0.3801421523094177, | |
| "rewards/GDino": 0.7904821038246155, | |
| "rewards/GIT": 0.2458050437271595, | |
| "rewards/HPSv2": 0.25890541076660156, | |
| "rewards/ORM": 0.640975683927536, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.6875, | |
| "step": 49 | |
| }, | |
| { | |
| "completion_length": 61.921875, | |
| "epoch": 0.05537098560354374, | |
| "grad_norm": 0.5111473798751831, | |
| "kl": 0.002353668212890625, | |
| "learning_rate": 9.6875e-07, | |
| "loss": 0.0035089042503386736, | |
| "reward": 2.212684750556946, | |
| "reward_std": 0.3874351307749748, | |
| "rewards/GDino": 0.7840971350669861, | |
| "rewards/GIT": 0.42198260873556137, | |
| "rewards/HPSv2": 0.25807952880859375, | |
| "rewards/ORM": 0.7485254108905792, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -22.625, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 57.796875, | |
| "epoch": 0.05647840531561462, | |
| "grad_norm": 0.4804292917251587, | |
| "kl": 0.001743316650390625, | |
| "learning_rate": 9.68125e-07, | |
| "loss": -0.0010273723164573312, | |
| "reward": 1.8951371908187866, | |
| "reward_std": 0.5679852366447449, | |
| "rewards/GDino": 0.7922006845474243, | |
| "rewards/GIT": 0.27185457944869995, | |
| "rewards/HPSv2": 0.2777671813964844, | |
| "rewards/ORM": 0.5533146858215332, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -22.0, | |
| "step": 51 | |
| }, | |
| { | |
| "completion_length": 62.140625, | |
| "epoch": 0.05758582502768549, | |
| "grad_norm": 0.5876587629318237, | |
| "kl": 0.001842498779296875, | |
| "learning_rate": 9.675e-07, | |
| "loss": 0.010319232940673828, | |
| "reward": 2.453005313873291, | |
| "reward_std": 0.35728050768375397, | |
| "rewards/GDino": 0.917187511920929, | |
| "rewards/GIT": 0.6651300191879272, | |
| "rewards/HPSv2": 0.27350807189941406, | |
| "rewards/ORM": 0.5971797704696655, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.0625, | |
| "step": 52 | |
| }, | |
| { | |
| "completion_length": 57.046875, | |
| "epoch": 0.058693244739756366, | |
| "grad_norm": 0.5244357585906982, | |
| "kl": 0.00168609619140625, | |
| "learning_rate": 9.66875e-07, | |
| "loss": 0.0012504801852628589, | |
| "reward": 1.8911731839179993, | |
| "reward_std": 0.3232653737068176, | |
| "rewards/GDino": 0.7297230660915375, | |
| "rewards/GIT": 0.3948078155517578, | |
| "rewards/HPSv2": 0.24039649963378906, | |
| "rewards/ORM": 0.5262457728385925, | |
| "self_certainty_semantic": -25.3125, | |
| "self_certainty_token": -21.25, | |
| "step": 53 | |
| }, | |
| { | |
| "completion_length": 68.921875, | |
| "epoch": 0.059800664451827246, | |
| "grad_norm": 0.5011692047119141, | |
| "kl": 0.0017547607421875, | |
| "learning_rate": 9.6625e-07, | |
| "loss": -0.001990929711610079, | |
| "reward": 1.5346381068229675, | |
| "reward_std": 0.5364750325679779, | |
| "rewards/GDino": 0.5896078050136566, | |
| "rewards/GIT": 0.2611962556838989, | |
| "rewards/HPSv2": 0.24633407592773438, | |
| "rewards/ORM": 0.4375000149011612, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.5625, | |
| "step": 54 | |
| }, | |
| { | |
| "completion_length": 65.28125, | |
| "epoch": 0.06090808416389812, | |
| "grad_norm": 0.43720903992652893, | |
| "kl": 0.001796722412109375, | |
| "learning_rate": 9.65625e-07, | |
| "loss": 0.011945425532758236, | |
| "reward": 1.7657405734062195, | |
| "reward_std": 0.5052186846733093, | |
| "rewards/GDino": 0.7055748403072357, | |
| "rewards/GIT": 0.3213713690638542, | |
| "rewards/HPSv2": 0.26223182678222656, | |
| "rewards/ORM": 0.4765625, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.8125, | |
| "step": 55 | |
| }, | |
| { | |
| "completion_length": 72.15625, | |
| "epoch": 0.06201550387596899, | |
| "grad_norm": 0.6576823592185974, | |
| "kl": 0.00201416015625, | |
| "learning_rate": 9.649999999999999e-07, | |
| "loss": 0.010990551207214594, | |
| "reward": 2.0798487663269043, | |
| "reward_std": 0.5881477892398834, | |
| "rewards/GDino": 0.7611979246139526, | |
| "rewards/GIT": 0.38940075039863586, | |
| "rewards/HPSv2": 0.25081634521484375, | |
| "rewards/ORM": 0.678433746099472, | |
| "self_certainty_semantic": -25.125, | |
| "self_certainty_token": -21.8125, | |
| "step": 56 | |
| }, | |
| { | |
| "completion_length": 53.84375, | |
| "epoch": 0.06312292358803986, | |
| "grad_norm": 0.5109694600105286, | |
| "kl": 0.001708984375, | |
| "learning_rate": 9.64375e-07, | |
| "loss": -0.009197955019772053, | |
| "reward": 1.825343132019043, | |
| "reward_std": 0.49610868096351624, | |
| "rewards/GDino": 0.7342002689838409, | |
| "rewards/GIT": 0.27930086851119995, | |
| "rewards/HPSv2": 0.2493419647216797, | |
| "rewards/ORM": 0.5625, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -21.5, | |
| "step": 57 | |
| }, | |
| { | |
| "completion_length": 54.671875, | |
| "epoch": 0.06423034330011074, | |
| "grad_norm": 0.48297855257987976, | |
| "kl": 0.0018157958984375, | |
| "learning_rate": 9.637499999999999e-07, | |
| "loss": -2.7031637728214264e-05, | |
| "reward": 1.9436655044555664, | |
| "reward_std": 0.5841460824012756, | |
| "rewards/GDino": 0.7508301734924316, | |
| "rewards/GIT": 0.36742376536130905, | |
| "rewards/HPSv2": 0.24603271484375, | |
| "rewards/ORM": 0.579378753900528, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.6875, | |
| "step": 58 | |
| }, | |
| { | |
| "completion_length": 57.34375, | |
| "epoch": 0.06533776301218161, | |
| "grad_norm": 1.5652471780776978, | |
| "kl": 0.00185394287109375, | |
| "learning_rate": 9.63125e-07, | |
| "loss": -0.0014887296129018068, | |
| "reward": 2.154895305633545, | |
| "reward_std": 0.5548917800188065, | |
| "rewards/GDino": 0.7907229363918304, | |
| "rewards/GIT": 0.44339829683303833, | |
| "rewards/HPSv2": 0.2567615509033203, | |
| "rewards/ORM": 0.664012536406517, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.0625, | |
| "step": 59 | |
| }, | |
| { | |
| "completion_length": 52.0625, | |
| "epoch": 0.0664451827242525, | |
| "grad_norm": 0.8647972941398621, | |
| "kl": 0.00200653076171875, | |
| "learning_rate": 9.624999999999999e-07, | |
| "loss": -0.004864218062721193, | |
| "reward": 2.183086931705475, | |
| "reward_std": 0.27265597879886627, | |
| "rewards/GDino": 0.8968750238418579, | |
| "rewards/GIT": 0.4909053146839142, | |
| "rewards/HPSv2": 0.2511100769042969, | |
| "rewards/ORM": 0.544196605682373, | |
| "self_certainty_semantic": -25.25, | |
| "self_certainty_token": -20.8125, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 78.421875, | |
| "epoch": 0.06755260243632337, | |
| "grad_norm": 0.6149311065673828, | |
| "kl": 0.0018310546875, | |
| "learning_rate": 9.61875e-07, | |
| "loss": -0.003399772336706519, | |
| "reward": 2.3938775062561035, | |
| "reward_std": 0.3266971558332443, | |
| "rewards/GDino": 0.7299478650093079, | |
| "rewards/GIT": 0.6572037935256958, | |
| "rewards/HPSv2": 0.26293373107910156, | |
| "rewards/ORM": 0.743791937828064, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -20.5, | |
| "step": 61 | |
| }, | |
| { | |
| "completion_length": 71.796875, | |
| "epoch": 0.06866002214839424, | |
| "grad_norm": 0.8106938600540161, | |
| "kl": 0.00188446044921875, | |
| "learning_rate": 9.6125e-07, | |
| "loss": -0.004746791877551004, | |
| "reward": 2.3078866004943848, | |
| "reward_std": 0.4594850391149521, | |
| "rewards/GDino": 0.7886728346347809, | |
| "rewards/GIT": 0.6039779186248779, | |
| "rewards/HPSv2": 0.2555561065673828, | |
| "rewards/ORM": 0.6596797406673431, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.125, | |
| "step": 62 | |
| }, | |
| { | |
| "completion_length": 57.703125, | |
| "epoch": 0.06976744186046512, | |
| "grad_norm": 0.5699672102928162, | |
| "kl": 0.00218963623046875, | |
| "learning_rate": 9.606249999999998e-07, | |
| "loss": 0.005022911122068763, | |
| "reward": 2.2111340165138245, | |
| "reward_std": 0.6219878196716309, | |
| "rewards/GDino": 0.794545441865921, | |
| "rewards/GIT": 0.45049863308668137, | |
| "rewards/HPSv2": 0.24386024475097656, | |
| "rewards/ORM": 0.7222297191619873, | |
| "self_certainty_semantic": -25.25, | |
| "self_certainty_token": -21.9375, | |
| "step": 63 | |
| }, | |
| { | |
| "completion_length": 78.453125, | |
| "epoch": 0.07087486157253599, | |
| "grad_norm": 0.7573527693748474, | |
| "kl": 0.0022125244140625, | |
| "learning_rate": 9.6e-07, | |
| "loss": 0.013895762618631124, | |
| "reward": 1.6789215207099915, | |
| "reward_std": 0.15597553551197052, | |
| "rewards/GDino": 0.7209441661834717, | |
| "rewards/GIT": 0.31718890368938446, | |
| "rewards/HPSv2": 0.26105499267578125, | |
| "rewards/ORM": 0.37973345816135406, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.5625, | |
| "step": 64 | |
| }, | |
| { | |
| "completion_length": 63.59375, | |
| "epoch": 0.07198228128460686, | |
| "grad_norm": 0.4424923360347748, | |
| "kl": 0.0020599365234375, | |
| "learning_rate": 9.59375e-07, | |
| "loss": 0.0005846736021339893, | |
| "reward": 2.195925712585449, | |
| "reward_std": 0.5788445174694061, | |
| "rewards/GDino": 0.7169270515441895, | |
| "rewards/GIT": 0.6367218196392059, | |
| "rewards/HPSv2": 0.2345561981201172, | |
| "rewards/ORM": 0.6077205836772919, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.75, | |
| "step": 65 | |
| }, | |
| { | |
| "completion_length": 67.6875, | |
| "epoch": 0.07308970099667775, | |
| "grad_norm": 0.5050013661384583, | |
| "kl": 0.00211334228515625, | |
| "learning_rate": 9.5875e-07, | |
| "loss": 0.010172993643209338, | |
| "reward": 2.220258355140686, | |
| "reward_std": 0.30588236451148987, | |
| "rewards/GDino": 0.7442708909511566, | |
| "rewards/GIT": 0.47482602298259735, | |
| "rewards/HPSv2": 0.25937461853027344, | |
| "rewards/ORM": 0.7417868673801422, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.5, | |
| "step": 66 | |
| }, | |
| { | |
| "completion_length": 72.75, | |
| "epoch": 0.07419712070874862, | |
| "grad_norm": 0.47647950053215027, | |
| "kl": 0.001953125, | |
| "learning_rate": 9.58125e-07, | |
| "loss": 0.002580178901553154, | |
| "reward": 2.3537763357162476, | |
| "reward_std": 0.2857324182987213, | |
| "rewards/GDino": 0.852263331413269, | |
| "rewards/GIT": 0.5637244433164597, | |
| "rewards/HPSv2": 0.2550220489501953, | |
| "rewards/ORM": 0.6827665567398071, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.4375, | |
| "step": 67 | |
| }, | |
| { | |
| "completion_length": 60.109375, | |
| "epoch": 0.0753045404208195, | |
| "grad_norm": 0.45224544405937195, | |
| "kl": 0.0021209716796875, | |
| "learning_rate": 9.575e-07, | |
| "loss": 0.002825574716553092, | |
| "reward": 1.613221287727356, | |
| "reward_std": 0.332104429602623, | |
| "rewards/GDino": 0.6193348169326782, | |
| "rewards/GIT": 0.2909398823976517, | |
| "rewards/HPSv2": 0.2551765441894531, | |
| "rewards/ORM": 0.4477700889110565, | |
| "self_certainty_semantic": -25.25, | |
| "self_certainty_token": -21.0625, | |
| "step": 68 | |
| }, | |
| { | |
| "completion_length": 72.6875, | |
| "epoch": 0.07641196013289037, | |
| "grad_norm": 0.688894510269165, | |
| "kl": 0.002315521240234375, | |
| "learning_rate": 9.56875e-07, | |
| "loss": 0.012800770811736584, | |
| "reward": 2.1092969179153442, | |
| "reward_std": 0.36874186992645264, | |
| "rewards/GDino": 0.8054687678813934, | |
| "rewards/GIT": 0.3866874873638153, | |
| "rewards/HPSv2": 0.26236534118652344, | |
| "rewards/ORM": 0.6547753810882568, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -21.5, | |
| "step": 69 | |
| }, | |
| { | |
| "completion_length": 60.640625, | |
| "epoch": 0.07751937984496124, | |
| "grad_norm": 0.45330390334129333, | |
| "kl": 0.00215911865234375, | |
| "learning_rate": 9.5625e-07, | |
| "loss": -0.0010713667143136263, | |
| "reward": 1.552397072315216, | |
| "reward_std": 0.39455118775367737, | |
| "rewards/GDino": 0.6554375886917114, | |
| "rewards/GIT": 0.22663478553295135, | |
| "rewards/HPSv2": 0.2546577453613281, | |
| "rewards/ORM": 0.41566696763038635, | |
| "self_certainty_semantic": -25.25, | |
| "self_certainty_token": -20.75, | |
| "step": 70 | |
| }, | |
| { | |
| "completion_length": 76.515625, | |
| "epoch": 0.07862679955703211, | |
| "grad_norm": 0.5808414220809937, | |
| "kl": 0.00222015380859375, | |
| "learning_rate": 9.556249999999999e-07, | |
| "loss": 0.0038980550598353148, | |
| "reward": 1.9476300477981567, | |
| "reward_std": 0.38603267073631287, | |
| "rewards/GDino": 0.7262610197067261, | |
| "rewards/GIT": 0.30087296664714813, | |
| "rewards/HPSv2": 0.26424598693847656, | |
| "rewards/ORM": 0.6562500149011612, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -19.9375, | |
| "step": 71 | |
| }, | |
| { | |
| "completion_length": 57.15625, | |
| "epoch": 0.07973421926910298, | |
| "grad_norm": 0.3693688213825226, | |
| "kl": 0.00208282470703125, | |
| "learning_rate": 9.55e-07, | |
| "loss": -0.00035159417893737555, | |
| "reward": 1.9391373991966248, | |
| "reward_std": 0.3963821530342102, | |
| "rewards/GDino": 0.6879567801952362, | |
| "rewards/GIT": 0.4622843265533447, | |
| "rewards/HPSv2": 0.24675464630126953, | |
| "rewards/ORM": 0.5421415567398071, | |
| "self_certainty_semantic": -25.0625, | |
| "self_certainty_token": -20.9375, | |
| "step": 72 | |
| }, | |
| { | |
| "completion_length": 66.65625, | |
| "epoch": 0.08084163898117387, | |
| "grad_norm": 0.6215986013412476, | |
| "kl": 0.0024871826171875, | |
| "learning_rate": 9.543749999999999e-07, | |
| "loss": 0.003838272183202207, | |
| "reward": 2.1008963584899902, | |
| "reward_std": 0.4600249230861664, | |
| "rewards/GDino": 0.8240202069282532, | |
| "rewards/GIT": 0.48449917137622833, | |
| "rewards/HPSv2": 0.24818038940429688, | |
| "rewards/ORM": 0.5441965609788895, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.5, | |
| "step": 73 | |
| }, | |
| { | |
| "completion_length": 60.859375, | |
| "epoch": 0.08194905869324474, | |
| "grad_norm": 0.43593713641166687, | |
| "kl": 0.0030364990234375, | |
| "learning_rate": 9.5375e-07, | |
| "loss": 0.002844013855792582, | |
| "reward": 2.297879934310913, | |
| "reward_std": 0.2846696451306343, | |
| "rewards/GDino": 0.84375, | |
| "rewards/GIT": 0.5265894532203674, | |
| "rewards/HPSv2": 0.2544116973876953, | |
| "rewards/ORM": 0.6731287837028503, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.75, | |
| "step": 74 | |
| }, | |
| { | |
| "completion_length": 68.703125, | |
| "epoch": 0.08305647840531562, | |
| "grad_norm": 0.48668116331100464, | |
| "kl": 0.002227783203125, | |
| "learning_rate": 9.53125e-07, | |
| "loss": -0.0021062323357909918, | |
| "reward": 1.7519539594650269, | |
| "reward_std": 0.3109753131866455, | |
| "rewards/GDino": 0.6498888432979584, | |
| "rewards/GIT": 0.2745012864470482, | |
| "rewards/HPSv2": 0.26706886291503906, | |
| "rewards/ORM": 0.5604948848485947, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -20.625, | |
| "step": 75 | |
| }, | |
| { | |
| "completion_length": 70.25, | |
| "epoch": 0.08416389811738649, | |
| "grad_norm": 0.5122522711753845, | |
| "kl": 0.00208282470703125, | |
| "learning_rate": 9.525e-07, | |
| "loss": -0.00045439647510647774, | |
| "reward": 2.371267318725586, | |
| "reward_std": 0.4085633456707001, | |
| "rewards/GDino": 0.8135416805744171, | |
| "rewards/GIT": 0.6540948301553726, | |
| "rewards/HPSv2": 0.2650108337402344, | |
| "rewards/ORM": 0.6386198997497559, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -20.75, | |
| "step": 76 | |
| }, | |
| { | |
| "completion_length": 62.875, | |
| "epoch": 0.08527131782945736, | |
| "grad_norm": 0.505736768245697, | |
| "kl": 0.0037689208984375, | |
| "learning_rate": 9.51875e-07, | |
| "loss": -0.006699402409140021, | |
| "reward": 1.5121636986732483, | |
| "reward_std": 0.5349836349487305, | |
| "rewards/GDino": 0.616510659456253, | |
| "rewards/GIT": 0.18113864213228226, | |
| "rewards/HPSv2": 0.228485107421875, | |
| "rewards/ORM": 0.48602940142154694, | |
| "self_certainty_semantic": -25.125, | |
| "self_certainty_token": -21.875, | |
| "step": 77 | |
| }, | |
| { | |
| "completion_length": 65.8125, | |
| "epoch": 0.08637873754152824, | |
| "grad_norm": 0.4759610593318939, | |
| "kl": 0.0022735595703125, | |
| "learning_rate": 9.5125e-07, | |
| "loss": 0.0014968996401876211, | |
| "reward": 1.9482250213623047, | |
| "reward_std": 0.38150524348020554, | |
| "rewards/GDino": 0.7646995186805725, | |
| "rewards/GIT": 0.31973105669021606, | |
| "rewards/HPSv2": 0.2705249786376953, | |
| "rewards/ORM": 0.5932694524526596, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.125, | |
| "step": 78 | |
| }, | |
| { | |
| "completion_length": 72.609375, | |
| "epoch": 0.08748615725359911, | |
| "grad_norm": 0.4961722195148468, | |
| "kl": 0.00247955322265625, | |
| "learning_rate": 9.50625e-07, | |
| "loss": 0.00820195721462369, | |
| "reward": 2.2431598901748657, | |
| "reward_std": 0.19805177673697472, | |
| "rewards/GDino": 0.8183182775974274, | |
| "rewards/GIT": 0.60882468521595, | |
| "rewards/HPSv2": 0.2628040313720703, | |
| "rewards/ORM": 0.5532128810882568, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.5625, | |
| "step": 79 | |
| }, | |
| { | |
| "completion_length": 66.0625, | |
| "epoch": 0.08859357696567, | |
| "grad_norm": 0.5290701389312744, | |
| "kl": 0.00308990478515625, | |
| "learning_rate": 9.499999999999999e-07, | |
| "loss": -0.001018086913973093, | |
| "reward": 1.7054139375686646, | |
| "reward_std": 0.4478110671043396, | |
| "rewards/GDino": 0.6419965624809265, | |
| "rewards/GIT": 0.19029075652360916, | |
| "rewards/HPSv2": 0.2727680206298828, | |
| "rewards/ORM": 0.6003586649894714, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.5, | |
| "step": 80 | |
| }, | |
| { | |
| "completion_length": 69.75, | |
| "epoch": 0.08970099667774087, | |
| "grad_norm": 0.530961811542511, | |
| "kl": 0.00331878662109375, | |
| "learning_rate": 9.493749999999999e-07, | |
| "loss": -0.0018104221671819687, | |
| "reward": 2.1294270157814026, | |
| "reward_std": 0.30140096694231033, | |
| "rewards/GDino": 0.7601194977760315, | |
| "rewards/GIT": 0.36138176918029785, | |
| "rewards/HPSv2": 0.27007102966308594, | |
| "rewards/ORM": 0.7378547042608261, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -20.25, | |
| "step": 81 | |
| }, | |
| { | |
| "completion_length": 62.25, | |
| "epoch": 0.09080841638981174, | |
| "grad_norm": 0.5380280017852783, | |
| "kl": 0.0029449462890625, | |
| "learning_rate": 9.487499999999999e-07, | |
| "loss": 0.0027263425290584564, | |
| "reward": 1.7531540989875793, | |
| "reward_std": 0.40144187211990356, | |
| "rewards/GDino": 0.6388830840587616, | |
| "rewards/GIT": 0.3787819594144821, | |
| "rewards/HPSv2": 0.26526451110839844, | |
| "rewards/ORM": 0.4702245742082596, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -21.3125, | |
| "step": 82 | |
| }, | |
| { | |
| "completion_length": 57.125, | |
| "epoch": 0.09191583610188261, | |
| "grad_norm": 0.46656447649002075, | |
| "kl": 0.00229644775390625, | |
| "learning_rate": 9.481249999999999e-07, | |
| "loss": 0.0034079640172421932, | |
| "reward": 2.1076533794403076, | |
| "reward_std": 0.3496774584054947, | |
| "rewards/GDino": 0.8086712956428528, | |
| "rewards/GIT": 0.44665491580963135, | |
| "rewards/HPSv2": 0.2527198791503906, | |
| "rewards/ORM": 0.5996073186397552, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.875, | |
| "step": 83 | |
| }, | |
| { | |
| "completion_length": 77.609375, | |
| "epoch": 0.09302325581395349, | |
| "grad_norm": 0.7098491787910461, | |
| "kl": 0.003326416015625, | |
| "learning_rate": 9.474999999999999e-07, | |
| "loss": -0.015582434833049774, | |
| "reward": 2.0792417526245117, | |
| "reward_std": 0.405472531914711, | |
| "rewards/GDino": 0.8217203617095947, | |
| "rewards/GIT": 0.6337592005729675, | |
| "rewards/HPSv2": 0.2409496307373047, | |
| "rewards/ORM": 0.3828125, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -21.25, | |
| "step": 84 | |
| }, | |
| { | |
| "completion_length": 70.0, | |
| "epoch": 0.09413067552602436, | |
| "grad_norm": 0.453952431678772, | |
| "kl": 0.0030059814453125, | |
| "learning_rate": 9.468749999999999e-07, | |
| "loss": -0.008341801585629582, | |
| "reward": 1.7731398940086365, | |
| "reward_std": 0.43146421015262604, | |
| "rewards/GDino": 0.6217962503433228, | |
| "rewards/GIT": 0.33136892318725586, | |
| "rewards/HPSv2": 0.2414989471435547, | |
| "rewards/ORM": 0.5784757435321808, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.8125, | |
| "step": 85 | |
| }, | |
| { | |
| "completion_length": 55.46875, | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 0.6065813302993774, | |
| "kl": 0.0029296875, | |
| "learning_rate": 9.462499999999999e-07, | |
| "loss": -0.004339609295129776, | |
| "reward": 2.3409087657928467, | |
| "reward_std": 0.33414456248283386, | |
| "rewards/GDino": 0.843651682138443, | |
| "rewards/GIT": 0.3478253483772278, | |
| "rewards/HPSv2": 0.2929649353027344, | |
| "rewards/ORM": 0.8564667999744415, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -21.6875, | |
| "step": 86 | |
| }, | |
| { | |
| "completion_length": 71.796875, | |
| "epoch": 0.09634551495016612, | |
| "grad_norm": 0.6815423965454102, | |
| "kl": 0.0028076171875, | |
| "learning_rate": 9.45625e-07, | |
| "loss": 0.004890406038612127, | |
| "reward": 2.096968352794647, | |
| "reward_std": 0.4522961378097534, | |
| "rewards/GDino": 0.7090134918689728, | |
| "rewards/GIT": 0.4619881361722946, | |
| "rewards/HPSv2": 0.26172447204589844, | |
| "rewards/ORM": 0.6642423272132874, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.3125, | |
| "step": 87 | |
| }, | |
| { | |
| "completion_length": 62.921875, | |
| "epoch": 0.09745293466223699, | |
| "grad_norm": 0.37047135829925537, | |
| "kl": 0.00237274169921875, | |
| "learning_rate": 9.45e-07, | |
| "loss": -0.007989626843482256, | |
| "reward": 2.100303888320923, | |
| "reward_std": 0.39728429913520813, | |
| "rewards/GDino": 0.8100375235080719, | |
| "rewards/GIT": 0.4551214128732681, | |
| "rewards/HPSv2": 0.2669391632080078, | |
| "rewards/ORM": 0.5682056248188019, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.625, | |
| "step": 88 | |
| }, | |
| { | |
| "completion_length": 61.25, | |
| "epoch": 0.09856035437430787, | |
| "grad_norm": 0.3903006613254547, | |
| "kl": 0.0033111572265625, | |
| "learning_rate": 9.44375e-07, | |
| "loss": -0.0016460134647786617, | |
| "reward": 2.1185483932495117, | |
| "reward_std": 0.34406720101833344, | |
| "rewards/GDino": 0.7301153540611267, | |
| "rewards/GIT": 0.4342738687992096, | |
| "rewards/HPSv2": 0.25724220275878906, | |
| "rewards/ORM": 0.6969169676303864, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -21.0625, | |
| "step": 89 | |
| }, | |
| { | |
| "completion_length": 64.734375, | |
| "epoch": 0.09966777408637874, | |
| "grad_norm": 0.6106704473495483, | |
| "kl": 0.002532958984375, | |
| "learning_rate": 9.4375e-07, | |
| "loss": 0.0018994538113474846, | |
| "reward": 2.281058669090271, | |
| "reward_std": 0.4019897133111954, | |
| "rewards/GDino": 0.8515625298023224, | |
| "rewards/GIT": 0.602006196975708, | |
| "rewards/HPSv2": 0.2570476531982422, | |
| "rewards/ORM": 0.5704423487186432, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -20.875, | |
| "step": 90 | |
| }, | |
| { | |
| "completion_length": 70.625, | |
| "epoch": 0.10077519379844961, | |
| "grad_norm": 0.6082563996315002, | |
| "kl": 0.0025634765625, | |
| "learning_rate": 9.43125e-07, | |
| "loss": -0.001378488726913929, | |
| "reward": 1.7446696758270264, | |
| "reward_std": 0.48222504556179047, | |
| "rewards/GDino": 0.6369770467281342, | |
| "rewards/GIT": 0.4495050609111786, | |
| "rewards/HPSv2": 0.2379169464111328, | |
| "rewards/ORM": 0.42027057707309723, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -21.25, | |
| "step": 91 | |
| }, | |
| { | |
| "completion_length": 69.328125, | |
| "epoch": 0.10188261351052048, | |
| "grad_norm": 0.3885723054409027, | |
| "kl": 0.00247955322265625, | |
| "learning_rate": 9.425e-07, | |
| "loss": 0.0029599489644169807, | |
| "reward": 1.6940485835075378, | |
| "reward_std": 0.48791858553886414, | |
| "rewards/GDino": 0.7451692521572113, | |
| "rewards/GIT": 0.3888908475637436, | |
| "rewards/HPSv2": 0.23882293701171875, | |
| "rewards/ORM": 0.32116562128067017, | |
| "self_certainty_semantic": -25.5, | |
| "self_certainty_token": -20.9375, | |
| "step": 92 | |
| }, | |
| { | |
| "completion_length": 78.96875, | |
| "epoch": 0.10299003322259136, | |
| "grad_norm": 2.441729784011841, | |
| "kl": 0.00281524658203125, | |
| "learning_rate": 9.41875e-07, | |
| "loss": 0.0027102059684693813, | |
| "reward": 2.098644495010376, | |
| "reward_std": 0.5861929953098297, | |
| "rewards/GDino": 0.7753971815109253, | |
| "rewards/GIT": 0.33432240784168243, | |
| "rewards/HPSv2": 0.24440956115722656, | |
| "rewards/ORM": 0.7445152401924133, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.3125, | |
| "step": 93 | |
| }, | |
| { | |
| "completion_length": 53.640625, | |
| "epoch": 0.10409745293466224, | |
| "grad_norm": 1.843809962272644, | |
| "kl": 0.00298309326171875, | |
| "learning_rate": 9.4125e-07, | |
| "loss": -0.002976842690259218, | |
| "reward": 2.022274136543274, | |
| "reward_std": 0.3149227201938629, | |
| "rewards/GDino": 0.7854060530662537, | |
| "rewards/GIT": 0.20830318331718445, | |
| "rewards/HPSv2": 0.2829475402832031, | |
| "rewards/ORM": 0.7456172108650208, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.4375, | |
| "step": 94 | |
| }, | |
| { | |
| "completion_length": 73.8125, | |
| "epoch": 0.10520487264673312, | |
| "grad_norm": 0.4806905686855316, | |
| "kl": 0.0027923583984375, | |
| "learning_rate": 9.40625e-07, | |
| "loss": 0.0057201930321753025, | |
| "reward": 2.5528862476348877, | |
| "reward_std": 0.3981771767139435, | |
| "rewards/GDino": 0.9458979666233063, | |
| "rewards/GIT": 0.7319882810115814, | |
| "rewards/HPSv2": 0.265625, | |
| "rewards/ORM": 0.609375, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -22.375, | |
| "step": 95 | |
| }, | |
| { | |
| "completion_length": 71.578125, | |
| "epoch": 0.10631229235880399, | |
| "grad_norm": 1.3328330516815186, | |
| "kl": 0.00286865234375, | |
| "learning_rate": 9.399999999999999e-07, | |
| "loss": 0.006992874434217811, | |
| "reward": 2.4351861476898193, | |
| "reward_std": 0.25794728100299835, | |
| "rewards/GDino": 0.9020833373069763, | |
| "rewards/GIT": 0.6907803118228912, | |
| "rewards/HPSv2": 0.2606678009033203, | |
| "rewards/ORM": 0.5816546380519867, | |
| "self_certainty_semantic": -25.6875, | |
| "self_certainty_token": -20.125, | |
| "step": 96 | |
| }, | |
| { | |
| "completion_length": 60.703125, | |
| "epoch": 0.10741971207087486, | |
| "grad_norm": 0.5019268989562988, | |
| "kl": 0.003326416015625, | |
| "learning_rate": 9.393749999999999e-07, | |
| "loss": 0.011835527839139104, | |
| "reward": 1.6200063824653625, | |
| "reward_std": 0.4240207076072693, | |
| "rewards/GDino": 0.6504360437393188, | |
| "rewards/GIT": 0.18544349074363708, | |
| "rewards/HPSv2": 0.2720832824707031, | |
| "rewards/ORM": 0.5120435357093811, | |
| "self_certainty_semantic": -25.375, | |
| "self_certainty_token": -21.4375, | |
| "step": 97 | |
| }, | |
| { | |
| "completion_length": 68.578125, | |
| "epoch": 0.10852713178294573, | |
| "grad_norm": 0.38334423303604126, | |
| "kl": 0.003143310546875, | |
| "learning_rate": 9.387499999999999e-07, | |
| "loss": 0.0015034456737339497, | |
| "reward": 1.9381686449050903, | |
| "reward_std": 0.46784070134162903, | |
| "rewards/GDino": 0.7850436270236969, | |
| "rewards/GIT": 0.3971538841724396, | |
| "rewards/HPSv2": 0.2517681121826172, | |
| "rewards/ORM": 0.5042029470205307, | |
| "self_certainty_semantic": -25.4375, | |
| "self_certainty_token": -22.0, | |
| "step": 98 | |
| }, | |
| { | |
| "completion_length": 72.234375, | |
| "epoch": 0.10963455149501661, | |
| "grad_norm": 1.5332801342010498, | |
| "kl": 0.0026702880859375, | |
| "learning_rate": 9.381249999999999e-07, | |
| "loss": 0.0014210238587111235, | |
| "reward": 2.1606199741363525, | |
| "reward_std": 0.4609396979212761, | |
| "rewards/GDino": 0.800000011920929, | |
| "rewards/GIT": 0.6965132355690002, | |
| "rewards/HPSv2": 0.2425823211669922, | |
| "rewards/ORM": 0.4215243309736252, | |
| "self_certainty_semantic": -25.625, | |
| "self_certainty_token": -22.125, | |
| "step": 99 | |
| }, | |
| { | |
| "completion_length": 64.859375, | |
| "epoch": 0.11074197120708748, | |
| "grad_norm": 0.4810887575149536, | |
| "kl": 0.0039520263671875, | |
| "learning_rate": 9.374999999999999e-07, | |
| "loss": -0.006660776911303401, | |
| "reward": 2.0300318002700806, | |
| "reward_std": 0.49300554394721985, | |
| "rewards/GDino": 0.6639764606952667, | |
| "rewards/GIT": 0.41904042661190033, | |
| "rewards/HPSv2": 0.25483131408691406, | |
| "rewards/ORM": 0.6921834945678711, | |
| "self_certainty_semantic": -25.5625, | |
| "self_certainty_token": -21.5, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |