{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1429, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 181.85714721679688, "epoch": 0.0006997900629811056, "grad_norm": 1.5400680303573608, "kl": 0.0, "learning_rate": 4.95049504950495e-09, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1 }, { "completion_length": 195.00001525878906, "epoch": 0.0013995801259622112, "grad_norm": 1.7240362167358398, "kl": 0.0, "learning_rate": 9.9009900990099e-09, "loss": -0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2 }, { "completion_length": 214.07144165039062, "epoch": 0.002099370188943317, "grad_norm": 0.0002106739120790735, "kl": 0.0004433286958374083, "learning_rate": 1.485148514851485e-08, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 3 }, { "completion_length": 265.7857360839844, "epoch": 0.0027991602519244225, "grad_norm": 1.299356460571289, "kl": 0.00045476696686819196, "learning_rate": 1.98019801980198e-08, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 4 }, { "completion_length": 185.00001525878906, "epoch": 0.0034989503149055285, "grad_norm": 1.4131066799163818, "kl": 0.0005135085666552186, "learning_rate": 2.4752475247524754e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 5 }, { "completion_length": 177.35714721679688, "epoch": 0.004198740377886634, "grad_norm": 1.5525341033935547, "kl": 0.0004281887086108327, "learning_rate": 2.97029702970297e-08, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 6 }, { "completion_length": 208.21429443359375, "epoch": 0.00489853044086774, "grad_norm": 1.535361886024475, "kl": 0.0005750337149947882, "learning_rate": 3.465346534653466e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 7 }, { "completion_length": 164.57144165039062, "epoch": 0.005598320503848845, "grad_norm": 3.378136157989502, "kl": 0.0005283132195472717, "learning_rate": 3.96039603960396e-08, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.2857142984867096, "step": 8 }, { "completion_length": 163.57144165039062, "epoch": 0.006298110566829951, "grad_norm": 1.0481290817260742, "kl": 0.0005666803917847574, "learning_rate": 4.455445544554455e-08, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 9 }, { "completion_length": 165.35714721679688, "epoch": 0.006997900629811057, "grad_norm": 2.2721924781799316, "kl": 0.0004833537095692009, "learning_rate": 4.950495049504951e-08, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 10 }, { "completion_length": 233.50001525878906, "epoch": 0.007697690692792162, "grad_norm": 1.1312509775161743, "kl": 0.0005269754910841584, "learning_rate": 5.445544554455445e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 11 }, { "completion_length": 207.21429443359375, "epoch": 0.008397480755773267, "grad_norm": 1.4746304750442505, "kl": 0.0004457664617802948, "learning_rate": 5.94059405940594e-08, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 12 }, { "completion_length": 181.42857360839844, "epoch": 0.009097270818754374, "grad_norm": 1.6396013498306274, "kl": 0.00048512863577343524, "learning_rate": 6.435643564356436e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 13 }, { "completion_length": 190.50001525878906, "epoch": 0.00979706088173548, "grad_norm": 1.8850737810134888, "kl": 0.0005107009201310575, "learning_rate": 6.930693069306931e-08, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 14 }, { "completion_length": 183.7857208251953, "epoch": 0.010496850944716585, "grad_norm": 2.188817024230957, "kl": 0.000583333894610405, "learning_rate": 7.425742574257424e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 15 }, { "completion_length": 176.00001525878906, "epoch": 0.01119664100769769, "grad_norm": 0.8027838468551636, "kl": 0.0004830018151551485, "learning_rate": 7.92079207920792e-08, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 16 }, { "completion_length": 239.50001525878906, "epoch": 0.011896431070678797, "grad_norm": 1.8379231691360474, "kl": 0.0005119083798490465, "learning_rate": 8.415841584158416e-08, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 17 }, { "completion_length": 189.1428680419922, "epoch": 0.012596221133659902, "grad_norm": 1.0618878602981567, "kl": 0.0004455132002476603, "learning_rate": 8.91089108910891e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 18 }, { "completion_length": 166.92857360839844, "epoch": 0.013296011196641007, "grad_norm": 1.5080101490020752, "kl": 0.0005202718893997371, "learning_rate": 9.405940594059406e-08, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 19 }, { "completion_length": 222.35714721679688, "epoch": 0.013995801259622114, "grad_norm": 0.9121536016464233, "kl": 0.0005620857118628919, "learning_rate": 9.900990099009901e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 20 }, { "completion_length": 213.2857208251953, "epoch": 0.01469559132260322, "grad_norm": 2.4334096908569336, "kl": 0.0005722821806557477, "learning_rate": 1.0396039603960394e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 21 }, { "completion_length": 214.07144165039062, "epoch": 0.015395381385584325, "grad_norm": 1.0485061407089233, "kl": 0.0004890592535957694, "learning_rate": 1.089108910891089e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 22 }, { "completion_length": 165.21429443359375, "epoch": 0.01609517144856543, "grad_norm": 1.1050058603286743, "kl": 0.0005054257344454527, "learning_rate": 1.1386138613861386e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 23 }, { "completion_length": 192.7857208251953, "epoch": 0.016794961511546535, "grad_norm": 2.3293559551239014, "kl": 0.0004180397663731128, "learning_rate": 1.188118811881188e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 24 }, { "completion_length": 167.92857360839844, "epoch": 0.01749475157452764, "grad_norm": 2.2882895469665527, "kl": 0.0005512329516932368, "learning_rate": 1.2376237623762375e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 25 }, { "completion_length": 166.07144165039062, "epoch": 0.01819454163750875, "grad_norm": 0.7067592144012451, "kl": 0.0004780480230692774, "learning_rate": 1.2871287128712872e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 26 }, { "completion_length": 192.92857360839844, "epoch": 0.018894331700489854, "grad_norm": 2.014101505279541, "kl": 0.0005218880251049995, "learning_rate": 1.3366336633663366e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 27 }, { "completion_length": 161.57144165039062, "epoch": 0.01959412176347096, "grad_norm": 1.5663647651672363, "kl": 0.00047171575715765357, "learning_rate": 1.3861386138613863e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 28 }, { "completion_length": 204.00001525878906, "epoch": 0.020293911826452064, "grad_norm": 0.00020572614448610693, "kl": 0.0004083892854396254, "learning_rate": 1.4356435643564355e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 29 }, { "completion_length": 217.21429443359375, "epoch": 0.02099370188943317, "grad_norm": 0.0002116670657414943, "kl": 0.00042114814277738333, "learning_rate": 1.485148514851485e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 30 }, { "completion_length": 141.92857360839844, "epoch": 0.021693491952414275, "grad_norm": 0.9107963442802429, "kl": 0.0005051563493907452, "learning_rate": 1.5346534653465346e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 31 }, { "completion_length": 123.64286041259766, "epoch": 0.02239328201539538, "grad_norm": 1.3862981796264648, "kl": 0.00045231543481349945, "learning_rate": 1.584158415841584e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 32 }, { "completion_length": 156.1428680419922, "epoch": 0.02309307207837649, "grad_norm": 2.0801780223846436, "kl": 0.0005031726323068142, "learning_rate": 1.6336633663366337e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 33 }, { "completion_length": 173.50001525878906, "epoch": 0.023792862141357594, "grad_norm": 0.9146536588668823, "kl": 0.0004328509676270187, "learning_rate": 1.6831683168316832e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 34 }, { "completion_length": 164.1428680419922, "epoch": 0.0244926522043387, "grad_norm": 1.343801736831665, "kl": 0.0004730147775262594, "learning_rate": 1.7326732673267326e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 35 }, { "completion_length": 250.2857208251953, "epoch": 0.025192442267319804, "grad_norm": 0.9019289612770081, "kl": 0.0005297228926792741, "learning_rate": 1.782178217821782e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 36 }, { "completion_length": 192.71429443359375, "epoch": 0.02589223233030091, "grad_norm": 1.428687572479248, "kl": 0.0004457100585568696, "learning_rate": 1.8316831683168315e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 37 }, { "completion_length": 197.35714721679688, "epoch": 0.026592022393282014, "grad_norm": 1.309740662574768, "kl": 0.0004139345255680382, "learning_rate": 1.8811881188118812e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 38 }, { "completion_length": 196.42857360839844, "epoch": 0.02729181245626312, "grad_norm": 1.4873285293579102, "kl": 0.0004630376643035561, "learning_rate": 1.9306930693069306e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 39 }, { "completion_length": 169.2857208251953, "epoch": 0.02799160251924423, "grad_norm": 1.4043203592300415, "kl": 0.0004128798609599471, "learning_rate": 1.9801980198019803e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 40 }, { "completion_length": 210.42857360839844, "epoch": 0.028691392582225334, "grad_norm": 0.7562183141708374, "kl": 0.0004386621294543147, "learning_rate": 2.0297029702970297e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 41 }, { "completion_length": 305.21429443359375, "epoch": 0.02939118264520644, "grad_norm": 1.1253929138183594, "kl": 0.00045715304440818727, "learning_rate": 2.079207920792079e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 42 }, { "completion_length": 149.7857208251953, "epoch": 0.030090972708187544, "grad_norm": 1.0248587131500244, "kl": 0.0004310585209168494, "learning_rate": 2.1287128712871286e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 43 }, { "completion_length": 176.85714721679688, "epoch": 0.03079076277116865, "grad_norm": 1.5051597356796265, "kl": 0.0004847579402849078, "learning_rate": 2.178217821782178e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 44 }, { "completion_length": 143.07144165039062, "epoch": 0.031490552834149754, "grad_norm": 2.3099021911621094, "kl": 0.0005260786856524646, "learning_rate": 2.2277227722772277e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 45 }, { "completion_length": 164.07144165039062, "epoch": 0.03219034289713086, "grad_norm": 2.299760341644287, "kl": 0.000553513178601861, "learning_rate": 2.2772277227722772e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 46 }, { "completion_length": 183.85714721679688, "epoch": 0.032890132960111965, "grad_norm": 2.6965394020080566, "kl": 0.0004872652643825859, "learning_rate": 2.3267326732673269e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 47 }, { "completion_length": 157.85714721679688, "epoch": 0.03358992302309307, "grad_norm": 1.6435201168060303, "kl": 0.00036823106347583234, "learning_rate": 2.376237623762376e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 48 }, { "completion_length": 189.07144165039062, "epoch": 0.034289713086074175, "grad_norm": 0.8998086452484131, "kl": 0.00037697338848374784, "learning_rate": 2.4257425742574255e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 49 }, { "completion_length": 205.57144165039062, "epoch": 0.03498950314905528, "grad_norm": 1.2689298391342163, "kl": 0.0005077106179669499, "learning_rate": 2.475247524752475e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 50 }, { "completion_length": 170.07144165039062, "epoch": 0.03568929321203639, "grad_norm": 2.3544671535491943, "kl": 0.0005213998374529183, "learning_rate": 2.524752475247525e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 51 }, { "completion_length": 151.7857208251953, "epoch": 0.0363890832750175, "grad_norm": 1.7855045795440674, "kl": 0.0005098923575133085, "learning_rate": 2.5742574257425743e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 52 }, { "completion_length": 191.57144165039062, "epoch": 0.0370888733379986, "grad_norm": 2.675292491912842, "kl": 0.0004440405173227191, "learning_rate": 2.623762376237624e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 53 }, { "completion_length": 202.2857208251953, "epoch": 0.03778866340097971, "grad_norm": 1.8264518976211548, "kl": 0.0004832753329537809, "learning_rate": 2.673267326732673e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 54 }, { "completion_length": 197.07144165039062, "epoch": 0.03848845346396081, "grad_norm": 1.1846554279327393, "kl": 0.000452172476798296, "learning_rate": 2.7227722772277226e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 55 }, { "completion_length": 157.35714721679688, "epoch": 0.03918824352694192, "grad_norm": 1.6096159219741821, "kl": 0.0004522771341726184, "learning_rate": 2.7722772277227726e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 56 }, { "completion_length": 195.85714721679688, "epoch": 0.03988803358992302, "grad_norm": 1.6588780879974365, "kl": 0.0005071864579804242, "learning_rate": 2.8217821782178215e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 57 }, { "completion_length": 150.35714721679688, "epoch": 0.04058782365290413, "grad_norm": 1.7005771398544312, "kl": 0.0004665878077503294, "learning_rate": 2.871287128712871e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 58 }, { "completion_length": 178.7857208251953, "epoch": 0.041287613715885234, "grad_norm": 2.3391377925872803, "kl": 0.0005919806426391006, "learning_rate": 2.9207920792079203e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 59 }, { "completion_length": 124.28572082519531, "epoch": 0.04198740377886634, "grad_norm": 2.4664907455444336, "kl": 0.0005438023363240063, "learning_rate": 2.97029702970297e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 60 }, { "completion_length": 203.50001525878906, "epoch": 0.042687193841847444, "grad_norm": 1.642982840538025, "kl": 0.0005555747775360942, "learning_rate": 3.01980198019802e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 61 }, { "completion_length": 180.07144165039062, "epoch": 0.04338698390482855, "grad_norm": 2.22625732421875, "kl": 0.0005801984225399792, "learning_rate": 3.069306930693069e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 62 }, { "completion_length": 227.50001525878906, "epoch": 0.044086773967809655, "grad_norm": 1.732594609260559, "kl": 0.0005832554306834936, "learning_rate": 3.1188118811881186e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 63 }, { "completion_length": 191.50001525878906, "epoch": 0.04478656403079076, "grad_norm": 1.6817090511322021, "kl": 0.000676915340591222, "learning_rate": 3.168316831683168e-07, "loss": 0.0, "reward": 0.7500000596046448, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 64 }, { "completion_length": 204.42857360839844, "epoch": 0.04548635409377187, "grad_norm": 1.8597685098648071, "kl": 0.00041905639227479696, "learning_rate": 3.217821782178218e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 65 }, { "completion_length": 201.92857360839844, "epoch": 0.04618614415675298, "grad_norm": 1.988457202911377, "kl": 0.00048096507089212537, "learning_rate": 3.2673267326732674e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 66 }, { "completion_length": 210.1428680419922, "epoch": 0.04688593421973408, "grad_norm": 1.6554455757141113, "kl": 0.0006131522241048515, "learning_rate": 3.316831683168317e-07, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 67 }, { "completion_length": 223.7857208251953, "epoch": 0.04758572428271519, "grad_norm": 1.6838781833648682, "kl": 0.0005825796979479492, "learning_rate": 3.3663366336633663e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 68 }, { "completion_length": 189.50001525878906, "epoch": 0.04828551434569629, "grad_norm": 1.3780592679977417, "kl": 0.0007338147843256593, "learning_rate": 3.415841584158416e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 69 }, { "completion_length": 173.2857208251953, "epoch": 0.0489853044086774, "grad_norm": 2.383615016937256, "kl": 0.0007332983077503741, "learning_rate": 3.465346534653465e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 70 }, { "completion_length": 179.71429443359375, "epoch": 0.0496850944716585, "grad_norm": 1.2295047044754028, "kl": 0.0007355264970101416, "learning_rate": 3.5148514851485146e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 71 }, { "completion_length": 215.92857360839844, "epoch": 0.05038488453463961, "grad_norm": 0.9826344847679138, "kl": 0.0007876859162934124, "learning_rate": 3.564356435643564e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 72 }, { "completion_length": 151.42857360839844, "epoch": 0.05108467459762071, "grad_norm": 0.9193163514137268, "kl": 0.0007704061572439969, "learning_rate": 3.6138613861386135e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 73 }, { "completion_length": 201.6428680419922, "epoch": 0.05178446466060182, "grad_norm": 1.2143993377685547, "kl": 0.0008110264898277819, "learning_rate": 3.663366336633663e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 74 }, { "completion_length": 157.35714721679688, "epoch": 0.052484254723582924, "grad_norm": 1.1607396602630615, "kl": 0.0008065538131631911, "learning_rate": 3.712871287128713e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 75 }, { "completion_length": 194.50001525878906, "epoch": 0.05318404478656403, "grad_norm": 1.9686819314956665, "kl": 0.0010525076650083065, "learning_rate": 3.7623762376237623e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 76 }, { "completion_length": 153.6428680419922, "epoch": 0.053883834849545134, "grad_norm": 2.805495023727417, "kl": 0.0013437506277114153, "learning_rate": 3.811881188118812e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 77 }, { "completion_length": 257.64288330078125, "epoch": 0.05458362491252624, "grad_norm": 1.4481536149978638, "kl": 0.0011723138159140944, "learning_rate": 3.861386138613861e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 78 }, { "completion_length": 179.71429443359375, "epoch": 0.055283414975507345, "grad_norm": 1.9404147863388062, "kl": 0.0011452907929196954, "learning_rate": 3.9108910891089106e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 79 }, { "completion_length": 143.35714721679688, "epoch": 0.05598320503848846, "grad_norm": 1.6195811033248901, "kl": 0.0011998371919617057, "learning_rate": 3.9603960396039606e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 80 }, { "completion_length": 193.57144165039062, "epoch": 0.05668299510146956, "grad_norm": 1.1084884405136108, "kl": 0.0011861013481393456, "learning_rate": 4.00990099009901e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 81 }, { "completion_length": 137.7857208251953, "epoch": 0.05738278516445067, "grad_norm": 2.3091042041778564, "kl": 0.0014914502389729023, "learning_rate": 4.0594059405940595e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 82 }, { "completion_length": 178.2857208251953, "epoch": 0.05808257522743177, "grad_norm": 1.342816710472107, "kl": 0.00154755765106529, "learning_rate": 4.1089108910891084e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 83 }, { "completion_length": 138.35714721679688, "epoch": 0.05878236529041288, "grad_norm": 0.0016529923304915428, "kl": 0.0018875404493883252, "learning_rate": 4.158415841584158e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 84 }, { "completion_length": 148.7857208251953, "epoch": 0.05948215535339398, "grad_norm": 1.2848488092422485, "kl": 0.0015424591256305575, "learning_rate": 4.207920792079208e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 85 }, { "completion_length": 201.35714721679688, "epoch": 0.06018194541637509, "grad_norm": 1.8672930002212524, "kl": 0.0014850146835669875, "learning_rate": 4.257425742574257e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 86 }, { "completion_length": 166.57144165039062, "epoch": 0.06088173547935619, "grad_norm": 1.7195179462432861, "kl": 0.0020524458959698677, "learning_rate": 4.3069306930693066e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 87 }, { "completion_length": 186.7857208251953, "epoch": 0.0615815255423373, "grad_norm": 2.001164674758911, "kl": 0.0017333102878183126, "learning_rate": 4.356435643564356e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 88 }, { "completion_length": 109.50000762939453, "epoch": 0.0622813156053184, "grad_norm": 2.169886827468872, "kl": 0.0017122991848737001, "learning_rate": 4.405940594059406e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 89 }, { "completion_length": 106.85714721679688, "epoch": 0.06298110566829951, "grad_norm": 1.6305705308914185, "kl": 0.002229567850008607, "learning_rate": 4.4554455445544555e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 90 }, { "completion_length": 208.85714721679688, "epoch": 0.06368089573128062, "grad_norm": 0.8611403107643127, "kl": 0.00243167020380497, "learning_rate": 4.504950495049505e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 91 }, { "completion_length": 179.7857208251953, "epoch": 0.06438068579426172, "grad_norm": 1.1815861463546753, "kl": 0.0024530005175620317, "learning_rate": 4.5544554455445543e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 92 }, { "completion_length": 210.50001525878906, "epoch": 0.06508047585724283, "grad_norm": 1.3900034427642822, "kl": 0.0018575751455500722, "learning_rate": 4.603960396039604e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 93 }, { "completion_length": 151.5, "epoch": 0.06578026592022393, "grad_norm": 0.001375512219965458, "kl": 0.0019482049392536283, "learning_rate": 4.6534653465346537e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 94 }, { "completion_length": 223.00001525878906, "epoch": 0.06648005598320504, "grad_norm": 1.664806842803955, "kl": 0.0027798519004136324, "learning_rate": 4.7029702970297026e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 95 }, { "completion_length": 239.9285888671875, "epoch": 0.06717984604618614, "grad_norm": 0.9965423941612244, "kl": 0.002117543015629053, "learning_rate": 4.752475247524752e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 96 }, { "completion_length": 199.07144165039062, "epoch": 0.06787963610916725, "grad_norm": 1.2931371927261353, "kl": 0.0023884549736976624, "learning_rate": 4.801980198019802e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 97 }, { "completion_length": 131.1428680419922, "epoch": 0.06857942617214835, "grad_norm": 1.7063499689102173, "kl": 0.004030201118439436, "learning_rate": 4.851485148514851e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 98 }, { "completion_length": 184.85714721679688, "epoch": 0.06927921623512946, "grad_norm": 2.1803157329559326, "kl": 0.005060242023319006, "learning_rate": 4.900990099009901e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.4642857313156128, "rewards/check_similarity_func": 0.4285714626312256, "step": 99 }, { "completion_length": 173.6428680419922, "epoch": 0.06997900629811056, "grad_norm": 0.7444345951080322, "kl": 0.002564522437751293, "learning_rate": 4.95049504950495e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.4642857313156128, "rewards/check_similarity_func": 0.4285714626312256, "step": 100 }, { "completion_length": 190.2857208251953, "epoch": 0.07067879636109167, "grad_norm": 1.9678312540054321, "kl": 0.004016941878944635, "learning_rate": 5e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 101 }, { "completion_length": 172.6428680419922, "epoch": 0.07137858642407278, "grad_norm": 0.8835659623146057, "kl": 0.004802301526069641, "learning_rate": 4.999993004586259e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 102 }, { "completion_length": 218.1428680419922, "epoch": 0.07207837648705388, "grad_norm": 1.698517918586731, "kl": 0.005469852592796087, "learning_rate": 4.999972018384183e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 103 }, { "completion_length": 184.92857360839844, "epoch": 0.072778166550035, "grad_norm": 0.6785391569137573, "kl": 0.004125654697418213, "learning_rate": 4.999937041511218e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 104 }, { "completion_length": 203.92857360839844, "epoch": 0.0734779566130161, "grad_norm": 0.7557395696640015, "kl": 0.0034580158535391092, "learning_rate": 4.999888074163107e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 105 }, { "completion_length": 206.21429443359375, "epoch": 0.0741777466759972, "grad_norm": 0.9162557721138, "kl": 0.004212158732116222, "learning_rate": 4.999825116613887e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 106 }, { "completion_length": 140.07144165039062, "epoch": 0.0748775367389783, "grad_norm": 1.6296213865280151, "kl": 0.005939202383160591, "learning_rate": 4.999748169215891e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 107 }, { "completion_length": 136.1428680419922, "epoch": 0.07557732680195942, "grad_norm": 1.8523602485656738, "kl": 0.011265984736382961, "learning_rate": 4.999657232399738e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 108 }, { "completion_length": 140.7857208251953, "epoch": 0.07627711686494051, "grad_norm": 0.004031853750348091, "kl": 0.00854254886507988, "learning_rate": 4.999552306674344e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 109 }, { "completion_length": 223.07144165039062, "epoch": 0.07697690692792163, "grad_norm": 1.1062513589859009, "kl": 0.005622380878776312, "learning_rate": 4.999433392626907e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 110 }, { "completion_length": 225.71429443359375, "epoch": 0.07767669699090272, "grad_norm": 0.0022427733056247234, "kl": 0.004698066506534815, "learning_rate": 4.999300490922909e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 111 }, { "completion_length": 183.85714721679688, "epoch": 0.07837648705388384, "grad_norm": 1.156471848487854, "kl": 0.007482955232262611, "learning_rate": 4.999153602306114e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 112 }, { "completion_length": 152.71429443359375, "epoch": 0.07907627711686493, "grad_norm": 1.6487705707550049, "kl": 0.007868105545639992, "learning_rate": 4.998992727598557e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 113 }, { "completion_length": 168.21429443359375, "epoch": 0.07977606717984605, "grad_norm": 1.6021904945373535, "kl": 0.007955645211040974, "learning_rate": 4.998817867700546e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 114 }, { "completion_length": 162.42857360839844, "epoch": 0.08047585724282715, "grad_norm": 1.8559244871139526, "kl": 0.008736505173146725, "learning_rate": 4.998629023590656e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 115 }, { "completion_length": 198.7857208251953, "epoch": 0.08117564730580826, "grad_norm": 0.8295167088508606, "kl": 0.006919287610799074, "learning_rate": 4.99842619632572e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 116 }, { "completion_length": 188.00001525878906, "epoch": 0.08187543736878937, "grad_norm": 1.394171118736267, "kl": 0.009634546004235744, "learning_rate": 4.998209387040828e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 117 }, { "completion_length": 160.35714721679688, "epoch": 0.08257522743177047, "grad_norm": 1.560783863067627, "kl": 0.008878177031874657, "learning_rate": 4.997978596949316e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 118 }, { "completion_length": 135.92857360839844, "epoch": 0.08327501749475158, "grad_norm": 2.2454240322113037, "kl": 0.010685044340789318, "learning_rate": 4.997733827342762e-07, "loss": 0.0, "reward": 1.5357143878936768, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 119 }, { "completion_length": 215.50001525878906, "epoch": 0.08397480755773268, "grad_norm": 1.7073709964752197, "kl": 0.008179904893040657, "learning_rate": 4.997475079590977e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 120 }, { "completion_length": 229.07144165039062, "epoch": 0.08467459762071379, "grad_norm": 1.7980595827102661, "kl": 0.00724398996680975, "learning_rate": 4.997202355141999e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 121 }, { "completion_length": 169.35714721679688, "epoch": 0.08537438768369489, "grad_norm": 1.6071653366088867, "kl": 0.010347086936235428, "learning_rate": 4.996915655522085e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 122 }, { "completion_length": 166.21429443359375, "epoch": 0.086074177746676, "grad_norm": 2.336454391479492, "kl": 0.00916281808167696, "learning_rate": 4.9966149823357e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 123 }, { "completion_length": 145.2857208251953, "epoch": 0.0867739678096571, "grad_norm": 1.3544882535934448, "kl": 0.011103290133178234, "learning_rate": 4.996300337265512e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 124 }, { "completion_length": 199.6428680419922, "epoch": 0.08747375787263821, "grad_norm": 0.59739089012146, "kl": 0.012787672691047192, "learning_rate": 4.995971722072378e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 125 }, { "completion_length": 179.07144165039062, "epoch": 0.08817354793561931, "grad_norm": 1.5247255563735962, "kl": 0.012628079392015934, "learning_rate": 4.995629138595338e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 126 }, { "completion_length": 164.7857208251953, "epoch": 0.08887333799860042, "grad_norm": 1.0241918563842773, "kl": 0.011134312488138676, "learning_rate": 4.995272588751601e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 127 }, { "completion_length": 133.85714721679688, "epoch": 0.08957312806158152, "grad_norm": 1.0907318592071533, "kl": 0.012002574279904366, "learning_rate": 4.99490207453654e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 128 }, { "completion_length": 165.35714721679688, "epoch": 0.09027291812456263, "grad_norm": 0.006122582592070103, "kl": 0.017770450562238693, "learning_rate": 4.994517598023674e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 129 }, { "completion_length": 205.07144165039062, "epoch": 0.09097270818754374, "grad_norm": 0.608751654624939, "kl": 0.014130090363323689, "learning_rate": 4.994119161364662e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 130 }, { "completion_length": 152.21429443359375, "epoch": 0.09167249825052484, "grad_norm": 1.5517243146896362, "kl": 0.01926426962018013, "learning_rate": 4.993706766789286e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 131 }, { "completion_length": 184.92857360839844, "epoch": 0.09237228831350595, "grad_norm": 1.0248626470565796, "kl": 0.013803663663566113, "learning_rate": 4.993280416605443e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 132 }, { "completion_length": 144.92857360839844, "epoch": 0.09307207837648705, "grad_norm": 1.47726571559906, "kl": 0.023111265152692795, "learning_rate": 4.99284011319913e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 133 }, { "completion_length": 179.85714721679688, "epoch": 0.09377186843946816, "grad_norm": 1.1501293182373047, "kl": 0.018620293587446213, "learning_rate": 4.99238585903443e-07, "loss": 0.0, "reward": 1.4642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 134 }, { "completion_length": 166.0, "epoch": 0.09447165850244926, "grad_norm": 0.5995805263519287, "kl": 0.0225803442299366, "learning_rate": 4.9919176566535e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 135 }, { "completion_length": 199.50001525878906, "epoch": 0.09517144856543037, "grad_norm": 0.0038320920430123806, "kl": 0.014900755137205124, "learning_rate": 4.991435508676557e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 136 }, { "completion_length": 181.50001525878906, "epoch": 0.09587123862841147, "grad_norm": 1.0822861194610596, "kl": 0.0184601079672575, "learning_rate": 4.990939417801858e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 137 }, { "completion_length": 215.2857208251953, "epoch": 0.09657102869139259, "grad_norm": 0.6916272044181824, "kl": 0.013206357136368752, "learning_rate": 4.990429386805694e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 138 }, { "completion_length": 138.42857360839844, "epoch": 0.09727081875437368, "grad_norm": 0.4737311899662018, "kl": 0.0251768771559, "learning_rate": 4.989905418542365e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 139 }, { "completion_length": 150.6428680419922, "epoch": 0.0979706088173548, "grad_norm": 1.9129955768585205, "kl": 0.023285748437047005, "learning_rate": 4.989367515944172e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 140 }, { "completion_length": 142.92857360839844, "epoch": 0.0986703988803359, "grad_norm": 0.6981713771820068, "kl": 0.030149009078741074, "learning_rate": 4.988815682021397e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 141 }, { "completion_length": 253.21429443359375, "epoch": 0.099370188943317, "grad_norm": 1.671301245689392, "kl": 0.019632816314697266, "learning_rate": 4.988249919862284e-07, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 142 }, { "completion_length": 157.07144165039062, "epoch": 0.1000699790062981, "grad_norm": 0.5066415071487427, "kl": 0.027261968702077866, "learning_rate": 4.987670232633026e-07, "loss": 0.0, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 143 }, { "completion_length": 166.71429443359375, "epoch": 0.10076976906927922, "grad_norm": 0.006843244656920433, "kl": 0.027247583493590355, "learning_rate": 4.987076623577744e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 144 }, { "completion_length": 166.5, "epoch": 0.10146955913226033, "grad_norm": 0.007074953056871891, "kl": 0.02989049255847931, "learning_rate": 4.986469096018472e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 145 }, { "completion_length": 168.35714721679688, "epoch": 0.10216934919524143, "grad_norm": 1.1491600275039673, "kl": 0.018468284979462624, "learning_rate": 4.985847653355133e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 146 }, { "completion_length": 143.57144165039062, "epoch": 0.10286913925822254, "grad_norm": 1.1063581705093384, "kl": 0.037655703723430634, "learning_rate": 4.985212299065528e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 147 }, { "completion_length": 159.92857360839844, "epoch": 0.10356892932120364, "grad_norm": 0.9509813189506531, "kl": 0.028517942875623703, "learning_rate": 4.984563036705308e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 148 }, { "completion_length": 158.5, "epoch": 0.10426871938418475, "grad_norm": 1.3582109212875366, "kl": 0.023688364773988724, "learning_rate": 4.983899869907962e-07, "loss": 0.0, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 149 }, { "completion_length": 135.2857208251953, "epoch": 0.10496850944716585, "grad_norm": 0.8189286589622498, "kl": 0.0315164253115654, "learning_rate": 4.98322280238479e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 150 }, { "completion_length": 127.5714340209961, "epoch": 0.10566829951014696, "grad_norm": 1.6647231578826904, "kl": 0.04224463179707527, "learning_rate": 4.982531837924886e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 151 }, { "completion_length": 188.1428680419922, "epoch": 0.10636808957312806, "grad_norm": 1.4539200067520142, "kl": 0.02859153226017952, "learning_rate": 4.981826980395116e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 152 }, { "completion_length": 151.6428680419922, "epoch": 0.10706787963610917, "grad_norm": 1.0444366931915283, "kl": 0.03756333515048027, "learning_rate": 4.981108233740095e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 153 }, { "completion_length": 133.2857208251953, "epoch": 0.10776766969909027, "grad_norm": 2.0720365047454834, "kl": 0.06380664557218552, "learning_rate": 4.98037560198217e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 154 }, { "completion_length": 176.1428680419922, "epoch": 0.10846745976207138, "grad_norm": 1.2341344356536865, "kl": 0.15105512738227844, "learning_rate": 4.979629089221387e-07, "loss": 0.0002, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 155 }, { "completion_length": 196.35714721679688, "epoch": 0.10916724982505248, "grad_norm": 1.0107539892196655, "kl": 0.03982974961400032, "learning_rate": 4.978868699635481e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 156 }, { "completion_length": 184.21429443359375, "epoch": 0.10986703988803359, "grad_norm": 1.5776468515396118, "kl": 0.03241683542728424, "learning_rate": 4.978094437479843e-07, "loss": 0.0, "reward": 1.4642857313156128, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 157 }, { "completion_length": 159.1428680419922, "epoch": 0.11056682995101469, "grad_norm": 1.1419658660888672, "kl": 0.04663442075252533, "learning_rate": 4.977306307087501e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 158 }, { "completion_length": 182.50001525878906, "epoch": 0.1112666200139958, "grad_norm": 0.008137581869959831, "kl": 0.037541355937719345, "learning_rate": 4.976504312869092e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 159 }, { "completion_length": 130.85714721679688, "epoch": 0.11196641007697691, "grad_norm": 1.1652930974960327, "kl": 0.038936153054237366, "learning_rate": 4.975688459312843e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 160 }, { "completion_length": 233.9285888671875, "epoch": 0.11266620013995801, "grad_norm": 0.7469726800918579, "kl": 0.0383068211376667, "learning_rate": 4.974858750984539e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 161 }, { "completion_length": 174.21429443359375, "epoch": 0.11336599020293912, "grad_norm": 0.005710199940949678, "kl": 0.03846415504813194, "learning_rate": 4.974015192527505e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 162 }, { "completion_length": 122.21429443359375, "epoch": 0.11406578026592022, "grad_norm": 1.5605430603027344, "kl": 0.056271836161613464, "learning_rate": 4.973157788662569e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 163 }, { "completion_length": 179.92857360839844, "epoch": 0.11476557032890133, "grad_norm": 0.851883590221405, "kl": 0.04005267098546028, "learning_rate": 4.972286544188051e-07, "loss": 0.0, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 164 }, { "completion_length": 184.2857208251953, "epoch": 0.11546536039188243, "grad_norm": 0.937244713306427, "kl": 0.05456297844648361, "learning_rate": 4.971401463979721e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 165 }, { "completion_length": 182.35714721679688, "epoch": 0.11616515045486354, "grad_norm": 1.4655040502548218, "kl": 0.04574892669916153, "learning_rate": 4.970502552990782e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 166 }, { "completion_length": 154.7857208251953, "epoch": 0.11686494051784464, "grad_norm": 0.98805171251297, "kl": 0.04196275398135185, "learning_rate": 4.969589816251836e-07, "loss": 0.0, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 167 }, { "completion_length": 114.28572082519531, "epoch": 0.11756473058082575, "grad_norm": 0.013440999202430248, "kl": 0.06862294673919678, "learning_rate": 4.968663258870862e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 168 }, { "completion_length": 160.0, "epoch": 0.11826452064380685, "grad_norm": 1.003117322921753, "kl": 0.050952788442373276, "learning_rate": 4.96772288603318e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 169 }, { "completion_length": 172.85714721679688, "epoch": 0.11896431070678797, "grad_norm": 0.3903418481349945, "kl": 0.042292170226573944, "learning_rate": 4.966768703001428e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 170 }, { "completion_length": 142.92857360839844, "epoch": 0.11966410076976906, "grad_norm": 1.6304961442947388, "kl": 0.0691414475440979, "learning_rate": 4.965800715115531e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 171 }, { "completion_length": 176.35714721679688, "epoch": 0.12036389083275018, "grad_norm": 1.1992915868759155, "kl": 0.05326082184910774, "learning_rate": 4.964818927792668e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 172 }, { "completion_length": 191.6428680419922, "epoch": 0.12106368089573127, "grad_norm": 0.2780166566371918, "kl": 0.03764572739601135, "learning_rate": 4.963823346527248e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 173 }, { "completion_length": 200.7857208251953, "epoch": 0.12176347095871239, "grad_norm": 0.6391341686248779, "kl": 0.04579555243253708, "learning_rate": 4.962813976890871e-07, "loss": 0.0, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 174 }, { "completion_length": 136.07144165039062, "epoch": 0.1224632610216935, "grad_norm": 1.8551373481750488, "kl": 0.06963112205266953, "learning_rate": 4.961790824532305e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 175 }, { "completion_length": 183.71429443359375, "epoch": 0.1231630510846746, "grad_norm": 1.095022201538086, "kl": 0.040811192244291306, "learning_rate": 4.96075389517745e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 176 }, { "completion_length": 165.7857208251953, "epoch": 0.12386284114765571, "grad_norm": 0.8499320149421692, "kl": 0.028491254895925522, "learning_rate": 4.959703194629303e-07, "loss": 0.0, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 177 }, { "completion_length": 181.7857208251953, "epoch": 0.1245626312106368, "grad_norm": 1.5815658569335938, "kl": 0.04530472680926323, "learning_rate": 4.958638728767936e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 178 }, { "completion_length": 207.7857208251953, "epoch": 0.12526242127361792, "grad_norm": 0.6339020133018494, "kl": 0.03002651035785675, "learning_rate": 4.957560503550449e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 179 }, { "completion_length": 153.57144165039062, "epoch": 0.12596221133659902, "grad_norm": 0.7833738923072815, "kl": 0.052023474127054214, "learning_rate": 4.956468525010947e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 180 }, { "completion_length": 140.6428680419922, "epoch": 0.12666200139958012, "grad_norm": 0.9095503687858582, "kl": 0.06118329241871834, "learning_rate": 4.955362799260506e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 181 }, { "completion_length": 182.92857360839844, "epoch": 0.12736179146256124, "grad_norm": 1.4771007299423218, "kl": 0.0452856682240963, "learning_rate": 4.954243332487132e-07, "loss": 0.0, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 182 }, { "completion_length": 151.21429443359375, "epoch": 0.12806158152554234, "grad_norm": 1.207655429840088, "kl": 0.04300834611058235, "learning_rate": 4.953110130955732e-07, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 183 }, { "completion_length": 167.2857208251953, "epoch": 0.12876137158852344, "grad_norm": 1.3498799800872803, "kl": 0.049103204160928726, "learning_rate": 4.951963201008075e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 184 }, { "completion_length": 168.2857208251953, "epoch": 0.12946116165150454, "grad_norm": 1.1723055839538574, "kl": 0.038598768413066864, "learning_rate": 4.950802549062763e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 185 }, { "completion_length": 183.85714721679688, "epoch": 0.13016095171448566, "grad_norm": 0.9584428668022156, "kl": 0.03624965623021126, "learning_rate": 4.949628181615188e-07, "loss": 0.0, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 186 }, { "completion_length": 155.07144165039062, "epoch": 0.13086074177746676, "grad_norm": 0.5523901581764221, "kl": 0.05786675587296486, "learning_rate": 4.948440105237498e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 187 }, { "completion_length": 185.57144165039062, "epoch": 0.13156053184044786, "grad_norm": 0.6750544309616089, "kl": 0.03796740248799324, "learning_rate": 4.947238326578562e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 188 }, { "completion_length": 172.57144165039062, "epoch": 0.13226032190342898, "grad_norm": 1.2151705026626587, "kl": 0.04739536717534065, "learning_rate": 4.946022852363932e-07, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 189 }, { "completion_length": 172.50001525878906, "epoch": 0.13296011196641008, "grad_norm": 1.22635817527771, "kl": 0.06361626088619232, "learning_rate": 4.944793689395802e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 190 }, { "completion_length": 154.21429443359375, "epoch": 0.13365990202939118, "grad_norm": 1.5272878408432007, "kl": 0.04575209692120552, "learning_rate": 4.943550844552977e-07, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 191 }, { "completion_length": 169.85714721679688, "epoch": 0.13435969209237228, "grad_norm": 0.8681791424751282, "kl": 0.0537167564034462, "learning_rate": 4.942294324790827e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 192 }, { "completion_length": 196.6428680419922, "epoch": 0.1350594821553534, "grad_norm": 0.9454683065414429, "kl": 0.06295499950647354, "learning_rate": 4.941024137141252e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 193 }, { "completion_length": 152.92857360839844, "epoch": 0.1357592722183345, "grad_norm": 0.7572140097618103, "kl": 0.055502425879240036, "learning_rate": 4.939740288712644e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 194 }, { "completion_length": 154.21429443359375, "epoch": 0.1364590622813156, "grad_norm": 0.005600570701062679, "kl": 0.049493707716464996, "learning_rate": 4.938442786689842e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 195 }, { "completion_length": 176.1428680419922, "epoch": 0.1371588523442967, "grad_norm": 1.0913110971450806, "kl": 0.06787514686584473, "learning_rate": 4.937131638334099e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 196 }, { "completion_length": 167.92857360839844, "epoch": 0.13785864240727783, "grad_norm": 0.005277193617075682, "kl": 0.04347432777285576, "learning_rate": 4.935806850983033e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 197 }, { "completion_length": 168.5, "epoch": 0.13855843247025892, "grad_norm": 0.96235591173172, "kl": 0.0519852377474308, "learning_rate": 4.934468432050594e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 198 }, { "completion_length": 160.7857208251953, "epoch": 0.13925822253324002, "grad_norm": 1.047194242477417, "kl": 0.06388276815414429, "learning_rate": 4.933116389027016e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 199 }, { "completion_length": 138.7857208251953, "epoch": 0.13995801259622112, "grad_norm": 0.009848758578300476, "kl": 0.061549946665763855, "learning_rate": 4.93175072947878e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 200 }, { "completion_length": 157.2857208251953, "epoch": 0.14065780265920225, "grad_norm": 0.4332066476345062, "kl": 0.08204694092273712, "learning_rate": 4.93037146104857e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 201 }, { "completion_length": 225.85714721679688, "epoch": 0.14135759272218335, "grad_norm": 0.9557908177375793, "kl": 0.03517528995871544, "learning_rate": 4.928978591455227e-07, "loss": 0.0, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 202 }, { "completion_length": 155.0, "epoch": 0.14205738278516444, "grad_norm": 0.004708544816821814, "kl": 0.055276378989219666, "learning_rate": 4.927572128493712e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 203 }, { "completion_length": 141.35714721679688, "epoch": 0.14275717284814557, "grad_norm": 0.4613928496837616, "kl": 0.0727616399526596, "learning_rate": 4.926152080035054e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 204 }, { "completion_length": 110.35714721679688, "epoch": 0.14345696291112667, "grad_norm": 0.7493223547935486, "kl": 0.09131006896495819, "learning_rate": 4.924718454026318e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 205 }, { "completion_length": 192.21429443359375, "epoch": 0.14415675297410777, "grad_norm": 0.6739380359649658, "kl": 0.029740670695900917, "learning_rate": 4.923271258490547e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 206 }, { "completion_length": 135.42857360839844, "epoch": 0.14485654303708886, "grad_norm": 0.012454037554562092, "kl": 0.07708840817213058, "learning_rate": 4.921810501526727e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.4285714626312256, "step": 207 }, { "completion_length": 171.2857208251953, "epoch": 0.14555633310007, "grad_norm": 0.7572370767593384, "kl": 0.05998237058520317, "learning_rate": 4.920336191309738e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 208 }, { "completion_length": 249.57144165039062, "epoch": 0.1462561231630511, "grad_norm": 0.5551992058753967, "kl": 0.04767899215221405, "learning_rate": 4.918848336090309e-07, "loss": 0.0, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 209 }, { "completion_length": 137.07144165039062, "epoch": 0.1469559132260322, "grad_norm": 1.272829294204712, "kl": 0.09460750222206116, "learning_rate": 4.917346944194967e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 210 }, { "completion_length": 151.5, "epoch": 0.14765570328901328, "grad_norm": 1.102591633796692, "kl": 0.05412725359201431, "learning_rate": 4.915832024026001e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 211 }, { "completion_length": 200.2857208251953, "epoch": 0.1483554933519944, "grad_norm": 0.8212209939956665, "kl": 0.05087078735232353, "learning_rate": 4.914303584061404e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 212 }, { "completion_length": 170.71429443359375, "epoch": 0.1490552834149755, "grad_norm": 1.220133662223816, "kl": 0.044998493045568466, "learning_rate": 4.912761632854832e-07, "loss": 0.0, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 213 }, { "completion_length": 147.7857208251953, "epoch": 0.1497550734779566, "grad_norm": 0.5329028964042664, "kl": 0.047548793256282806, "learning_rate": 4.911206179035556e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 214 }, { "completion_length": 154.42857360839844, "epoch": 0.1504548635409377, "grad_norm": 1.063930869102478, "kl": 0.06235465779900551, "learning_rate": 4.90963723130841e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 215 }, { "completion_length": 113.92857360839844, "epoch": 0.15115465360391883, "grad_norm": 0.7831299901008606, "kl": 0.05324404314160347, "learning_rate": 4.908054798453742e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 216 }, { "completion_length": 179.7857208251953, "epoch": 0.15185444366689993, "grad_norm": 0.8284366130828857, "kl": 0.04782997816801071, "learning_rate": 4.906458889327374e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 217 }, { "completion_length": 216.42857360839844, "epoch": 0.15255423372988103, "grad_norm": 0.31528374552726746, "kl": 0.03577737882733345, "learning_rate": 4.90484951286054e-07, "loss": 0.0, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 218 }, { "completion_length": 197.00001525878906, "epoch": 0.15325402379286215, "grad_norm": 0.59961998462677, "kl": 0.054736871272325516, "learning_rate": 4.903226678059841e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 219 }, { "completion_length": 185.71429443359375, "epoch": 0.15395381385584325, "grad_norm": 0.8938444256782532, "kl": 0.04518384858965874, "learning_rate": 4.901590394007201e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 220 }, { "completion_length": 129.35714721679688, "epoch": 0.15465360391882435, "grad_norm": 2.63861083984375, "kl": 0.10122643411159515, "learning_rate": 4.899940669859807e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 221 }, { "completion_length": 140.7857208251953, "epoch": 0.15535339398180545, "grad_norm": 1.2665458917617798, "kl": 0.07523010671138763, "learning_rate": 4.898277514850059e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 222 }, { "completion_length": 147.35714721679688, "epoch": 0.15605318404478657, "grad_norm": 0.6549923419952393, "kl": 0.06474055349826813, "learning_rate": 4.896600938285525e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 223 }, { "completion_length": 150.6428680419922, "epoch": 0.15675297410776767, "grad_norm": 0.9126837253570557, "kl": 0.05601576715707779, "learning_rate": 4.894910949548883e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 224 }, { "completion_length": 183.85714721679688, "epoch": 0.15745276417074877, "grad_norm": 0.7506590485572815, "kl": 0.05505940318107605, "learning_rate": 4.893207558097866e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 225 }, { "completion_length": 158.1428680419922, "epoch": 0.15815255423372987, "grad_norm": 0.005780498962849379, "kl": 0.05622350424528122, "learning_rate": 4.89149077346522e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 226 }, { "completion_length": 164.1428680419922, "epoch": 0.158852344296711, "grad_norm": 0.0037532849237322807, "kl": 0.05521821975708008, "learning_rate": 4.889760605258638e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 227 }, { "completion_length": 133.07144165039062, "epoch": 0.1595521343596921, "grad_norm": 0.5637698173522949, "kl": 0.06298340857028961, "learning_rate": 4.888017063160714e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 228 }, { "completion_length": 94.71428680419922, "epoch": 0.1602519244226732, "grad_norm": 1.2822608947753906, "kl": 0.1722114235162735, "learning_rate": 4.886260156928887e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 229 }, { "completion_length": 189.85714721679688, "epoch": 0.1609517144856543, "grad_norm": 0.3507073223590851, "kl": 0.03131990507245064, "learning_rate": 4.884489896395388e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 230 }, { "completion_length": 157.35714721679688, "epoch": 0.16165150454863542, "grad_norm": 0.014316967688500881, "kl": 0.06276115030050278, "learning_rate": 4.882706291467177e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 231 }, { "completion_length": 144.07144165039062, "epoch": 0.16235129461161651, "grad_norm": 0.014692558906972408, "kl": 0.08688913285732269, "learning_rate": 4.8809093521259e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 232 }, { "completion_length": 148.2857208251953, "epoch": 0.1630510846745976, "grad_norm": 0.01085751224309206, "kl": 0.07385465502738953, "learning_rate": 4.879099088427823e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 233 }, { "completion_length": 134.35714721679688, "epoch": 0.16375087473757874, "grad_norm": 1.3876590728759766, "kl": 0.06863179802894592, "learning_rate": 4.877275510503782e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 234 }, { "completion_length": 166.35714721679688, "epoch": 0.16445066480055984, "grad_norm": 0.004137041047215462, "kl": 0.046288397163152695, "learning_rate": 4.875438628559124e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 235 }, { "completion_length": 182.21429443359375, "epoch": 0.16515045486354094, "grad_norm": 0.5135437250137329, "kl": 0.05192737281322479, "learning_rate": 4.873588452873644e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 236 }, { "completion_length": 143.57144165039062, "epoch": 0.16585024492652203, "grad_norm": 0.7229478359222412, "kl": 0.06715958565473557, "learning_rate": 4.87172499380154e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 237 }, { "completion_length": 194.21429443359375, "epoch": 0.16655003498950316, "grad_norm": 0.5588036179542542, "kl": 0.04597441107034683, "learning_rate": 4.869848261771348e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 238 }, { "completion_length": 147.5, "epoch": 0.16724982505248426, "grad_norm": 0.7688071131706238, "kl": 0.05393356457352638, "learning_rate": 4.867958267285878e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.4642857313156128, "rewards/check_similarity_func": 0.4285714626312256, "step": 239 }, { "completion_length": 154.7857208251953, "epoch": 0.16794961511546536, "grad_norm": 0.0038860563654452562, "kl": 0.04131142795085907, "learning_rate": 4.866055020922167e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 240 }, { "completion_length": 121.0714340209961, "epoch": 0.16864940517844645, "grad_norm": 1.7734692096710205, "kl": 0.12062158435583115, "learning_rate": 4.86413853333141e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 241 }, { "completion_length": 164.2857208251953, "epoch": 0.16934919524142758, "grad_norm": 1.1529392004013062, "kl": 0.08768650144338608, "learning_rate": 4.862208815238908e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 242 }, { "completion_length": 173.35714721679688, "epoch": 0.17004898530440868, "grad_norm": 1.0590676069259644, "kl": 0.057219248265028, "learning_rate": 4.860265877444e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.4642857313156128, "rewards/check_similarity_func": 0.4642857313156128, "step": 243 }, { "completion_length": 171.85714721679688, "epoch": 0.17074877536738978, "grad_norm": 0.008162341080605984, "kl": 0.052427247166633606, "learning_rate": 4.858309730820009e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 244 }, { "completion_length": 194.92857360839844, "epoch": 0.17144856543037088, "grad_norm": 0.2171749770641327, "kl": 0.04080924391746521, "learning_rate": 4.856340386314181e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 245 }, { "completion_length": 123.14286041259766, "epoch": 0.172148355493352, "grad_norm": 0.0038752092514187098, "kl": 0.08252635598182678, "learning_rate": 4.854357854947618e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 246 }, { "completion_length": 155.7857208251953, "epoch": 0.1728481455563331, "grad_norm": 0.3389923870563507, "kl": 0.03915087878704071, "learning_rate": 4.852362147815224e-07, "loss": 0.0, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 247 }, { "completion_length": 150.85714721679688, "epoch": 0.1735479356193142, "grad_norm": 1.5924971103668213, "kl": 0.20066188275814056, "learning_rate": 4.850353276085634e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 248 }, { "completion_length": 194.50001525878906, "epoch": 0.17424772568229532, "grad_norm": 0.5173002481460571, "kl": 0.04696575552225113, "learning_rate": 4.848331251001159e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 249 }, { "completion_length": 163.71429443359375, "epoch": 0.17494751574527642, "grad_norm": 1.08260178565979, "kl": 0.07576731592416763, "learning_rate": 4.846296083877722e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 250 }, { "completion_length": 190.7857208251953, "epoch": 0.17564730580825752, "grad_norm": 0.029735488817095757, "kl": 0.10241223126649857, "learning_rate": 4.844247786104793e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 251 }, { "completion_length": 138.35714721679688, "epoch": 0.17634709587123862, "grad_norm": 0.9147209525108337, "kl": 0.10348494350910187, "learning_rate": 4.842186369145323e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 252 }, { "completion_length": 174.07144165039062, "epoch": 0.17704688593421974, "grad_norm": 0.005371859762817621, "kl": 0.05910150334239006, "learning_rate": 4.840111844535681e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 253 }, { "completion_length": 167.07144165039062, "epoch": 0.17774667599720084, "grad_norm": 0.4594021439552307, "kl": 0.09640074521303177, "learning_rate": 4.838024223885599e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 254 }, { "completion_length": 165.07144165039062, "epoch": 0.17844646606018194, "grad_norm": 0.671158492565155, "kl": 0.05565828084945679, "learning_rate": 4.835923518878088e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 255 }, { "completion_length": 133.0, "epoch": 0.17914625612316304, "grad_norm": 0.0070541659370064735, "kl": 0.07011232525110245, "learning_rate": 4.833809741269391e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 256 }, { "completion_length": 134.0, "epoch": 0.17984604618614417, "grad_norm": 1.136944055557251, "kl": 0.0638783723115921, "learning_rate": 4.831682902888907e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 257 }, { "completion_length": 168.35714721679688, "epoch": 0.18054583624912526, "grad_norm": 0.28826791048049927, "kl": 0.06505099684000015, "learning_rate": 4.829543015639127e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 258 }, { "completion_length": 135.6428680419922, "epoch": 0.18124562631210636, "grad_norm": 0.65718674659729, "kl": 0.09104105085134506, "learning_rate": 4.827390091495569e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 259 }, { "completion_length": 142.85714721679688, "epoch": 0.1819454163750875, "grad_norm": 1.0835249423980713, "kl": 0.10284409672021866, "learning_rate": 4.825224142506709e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 260 }, { "completion_length": 155.5, "epoch": 0.18264520643806859, "grad_norm": 3.3590807914733887, "kl": 0.06798404455184937, "learning_rate": 4.823045180793913e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 261 }, { "completion_length": 153.35714721679688, "epoch": 0.18334499650104968, "grad_norm": 0.016060689464211464, "kl": 0.12102171778678894, "learning_rate": 4.820853218551374e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 262 }, { "completion_length": 169.92857360839844, "epoch": 0.18404478656403078, "grad_norm": 0.3070160448551178, "kl": 0.054033033549785614, "learning_rate": 4.818648268046037e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 263 }, { "completion_length": 171.21429443359375, "epoch": 0.1847445766270119, "grad_norm": 1.826019287109375, "kl": 0.06273362040519714, "learning_rate": 4.816430341617536e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 264 }, { "completion_length": 161.07144165039062, "epoch": 0.185444366689993, "grad_norm": 0.3736879229545593, "kl": 0.05549635365605354, "learning_rate": 4.814199451678119e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 265 }, { "completion_length": 142.21429443359375, "epoch": 0.1861441567529741, "grad_norm": 0.00739132147282362, "kl": 0.053415779024362564, "learning_rate": 4.811955610712587e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 266 }, { "completion_length": 142.42857360839844, "epoch": 0.1868439468159552, "grad_norm": 0.00797994714230299, "kl": 0.07118402421474457, "learning_rate": 4.809698831278217e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 267 }, { "completion_length": 167.71429443359375, "epoch": 0.18754373687893633, "grad_norm": 0.006582770962268114, "kl": 0.06534451991319656, "learning_rate": 4.807429126004692e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 268 }, { "completion_length": 170.1428680419922, "epoch": 0.18824352694191743, "grad_norm": 0.2588343024253845, "kl": 0.06883275508880615, "learning_rate": 4.805146507594034e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 269 }, { "completion_length": 194.50001525878906, "epoch": 0.18894331700489853, "grad_norm": 0.7922128438949585, "kl": 0.05323435738682747, "learning_rate": 4.802850988820531e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 270 }, { "completion_length": 161.35714721679688, "epoch": 0.18964310706787962, "grad_norm": 1.190820336341858, "kl": 0.07306383550167084, "learning_rate": 4.800542582530668e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 271 }, { "completion_length": 131.57144165039062, "epoch": 0.19034289713086075, "grad_norm": 0.8572449684143066, "kl": 0.07944708317518234, "learning_rate": 4.798221301643049e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 272 }, { "completion_length": 175.21429443359375, "epoch": 0.19104268719384185, "grad_norm": 0.005390900652855635, "kl": 0.05170130357146263, "learning_rate": 4.79588715914833e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 273 }, { "completion_length": 190.42857360839844, "epoch": 0.19174247725682295, "grad_norm": 0.5068483352661133, "kl": 0.030970416963100433, "learning_rate": 4.793540168109146e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 274 }, { "completion_length": 166.5, "epoch": 0.19244226731980407, "grad_norm": 1.038082242012024, "kl": 0.14471173286437988, "learning_rate": 4.791180341660034e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 275 }, { "completion_length": 174.85714721679688, "epoch": 0.19314205738278517, "grad_norm": 1.090570092201233, "kl": 0.06294557452201843, "learning_rate": 4.788807693007366e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 276 }, { "completion_length": 253.1428680419922, "epoch": 0.19384184744576627, "grad_norm": 0.003912989981472492, "kl": 0.03256333991885185, "learning_rate": 4.786422235429269e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 277 }, { "completion_length": 141.92857360839844, "epoch": 0.19454163750874737, "grad_norm": 0.9286792874336243, "kl": 0.05529168248176575, "learning_rate": 4.78402398227555e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 278 }, { "completion_length": 193.85714721679688, "epoch": 0.1952414275717285, "grad_norm": 0.6359454393386841, "kl": 0.03859524428844452, "learning_rate": 4.781612946967632e-07, "loss": 0.0, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 279 }, { "completion_length": 198.07144165039062, "epoch": 0.1959412176347096, "grad_norm": 0.4445606768131256, "kl": 0.04135487228631973, "learning_rate": 4.779189142998462e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 280 }, { "completion_length": 200.35714721679688, "epoch": 0.1966410076976907, "grad_norm": 0.7150121331214905, "kl": 0.04442007467150688, "learning_rate": 4.776752583932455e-07, "loss": 0.0, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 281 }, { "completion_length": 137.21429443359375, "epoch": 0.1973407977606718, "grad_norm": 0.29894357919692993, "kl": 0.06865939497947693, "learning_rate": 4.774303283405395e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 282 }, { "completion_length": 212.7857208251953, "epoch": 0.1980405878236529, "grad_norm": 0.8590019345283508, "kl": 0.03856377676129341, "learning_rate": 4.771841255124385e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 283 }, { "completion_length": 160.21429443359375, "epoch": 0.198740377886634, "grad_norm": 0.9675235748291016, "kl": 0.05722957104444504, "learning_rate": 4.769366512867747e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.4642857313156128, "rewards/check_similarity_func": 0.4642857313156128, "step": 284 }, { "completion_length": 208.6428680419922, "epoch": 0.1994401679496151, "grad_norm": 0.5099925398826599, "kl": 0.048123881220817566, "learning_rate": 4.766879070484956e-07, "loss": 0.0, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 285 }, { "completion_length": 127.28572082519531, "epoch": 0.2001399580125962, "grad_norm": 0.7052018642425537, "kl": 0.0637233629822731, "learning_rate": 4.764378941896567e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 286 }, { "completion_length": 155.85714721679688, "epoch": 0.20083974807557733, "grad_norm": 0.926841676235199, "kl": 0.0659792497754097, "learning_rate": 4.7618661410941244e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 287 }, { "completion_length": 205.92857360839844, "epoch": 0.20153953813855843, "grad_norm": 0.9473655819892883, "kl": 0.047269660979509354, "learning_rate": 4.759340682140094e-07, "loss": 0.0, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 288 }, { "completion_length": 122.5714340209961, "epoch": 0.20223932820153953, "grad_norm": 0.33958908915519714, "kl": 0.08770330250263214, "learning_rate": 4.756802579167781e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 289 }, { "completion_length": 145.7857208251953, "epoch": 0.20293911826452066, "grad_norm": 0.010607573203742504, "kl": 0.07517556101083755, "learning_rate": 4.754251846381248e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 290 }, { "completion_length": 237.07144165039062, "epoch": 0.20363890832750176, "grad_norm": 0.5633988976478577, "kl": 0.034626349806785583, "learning_rate": 4.75168849805524e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 291 }, { "completion_length": 175.07144165039062, "epoch": 0.20433869839048285, "grad_norm": 0.5073668956756592, "kl": 0.05200799182057381, "learning_rate": 4.7491125485351037e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 292 }, { "completion_length": 137.35714721679688, "epoch": 0.20503848845346395, "grad_norm": 1.0030326843261719, "kl": 0.09372863173484802, "learning_rate": 4.7465240122367054e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 293 }, { "completion_length": 146.6428680419922, "epoch": 0.20573827851644508, "grad_norm": 1.03136146068573, "kl": 0.06653367727994919, "learning_rate": 4.7439229036463503e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 294 }, { "completion_length": 205.57144165039062, "epoch": 0.20643806857942618, "grad_norm": 0.16924548149108887, "kl": 0.052602753043174744, "learning_rate": 4.7413092373207023e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 295 }, { "completion_length": 174.7857208251953, "epoch": 0.20713785864240727, "grad_norm": 0.005794525146484375, "kl": 0.050076279789209366, "learning_rate": 4.738683027886705e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 296 }, { "completion_length": 219.7857208251953, "epoch": 0.20783764870538837, "grad_norm": 0.00495543098077178, "kl": 0.04305650666356087, "learning_rate": 4.736044290041495e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 297 }, { "completion_length": 221.50001525878906, "epoch": 0.2085374387683695, "grad_norm": 0.5875436663627625, "kl": 0.07391241937875748, "learning_rate": 4.7333930385523227e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 298 }, { "completion_length": 152.92857360839844, "epoch": 0.2092372288313506, "grad_norm": 0.9358134865760803, "kl": 0.0650520920753479, "learning_rate": 4.7307292882564676e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 299 }, { "completion_length": 194.57144165039062, "epoch": 0.2099370188943317, "grad_norm": 0.3117639422416687, "kl": 0.052994661033153534, "learning_rate": 4.7280530540611595e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 300 }, { "completion_length": 154.1428680419922, "epoch": 0.2106368089573128, "grad_norm": 0.012219325639307499, "kl": 0.08781339228153229, "learning_rate": 4.7253643509434915e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 301 }, { "completion_length": 160.35714721679688, "epoch": 0.21133659902029392, "grad_norm": 0.009201272390782833, "kl": 0.053046923130750656, "learning_rate": 4.722663193950335e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 302 }, { "completion_length": 138.57144165039062, "epoch": 0.21203638908327502, "grad_norm": 0.006869816686958075, "kl": 0.0772237777709961, "learning_rate": 4.7199495981982574e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 303 }, { "completion_length": 169.71429443359375, "epoch": 0.21273617914625612, "grad_norm": 1.3806769847869873, "kl": 0.06926502287387848, "learning_rate": 4.7172235788734416e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 304 }, { "completion_length": 128.1428680419922, "epoch": 0.21343596920923724, "grad_norm": 1.4540537595748901, "kl": 0.08069150894880295, "learning_rate": 4.714485151231592e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 305 }, { "completion_length": 150.21429443359375, "epoch": 0.21413575927221834, "grad_norm": 0.4197899103164673, "kl": 0.11029446870088577, "learning_rate": 4.711734330597856e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 306 }, { "completion_length": 149.92857360839844, "epoch": 0.21483554933519944, "grad_norm": 0.5891143679618835, "kl": 0.0682171955704689, "learning_rate": 4.708971132366738e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 307 }, { "completion_length": 165.35714721679688, "epoch": 0.21553533939818054, "grad_norm": 0.01377664040774107, "kl": 0.08428984135389328, "learning_rate": 4.7061955720020084e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 308 }, { "completion_length": 160.35714721679688, "epoch": 0.21623512946116166, "grad_norm": 0.45644667744636536, "kl": 0.05785377323627472, "learning_rate": 4.703407665036622e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 309 }, { "completion_length": 181.57144165039062, "epoch": 0.21693491952414276, "grad_norm": 1.1292043924331665, "kl": 0.050720490515232086, "learning_rate": 4.7006074270726294e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 310 }, { "completion_length": 95.35714721679688, "epoch": 0.21763470958712386, "grad_norm": 0.618823766708374, "kl": 0.109279103577137, "learning_rate": 4.6977948737810884e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 311 }, { "completion_length": 104.42857360839844, "epoch": 0.21833449965010496, "grad_norm": 1.2373698949813843, "kl": 0.11407748609781265, "learning_rate": 4.6949700209019794e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 312 }, { "completion_length": 199.92857360839844, "epoch": 0.21903428971308608, "grad_norm": 0.4044800400733948, "kl": 0.04408475011587143, "learning_rate": 4.692132884244112e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 313 }, { "completion_length": 146.6428680419922, "epoch": 0.21973407977606718, "grad_norm": 0.3640643358230591, "kl": 0.07018928229808807, "learning_rate": 4.6892834796850445e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 314 }, { "completion_length": 212.50001525878906, "epoch": 0.22043386983904828, "grad_norm": 1.1364244222640991, "kl": 0.03360125795006752, "learning_rate": 4.686421823170986e-07, "loss": 0.0, "reward": 1.6785714626312256, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 315 }, { "completion_length": 139.0, "epoch": 0.22113365990202938, "grad_norm": 1.013829231262207, "kl": 0.12020508199930191, "learning_rate": 4.683547930716716e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 316 }, { "completion_length": 158.42857360839844, "epoch": 0.2218334499650105, "grad_norm": 0.3907513916492462, "kl": 0.06901005655527115, "learning_rate": 4.680661818405485e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 317 }, { "completion_length": 166.1428680419922, "epoch": 0.2225332400279916, "grad_norm": 0.37430232763290405, "kl": 0.07565050572156906, "learning_rate": 4.677763502388933e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 318 }, { "completion_length": 141.71429443359375, "epoch": 0.2232330300909727, "grad_norm": 1.0202125310897827, "kl": 0.1550026386976242, "learning_rate": 4.674852998886998e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 319 }, { "completion_length": 130.92857360839844, "epoch": 0.22393282015395383, "grad_norm": 1.371641755104065, "kl": 0.07781083881855011, "learning_rate": 4.6719303241878196e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 320 }, { "completion_length": 145.7857208251953, "epoch": 0.22463261021693492, "grad_norm": 0.8018258213996887, "kl": 0.06540663540363312, "learning_rate": 4.668995494647652e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 321 }, { "completion_length": 179.42857360839844, "epoch": 0.22533240027991602, "grad_norm": 0.5900102853775024, "kl": 0.08709373325109482, "learning_rate": 4.6660485266907745e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 322 }, { "completion_length": 185.92857360839844, "epoch": 0.22603219034289712, "grad_norm": 0.006499639246612787, "kl": 0.07353556156158447, "learning_rate": 4.6630894368093945e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 323 }, { "completion_length": 239.2857208251953, "epoch": 0.22673198040587825, "grad_norm": 0.4501858353614807, "kl": 0.025078438222408295, "learning_rate": 4.6601182415635575e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 324 }, { "completion_length": 190.1428680419922, "epoch": 0.22743177046885935, "grad_norm": 0.01143316924571991, "kl": 0.07399126142263412, "learning_rate": 4.657134957581057e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 325 }, { "completion_length": 142.21429443359375, "epoch": 0.22813156053184044, "grad_norm": 0.0051004113629460335, "kl": 0.06388319283723831, "learning_rate": 4.654139601557336e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 326 }, { "completion_length": 155.1428680419922, "epoch": 0.22883135059482154, "grad_norm": 0.8456053137779236, "kl": 0.06630801409482956, "learning_rate": 4.6511321902554e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 327 }, { "completion_length": 155.57144165039062, "epoch": 0.22953114065780267, "grad_norm": 0.9589296579360962, "kl": 0.0630459114909172, "learning_rate": 4.6481127405057174e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 328 }, { "completion_length": 155.57144165039062, "epoch": 0.23023093072078377, "grad_norm": 0.01848224736750126, "kl": 0.08523882180452347, "learning_rate": 4.6450812692061274e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 329 }, { "completion_length": 181.57144165039062, "epoch": 0.23093072078376486, "grad_norm": 0.5684490203857422, "kl": 0.08473139256238937, "learning_rate": 4.642037793321748e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 330 }, { "completion_length": 187.2857208251953, "epoch": 0.23163051084674596, "grad_norm": 0.0040374863892793655, "kl": 0.043827127665281296, "learning_rate": 4.638982329884877e-07, "loss": 0.0, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 331 }, { "completion_length": 191.42857360839844, "epoch": 0.2323303009097271, "grad_norm": 0.39425128698349, "kl": 0.10597262531518936, "learning_rate": 4.6359148959949e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 332 }, { "completion_length": 187.21429443359375, "epoch": 0.2330300909727082, "grad_norm": 0.005002294201403856, "kl": 0.045696940273046494, "learning_rate": 4.632835508818191e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 333 }, { "completion_length": 154.0, "epoch": 0.23372988103568929, "grad_norm": 1.396830677986145, "kl": 0.08625094592571259, "learning_rate": 4.6297441855880215e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 334 }, { "completion_length": 138.35714721679688, "epoch": 0.2344296710986704, "grad_norm": 0.009959784336388111, "kl": 0.08128954470157623, "learning_rate": 4.6266409436044583e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 335 }, { "completion_length": 140.92857360839844, "epoch": 0.2351294611616515, "grad_norm": 0.005020400509238243, "kl": 0.0948871597647667, "learning_rate": 4.623525800234271e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 336 }, { "completion_length": 193.71429443359375, "epoch": 0.2358292512246326, "grad_norm": 0.17118625342845917, "kl": 0.04552560672163963, "learning_rate": 4.620398772910833e-07, "loss": 0.0, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 337 }, { "completion_length": 157.21429443359375, "epoch": 0.2365290412876137, "grad_norm": 1.1121811866760254, "kl": 0.0529894083738327, "learning_rate": 4.6172598791340237e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 338 }, { "completion_length": 162.07144165039062, "epoch": 0.23722883135059483, "grad_norm": 0.5859483480453491, "kl": 0.06984959542751312, "learning_rate": 4.6141091364701324e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 339 }, { "completion_length": 187.00001525878906, "epoch": 0.23792862141357593, "grad_norm": 0.38454321026802063, "kl": 0.046959273517131805, "learning_rate": 4.6109465625517583e-07, "loss": 0.0, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 340 }, { "completion_length": 147.42857360839844, "epoch": 0.23862841147655703, "grad_norm": 0.003442651592195034, "kl": 0.06190333515405655, "learning_rate": 4.607772175077711e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 341 }, { "completion_length": 165.7857208251953, "epoch": 0.23932820153953813, "grad_norm": 0.01085687056183815, "kl": 0.06698831170797348, "learning_rate": 4.604585991812914e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 342 }, { "completion_length": 178.21429443359375, "epoch": 0.24002799160251925, "grad_norm": 0.13536937534809113, "kl": 0.036244507879018784, "learning_rate": 4.6013880305883024e-07, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 343 }, { "completion_length": 188.85714721679688, "epoch": 0.24072778166550035, "grad_norm": 0.3801333010196686, "kl": 0.0812205970287323, "learning_rate": 4.5981783093007263e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 344 }, { "completion_length": 208.71429443359375, "epoch": 0.24142757172848145, "grad_norm": 0.8097065687179565, "kl": 0.0717221200466156, "learning_rate": 4.5949568459128497e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 345 }, { "completion_length": 184.07144165039062, "epoch": 0.24212736179146255, "grad_norm": 0.5311720967292786, "kl": 0.07351028919219971, "learning_rate": 4.5917236584530463e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 346 }, { "completion_length": 146.7857208251953, "epoch": 0.24282715185444367, "grad_norm": 0.2743346691131592, "kl": 0.14897701144218445, "learning_rate": 4.588478765015304e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 347 }, { "completion_length": 144.35714721679688, "epoch": 0.24352694191742477, "grad_norm": 0.014916333369910717, "kl": 0.10870713740587234, "learning_rate": 4.5852221837591206e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 348 }, { "completion_length": 152.5, "epoch": 0.24422673198040587, "grad_norm": 0.6929971575737, "kl": 0.05877465382218361, "learning_rate": 4.581953932909403e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 349 }, { "completion_length": 204.7857208251953, "epoch": 0.244926522043387, "grad_norm": 0.4781638979911804, "kl": 0.06406878679990768, "learning_rate": 4.578674030756363e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 350 }, { "completion_length": 183.6428680419922, "epoch": 0.2456263121063681, "grad_norm": 1.307241678237915, "kl": 0.05882306769490242, "learning_rate": 4.5753824956554204e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 351 }, { "completion_length": 168.1428680419922, "epoch": 0.2463261021693492, "grad_norm": 0.0121656134724617, "kl": 0.07229572534561157, "learning_rate": 4.572079346027095e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 352 }, { "completion_length": 220.7857208251953, "epoch": 0.2470258922323303, "grad_norm": 0.35856860876083374, "kl": 0.027464624494314194, "learning_rate": 4.5687646003569044e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 353 }, { "completion_length": 156.5, "epoch": 0.24772568229531142, "grad_norm": 0.7537292838096619, "kl": 0.07058604061603546, "learning_rate": 4.5654382771952643e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 354 }, { "completion_length": 177.07144165039062, "epoch": 0.24842547235829252, "grad_norm": 0.7337928414344788, "kl": 0.0815427303314209, "learning_rate": 4.562100395157379e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 355 }, { "completion_length": 207.85714721679688, "epoch": 0.2491252624212736, "grad_norm": 0.46913161873817444, "kl": 0.06902351975440979, "learning_rate": 4.55875097292314e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 356 }, { "completion_length": 190.42857360839844, "epoch": 0.2498250524842547, "grad_norm": 0.40108782052993774, "kl": 0.04293013736605644, "learning_rate": 4.555390029237025e-07, "loss": 0.0, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 357 }, { "completion_length": 222.71429443359375, "epoch": 0.25052484254723584, "grad_norm": 0.009811235591769218, "kl": 0.062084976583719254, "learning_rate": 4.5520175829079856e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 358 }, { "completion_length": 167.92857360839844, "epoch": 0.25122463261021694, "grad_norm": 0.4211064279079437, "kl": 0.0686783418059349, "learning_rate": 4.548633652809349e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 359 }, { "completion_length": 164.42857360839844, "epoch": 0.25192442267319803, "grad_norm": 0.6170043349266052, "kl": 0.05415661260485649, "learning_rate": 4.5452382578787074e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 360 }, { "completion_length": 174.6428680419922, "epoch": 0.25262421273617913, "grad_norm": 0.4590255320072174, "kl": 0.058168910443782806, "learning_rate": 4.5418314171178145e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 361 }, { "completion_length": 193.7857208251953, "epoch": 0.25332400279916023, "grad_norm": 0.004796852823346853, "kl": 0.04918672516942024, "learning_rate": 4.538413149592479e-07, "loss": 0.0, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 362 }, { "completion_length": 167.1428680419922, "epoch": 0.2540237928621414, "grad_norm": 0.781566321849823, "kl": 0.05501880124211311, "learning_rate": 4.534983474432458e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 363 }, { "completion_length": 191.42857360839844, "epoch": 0.2547235829251225, "grad_norm": 0.005572193767875433, "kl": 0.0425318144261837, "learning_rate": 4.5315424108313484e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 364 }, { "completion_length": 171.00001525878906, "epoch": 0.2554233729881036, "grad_norm": 3.3054897785186768, "kl": 0.04474795237183571, "learning_rate": 4.5280899780464806e-07, "loss": 0.0, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 365 }, { "completion_length": 211.2857208251953, "epoch": 0.2561231630510847, "grad_norm": 0.001608588034287095, "kl": 0.027398638427257538, "learning_rate": 4.5246261953988116e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 366 }, { "completion_length": 221.50001525878906, "epoch": 0.2568229531140658, "grad_norm": 1.020776391029358, "kl": 0.05467016249895096, "learning_rate": 4.521151082272816e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 367 }, { "completion_length": 170.42857360839844, "epoch": 0.2575227431770469, "grad_norm": 0.05375458672642708, "kl": 0.1738869994878769, "learning_rate": 4.517664658116377e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 368 }, { "completion_length": 212.57144165039062, "epoch": 0.258222533240028, "grad_norm": 0.7836793661117554, "kl": 0.050905875861644745, "learning_rate": 4.514166942440678e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 369 }, { "completion_length": 180.92857360839844, "epoch": 0.25892232330300907, "grad_norm": 0.0031478935852646828, "kl": 0.04060571640729904, "learning_rate": 4.510657954820093e-07, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 370 }, { "completion_length": 128.21429443359375, "epoch": 0.2596221133659902, "grad_norm": 0.7343041300773621, "kl": 0.1011519655585289, "learning_rate": 4.507137714892079e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 371 }, { "completion_length": 222.50001525878906, "epoch": 0.2603219034289713, "grad_norm": 0.30209633708000183, "kl": 0.043030209839344025, "learning_rate": 4.5036062423570635e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 372 }, { "completion_length": 176.71429443359375, "epoch": 0.2610216934919524, "grad_norm": 0.32525721192359924, "kl": 0.04940638691186905, "learning_rate": 4.500063556978336e-07, "loss": 0.0, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 373 }, { "completion_length": 174.85714721679688, "epoch": 0.2617214835549335, "grad_norm": 0.025635629892349243, "kl": 0.08890416473150253, "learning_rate": 4.496509678581936e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 374 }, { "completion_length": 164.35714721679688, "epoch": 0.2624212736179146, "grad_norm": 0.5221972465515137, "kl": 0.10799188911914825, "learning_rate": 4.4929446270565437e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 375 }, { "completion_length": 143.7857208251953, "epoch": 0.2631210636808957, "grad_norm": 1.3785922527313232, "kl": 0.1353205144405365, "learning_rate": 4.4893684223533675e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 376 }, { "completion_length": 160.2857208251953, "epoch": 0.2638208537438768, "grad_norm": 0.016019895672798157, "kl": 0.09745477139949799, "learning_rate": 4.485781084486032e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 377 }, { "completion_length": 147.42857360839844, "epoch": 0.26452064380685797, "grad_norm": 0.4822780191898346, "kl": 0.08942185342311859, "learning_rate": 4.4821826335304675e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 378 }, { "completion_length": 118.5714340209961, "epoch": 0.26522043386983907, "grad_norm": 0.0046622492372989655, "kl": 0.11559158563613892, "learning_rate": 4.4785730896247974e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 379 }, { "completion_length": 176.92857360839844, "epoch": 0.26592022393282017, "grad_norm": 0.7222185730934143, "kl": 0.06742435693740845, "learning_rate": 4.4749524729692234e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 380 }, { "completion_length": 117.0714340209961, "epoch": 0.26662001399580126, "grad_norm": 0.007016256917268038, "kl": 0.0856146365404129, "learning_rate": 4.471320803825914e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 381 }, { "completion_length": 156.5, "epoch": 0.26731980405878236, "grad_norm": 0.007227389607578516, "kl": 0.06700412929058075, "learning_rate": 4.4676781025188935e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 382 }, { "completion_length": 172.21429443359375, "epoch": 0.26801959412176346, "grad_norm": 1.355343222618103, "kl": 0.0928168073296547, "learning_rate": 4.4640243894339235e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 383 }, { "completion_length": 203.1428680419922, "epoch": 0.26871938418474456, "grad_norm": 0.34504854679107666, "kl": 0.08159424364566803, "learning_rate": 4.460359685018391e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 384 }, { "completion_length": 186.71429443359375, "epoch": 0.26941917424772566, "grad_norm": 0.5111631751060486, "kl": 0.05423164367675781, "learning_rate": 4.456684009781195e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 385 }, { "completion_length": 154.0, "epoch": 0.2701189643107068, "grad_norm": 0.003857059171423316, "kl": 0.06783466786146164, "learning_rate": 4.452997384292633e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 386 }, { "completion_length": 162.0, "epoch": 0.2708187543736879, "grad_norm": 0.4237903952598572, "kl": 0.06762188673019409, "learning_rate": 4.4492998291842775e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 387 }, { "completion_length": 189.57144165039062, "epoch": 0.271518544436669, "grad_norm": 0.003911654464900494, "kl": 0.042102497071027756, "learning_rate": 4.445591365148874e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 388 }, { "completion_length": 163.6428680419922, "epoch": 0.2722183344996501, "grad_norm": 0.5007469058036804, "kl": 0.0691145732998848, "learning_rate": 4.4418720129402136e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 389 }, { "completion_length": 174.2857208251953, "epoch": 0.2729181245626312, "grad_norm": 0.9400177001953125, "kl": 0.040577150881290436, "learning_rate": 4.4381417933730225e-07, "loss": 0.0, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 390 }, { "completion_length": 175.07144165039062, "epoch": 0.2736179146256123, "grad_norm": 0.005653996951878071, "kl": 0.06233478710055351, "learning_rate": 4.4344007273228434e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 391 }, { "completion_length": 200.6428680419922, "epoch": 0.2743177046885934, "grad_norm": 0.004551511723548174, "kl": 0.037107814103364944, "learning_rate": 4.4306488357259213e-07, "loss": 0.0, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 392 }, { "completion_length": 173.1428680419922, "epoch": 0.27501749475157455, "grad_norm": 0.6157705187797546, "kl": 0.06554149091243744, "learning_rate": 4.4268861395790824e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 393 }, { "completion_length": 187.1428680419922, "epoch": 0.27571728481455565, "grad_norm": 0.006073262542486191, "kl": 0.0513925664126873, "learning_rate": 4.4231126599396207e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 394 }, { "completion_length": 137.6428680419922, "epoch": 0.27641707487753675, "grad_norm": 0.013888267800211906, "kl": 0.0937371701002121, "learning_rate": 4.4193284179251766e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 395 }, { "completion_length": 194.7857208251953, "epoch": 0.27711686494051785, "grad_norm": 0.005092666018754244, "kl": 0.053173016756772995, "learning_rate": 4.4155334347136207e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 396 }, { "completion_length": 152.1428680419922, "epoch": 0.27781665500349895, "grad_norm": 0.008013773709535599, "kl": 0.07163780182600021, "learning_rate": 4.4117277315429364e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 397 }, { "completion_length": 243.21429443359375, "epoch": 0.27851644506648005, "grad_norm": 0.002778699854388833, "kl": 0.04184882342815399, "learning_rate": 4.407911329711097e-07, "loss": 0.0, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 398 }, { "completion_length": 148.1428680419922, "epoch": 0.27921623512946114, "grad_norm": 0.41074898838996887, "kl": 0.060933880507946014, "learning_rate": 4.4040842505759513e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 399 }, { "completion_length": 144.07144165039062, "epoch": 0.27991602519244224, "grad_norm": 0.43583184480667114, "kl": 0.09480400383472443, "learning_rate": 4.4002465155551007e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 400 }, { "completion_length": 146.1428680419922, "epoch": 0.2806158152554234, "grad_norm": 0.019495747983455658, "kl": 0.09843632578849792, "learning_rate": 4.3963981461257793e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 401 }, { "completion_length": 162.71429443359375, "epoch": 0.2813156053184045, "grad_norm": 0.9602806568145752, "kl": 0.07752057909965515, "learning_rate": 4.392539163824738e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 402 }, { "completion_length": 177.85714721679688, "epoch": 0.2820153953813856, "grad_norm": 0.5292841196060181, "kl": 0.07222878932952881, "learning_rate": 4.3886695902481186e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 403 }, { "completion_length": 150.07144165039062, "epoch": 0.2827151854443667, "grad_norm": 0.011154424399137497, "kl": 0.07517305016517639, "learning_rate": 4.3847894470513346e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 404 }, { "completion_length": 144.5, "epoch": 0.2834149755073478, "grad_norm": 1.4949623346328735, "kl": 0.060730867087841034, "learning_rate": 4.380898755948953e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 405 }, { "completion_length": 140.0, "epoch": 0.2841147655703289, "grad_norm": 1.2666945457458496, "kl": 0.08669928461313248, "learning_rate": 4.3769975387145684e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 406 }, { "completion_length": 182.92857360839844, "epoch": 0.28481455563331, "grad_norm": 0.30912238359451294, "kl": 0.05023251846432686, "learning_rate": 4.373085817180684e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 407 }, { "completion_length": 156.85714721679688, "epoch": 0.28551434569629114, "grad_norm": 1.1483815908432007, "kl": 0.08641166985034943, "learning_rate": 4.3691636132385876e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 408 }, { "completion_length": 162.7857208251953, "epoch": 0.28621413575927224, "grad_norm": 0.7715860605239868, "kl": 0.06517499685287476, "learning_rate": 4.3652309488382313e-07, "loss": 0.0001, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 409 }, { "completion_length": 155.6428680419922, "epoch": 0.28691392582225334, "grad_norm": 0.7077353596687317, "kl": 0.10083460062742233, "learning_rate": 4.3612878459881064e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 410 }, { "completion_length": 194.07144165039062, "epoch": 0.28761371588523443, "grad_norm": 0.8004106283187866, "kl": 0.06316714733839035, "learning_rate": 4.357334326755122e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 411 }, { "completion_length": 146.21429443359375, "epoch": 0.28831350594821553, "grad_norm": 0.8441122174263, "kl": 0.09409310668706894, "learning_rate": 4.3533704132644797e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 412 }, { "completion_length": 199.85714721679688, "epoch": 0.28901329601119663, "grad_norm": 1.254517674446106, "kl": 0.07743094861507416, "learning_rate": 4.3493961276995514e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 413 }, { "completion_length": 156.0, "epoch": 0.28971308607417773, "grad_norm": 0.006745999678969383, "kl": 0.06792478263378143, "learning_rate": 4.345411492301756e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 414 }, { "completion_length": 160.07144165039062, "epoch": 0.2904128761371588, "grad_norm": 0.005978118162602186, "kl": 0.0698375329375267, "learning_rate": 4.3414165293704307e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 415 }, { "completion_length": 150.21429443359375, "epoch": 0.29111266620014, "grad_norm": 0.7730259895324707, "kl": 0.08198273181915283, "learning_rate": 4.3374112612627104e-07, "loss": 0.0001, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 416 }, { "completion_length": 160.07144165039062, "epoch": 0.2918124562631211, "grad_norm": 0.3890771269798279, "kl": 0.08979778736829758, "learning_rate": 4.3333957103934015e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 417 }, { "completion_length": 190.07144165039062, "epoch": 0.2925122463261022, "grad_norm": 0.9485540390014648, "kl": 0.054739706218242645, "learning_rate": 4.329369899234856e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 418 }, { "completion_length": 143.21429443359375, "epoch": 0.2932120363890833, "grad_norm": 0.563765287399292, "kl": 0.0616917721927166, "learning_rate": 4.3253338503168456e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 419 }, { "completion_length": 199.21429443359375, "epoch": 0.2939118264520644, "grad_norm": 0.23570655286312103, "kl": 0.11169233173131943, "learning_rate": 4.3212875862264353e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 420 }, { "completion_length": 164.21429443359375, "epoch": 0.29461161651504547, "grad_norm": 0.8410707116127014, "kl": 0.06809322535991669, "learning_rate": 4.317231129607859e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 421 }, { "completion_length": 166.7857208251953, "epoch": 0.29531140657802657, "grad_norm": 1.090517520904541, "kl": 0.06529798358678818, "learning_rate": 4.31316450316239e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 422 }, { "completion_length": 152.42857360839844, "epoch": 0.2960111966410077, "grad_norm": 1.291213035583496, "kl": 0.08143844455480576, "learning_rate": 4.3090877296482164e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 423 }, { "completion_length": 153.5, "epoch": 0.2967109867039888, "grad_norm": 0.06176059693098068, "kl": 0.15666675567626953, "learning_rate": 4.3050008318803124e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 424 }, { "completion_length": 188.1428680419922, "epoch": 0.2974107767669699, "grad_norm": 0.0032487784046679735, "kl": 0.0545581690967083, "learning_rate": 4.3009038327303093e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 425 }, { "completion_length": 167.2857208251953, "epoch": 0.298110566829951, "grad_norm": 0.010537766851484776, "kl": 0.08253009617328644, "learning_rate": 4.296796755126372e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 426 }, { "completion_length": 156.71429443359375, "epoch": 0.2988103568929321, "grad_norm": 0.6531897187232971, "kl": 0.09781377762556076, "learning_rate": 4.2926796220530654e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 427 }, { "completion_length": 129.71429443359375, "epoch": 0.2995101469559132, "grad_norm": 0.746117115020752, "kl": 0.10865745693445206, "learning_rate": 4.2885524565512286e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 428 }, { "completion_length": 144.42857360839844, "epoch": 0.3002099370188943, "grad_norm": 0.34383299946784973, "kl": 0.0838535875082016, "learning_rate": 4.284415281717847e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 429 }, { "completion_length": 195.6428680419922, "epoch": 0.3009097270818754, "grad_norm": 0.5134794116020203, "kl": 0.06191248819231987, "learning_rate": 4.280268120705919e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 430 }, { "completion_length": 164.2857208251953, "epoch": 0.30160951714485656, "grad_norm": 0.9866816997528076, "kl": 0.060178689658641815, "learning_rate": 4.2761109967243316e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 431 }, { "completion_length": 143.92857360839844, "epoch": 0.30230930720783766, "grad_norm": 1.7331068515777588, "kl": 0.10264008492231369, "learning_rate": 4.2719439330377254e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 432 }, { "completion_length": 144.7857208251953, "epoch": 0.30300909727081876, "grad_norm": 0.0065994649194180965, "kl": 0.0880449116230011, "learning_rate": 4.2677669529663686e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 433 }, { "completion_length": 165.2857208251953, "epoch": 0.30370888733379986, "grad_norm": 0.007079127244651318, "kl": 0.08474861830472946, "learning_rate": 4.263580079886024e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 434 }, { "completion_length": 167.92857360839844, "epoch": 0.30440867739678096, "grad_norm": 0.021053476259112358, "kl": 0.09454114735126495, "learning_rate": 4.25938333722782e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 435 }, { "completion_length": 145.07144165039062, "epoch": 0.30510846745976206, "grad_norm": 0.6647583246231079, "kl": 0.09926117956638336, "learning_rate": 4.2551767484781163e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 436 }, { "completion_length": 113.14286041259766, "epoch": 0.30580825752274315, "grad_norm": 0.9230692386627197, "kl": 0.13964051008224487, "learning_rate": 4.250960337178377e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 437 }, { "completion_length": 140.2857208251953, "epoch": 0.3065080475857243, "grad_norm": 0.007137620821595192, "kl": 0.08981500566005707, "learning_rate": 4.246734126925035e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 438 }, { "completion_length": 125.00000762939453, "epoch": 0.3072078376487054, "grad_norm": 1.6210769414901733, "kl": 0.08563573658466339, "learning_rate": 4.242498141369361e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 439 }, { "completion_length": 165.2857208251953, "epoch": 0.3079076277116865, "grad_norm": 1.2910802364349365, "kl": 0.066175676882267, "learning_rate": 4.238252404217333e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 440 }, { "completion_length": 122.5714340209961, "epoch": 0.3086074177746676, "grad_norm": 1.1349067687988281, "kl": 0.0820070132613182, "learning_rate": 4.2339969392295017e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 441 }, { "completion_length": 159.1428680419922, "epoch": 0.3093072078376487, "grad_norm": 0.6421765685081482, "kl": 0.06103938817977905, "learning_rate": 4.229731770220857e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 442 }, { "completion_length": 163.7857208251953, "epoch": 0.3100069979006298, "grad_norm": 0.006931392941623926, "kl": 0.0771172046661377, "learning_rate": 4.225456921060697e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 443 }, { "completion_length": 108.35714721679688, "epoch": 0.3107067879636109, "grad_norm": 0.690844714641571, "kl": 0.09667393565177917, "learning_rate": 4.221172415672492e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 444 }, { "completion_length": 150.35714721679688, "epoch": 0.311406578026592, "grad_norm": 1.3618197441101074, "kl": 0.0785374790430069, "learning_rate": 4.2168782780337525e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 445 }, { "completion_length": 145.5, "epoch": 0.31210636808957315, "grad_norm": 0.13375450670719147, "kl": 0.21437384188175201, "learning_rate": 4.2125745321758945e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 446 }, { "completion_length": 143.0, "epoch": 0.31280615815255425, "grad_norm": 0.00989518128335476, "kl": 0.0699138417840004, "learning_rate": 4.2082612021841035e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 447 }, { "completion_length": 128.0, "epoch": 0.31350594821553535, "grad_norm": 0.00431466568261385, "kl": 0.07851195335388184, "learning_rate": 4.203938312197203e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 448 }, { "completion_length": 151.21429443359375, "epoch": 0.31420573827851644, "grad_norm": 0.6046939492225647, "kl": 0.07160710543394089, "learning_rate": 4.1996058864075144e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 449 }, { "completion_length": 194.6428680419922, "epoch": 0.31490552834149754, "grad_norm": 0.005276657175272703, "kl": 0.04745636135339737, "learning_rate": 4.195263949060728e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 450 }, { "completion_length": 102.00000762939453, "epoch": 0.31560531840447864, "grad_norm": 2.09375262260437, "kl": 0.12946811318397522, "learning_rate": 4.190912524455762e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 451 }, { "completion_length": 178.00001525878906, "epoch": 0.31630510846745974, "grad_norm": 0.8566595911979675, "kl": 0.044918280094861984, "learning_rate": 4.1865516369446277e-07, "loss": 0.0, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 452 }, { "completion_length": 137.2857208251953, "epoch": 0.3170048985304409, "grad_norm": 1.2368760108947754, "kl": 0.11362018436193466, "learning_rate": 4.182181310932297e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 453 }, { "completion_length": 108.85714721679688, "epoch": 0.317704688593422, "grad_norm": 1.150861144065857, "kl": 0.15267497301101685, "learning_rate": 4.17780157087656e-07, "loss": 0.0002, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 454 }, { "completion_length": 166.6428680419922, "epoch": 0.3184044786564031, "grad_norm": 1.4333491325378418, "kl": 0.08742610365152359, "learning_rate": 4.1734124412878905e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 455 }, { "completion_length": 177.42857360839844, "epoch": 0.3191042687193842, "grad_norm": 0.3596213459968567, "kl": 0.05293763428926468, "learning_rate": 4.1690139467293116e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 456 }, { "completion_length": 199.2857208251953, "epoch": 0.3198040587823653, "grad_norm": 0.00455293245613575, "kl": 0.04556581750512123, "learning_rate": 4.164606111816255e-07, "loss": 0.0, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 457 }, { "completion_length": 143.1428680419922, "epoch": 0.3205038488453464, "grad_norm": 0.007389579433947802, "kl": 0.06875111907720566, "learning_rate": 4.160188961216423e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 458 }, { "completion_length": 152.7857208251953, "epoch": 0.3212036389083275, "grad_norm": 0.005194150377064943, "kl": 0.05345143750309944, "learning_rate": 4.1557625196496537e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 459 }, { "completion_length": 133.92857360839844, "epoch": 0.3219034289713086, "grad_norm": 0.754564642906189, "kl": 0.15649597346782684, "learning_rate": 4.151326811887778e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 460 }, { "completion_length": 133.2857208251953, "epoch": 0.32260321903428973, "grad_norm": 0.7599014639854431, "kl": 0.07825169712305069, "learning_rate": 4.1468818627544844e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 461 }, { "completion_length": 166.07144165039062, "epoch": 0.32330300909727083, "grad_norm": 0.8863713145256042, "kl": 0.07141906023025513, "learning_rate": 4.1424276971251805e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 462 }, { "completion_length": 160.0, "epoch": 0.32400279916025193, "grad_norm": 0.8611199259757996, "kl": 0.12963101267814636, "learning_rate": 4.137964339926851e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 463 }, { "completion_length": 141.21429443359375, "epoch": 0.32470258922323303, "grad_norm": 1.1024808883666992, "kl": 0.07295498251914978, "learning_rate": 4.13349181613792e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 464 }, { "completion_length": 192.00001525878906, "epoch": 0.3254023792862141, "grad_norm": 0.8522688150405884, "kl": 0.1319868564605713, "learning_rate": 4.1290101507881115e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 465 }, { "completion_length": 163.35714721679688, "epoch": 0.3261021693491952, "grad_norm": 0.9617162346839905, "kl": 0.06527312099933624, "learning_rate": 4.124519368958308e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 466 }, { "completion_length": 163.07144165039062, "epoch": 0.3268019594121763, "grad_norm": 0.5957383513450623, "kl": 0.06949684768915176, "learning_rate": 4.1200194957804113e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 467 }, { "completion_length": 132.0, "epoch": 0.3275017494751575, "grad_norm": 0.005081801675260067, "kl": 0.09255293756723404, "learning_rate": 4.1155105564372007e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 468 }, { "completion_length": 185.35714721679688, "epoch": 0.3282015395381386, "grad_norm": 0.003354251617565751, "kl": 0.04808809235692024, "learning_rate": 4.110992576162192e-07, "loss": 0.0, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 469 }, { "completion_length": 160.57144165039062, "epoch": 0.3289013296011197, "grad_norm": 0.023495681583881378, "kl": 0.09319735318422318, "learning_rate": 4.106465580239501e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 470 }, { "completion_length": 195.71429443359375, "epoch": 0.3296011196641008, "grad_norm": 1.0360186100006104, "kl": 0.055726487189531326, "learning_rate": 4.101929594003694e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 471 }, { "completion_length": 150.1428680419922, "epoch": 0.33030090972708187, "grad_norm": 1.3954473733901978, "kl": 0.10367843508720398, "learning_rate": 4.09738464283965e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 472 }, { "completion_length": 212.21429443359375, "epoch": 0.33100069979006297, "grad_norm": 0.6780316829681396, "kl": 0.062210340052843094, "learning_rate": 4.092830752182422e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 473 }, { "completion_length": 129.5, "epoch": 0.33170048985304407, "grad_norm": 0.009136566892266273, "kl": 0.09670405834913254, "learning_rate": 4.0882679475170877e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 474 }, { "completion_length": 118.28572082519531, "epoch": 0.33240027991602517, "grad_norm": 1.3348325490951538, "kl": 0.09543941915035248, "learning_rate": 4.0836962543786147e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 475 }, { "completion_length": 191.6428680419922, "epoch": 0.3331000699790063, "grad_norm": 1.005230188369751, "kl": 0.06057954207062721, "learning_rate": 4.079115698351708e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 476 }, { "completion_length": 218.7857208251953, "epoch": 0.3337998600419874, "grad_norm": 1.2993587255477905, "kl": 0.06476587802171707, "learning_rate": 4.074526305070678e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 477 }, { "completion_length": 152.07144165039062, "epoch": 0.3344996501049685, "grad_norm": 0.6152059435844421, "kl": 0.0697333887219429, "learning_rate": 4.069928100219287e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 478 }, { "completion_length": 125.71429443359375, "epoch": 0.3351994401679496, "grad_norm": 1.8699591159820557, "kl": 0.11256245523691177, "learning_rate": 4.0653211095306115e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 479 }, { "completion_length": 197.2857208251953, "epoch": 0.3358992302309307, "grad_norm": 0.7929631471633911, "kl": 0.07499907165765762, "learning_rate": 4.0607053587868966e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 480 }, { "completion_length": 170.07144165039062, "epoch": 0.3365990202939118, "grad_norm": 0.8650100231170654, "kl": 0.07369869202375412, "learning_rate": 4.0560808738194107e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 481 }, { "completion_length": 125.92857360839844, "epoch": 0.3372988103568929, "grad_norm": 0.007025310769677162, "kl": 0.08110116422176361, "learning_rate": 4.051447680508303e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 482 }, { "completion_length": 111.71429443359375, "epoch": 0.33799860041987406, "grad_norm": 1.534409523010254, "kl": 0.09726615995168686, "learning_rate": 4.046805804782456e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 483 }, { "completion_length": 157.0, "epoch": 0.33869839048285516, "grad_norm": 0.005034757778048515, "kl": 0.05555884167551994, "learning_rate": 4.0421552726193443e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 484 }, { "completion_length": 181.21429443359375, "epoch": 0.33939818054583626, "grad_norm": 1.4651308059692383, "kl": 0.0934150442481041, "learning_rate": 4.037496110044884e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 485 }, { "completion_length": 159.0, "epoch": 0.34009797060881736, "grad_norm": 0.9954543113708496, "kl": 0.05247289314866066, "learning_rate": 4.032828343133291e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 486 }, { "completion_length": 144.42857360839844, "epoch": 0.34079776067179846, "grad_norm": 0.9763630032539368, "kl": 0.09167619049549103, "learning_rate": 4.028151998006933e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 487 }, { "completion_length": 150.1428680419922, "epoch": 0.34149755073477955, "grad_norm": 1.14859139919281, "kl": 0.09793388098478317, "learning_rate": 4.0234671008361875e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 488 }, { "completion_length": 160.92857360839844, "epoch": 0.34219734079776065, "grad_norm": 0.008364005014300346, "kl": 0.06319764256477356, "learning_rate": 4.018773677839288e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 489 }, { "completion_length": 180.07144165039062, "epoch": 0.34289713086074175, "grad_norm": 1.2543460130691528, "kl": 0.10423704236745834, "learning_rate": 4.0140717552821844e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 490 }, { "completion_length": 164.85714721679688, "epoch": 0.3435969209237229, "grad_norm": 0.6747280359268188, "kl": 0.09582118690013885, "learning_rate": 4.0093613594783897e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 491 }, { "completion_length": 143.6428680419922, "epoch": 0.344296710986704, "grad_norm": 0.004161064513027668, "kl": 0.06550713628530502, "learning_rate": 4.00464251678884e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 492 }, { "completion_length": 189.50001525878906, "epoch": 0.3449965010496851, "grad_norm": 0.006753156892955303, "kl": 0.06552206724882126, "learning_rate": 3.9999152536217387e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 493 }, { "completion_length": 135.71429443359375, "epoch": 0.3456962911126662, "grad_norm": 0.5121190547943115, "kl": 0.06051056459546089, "learning_rate": 3.9951795964324177e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 494 }, { "completion_length": 138.07144165039062, "epoch": 0.3463960811756473, "grad_norm": 0.8373842835426331, "kl": 0.07995127141475677, "learning_rate": 3.99043557172318e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 495 }, { "completion_length": 202.92857360839844, "epoch": 0.3470958712386284, "grad_norm": 0.8826627135276794, "kl": 0.07032157480716705, "learning_rate": 3.9856832060431586e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 496 }, { "completion_length": 162.42857360839844, "epoch": 0.3477956613016095, "grad_norm": 0.4127843677997589, "kl": 0.10454478114843369, "learning_rate": 3.980922525988166e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 497 }, { "completion_length": 150.1428680419922, "epoch": 0.34849545136459065, "grad_norm": 1.735077977180481, "kl": 0.10004504024982452, "learning_rate": 3.9761535582005423e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 498 }, { "completion_length": 149.35714721679688, "epoch": 0.34919524142757175, "grad_norm": 1.9590307474136353, "kl": 0.06596639007329941, "learning_rate": 3.9713763293690103e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 499 }, { "completion_length": 168.1428680419922, "epoch": 0.34989503149055284, "grad_norm": 1.548545002937317, "kl": 0.07926857471466064, "learning_rate": 3.966590866228523e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 500 }, { "completion_length": 219.4285888671875, "epoch": 0.35059482155353394, "grad_norm": 1.271079421043396, "kl": 0.06384127587080002, "learning_rate": 3.961797195560118e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 501 }, { "completion_length": 189.71429443359375, "epoch": 0.35129461161651504, "grad_norm": 0.7470355033874512, "kl": 0.06039028614759445, "learning_rate": 3.956995344190761e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 502 }, { "completion_length": 247.6428680419922, "epoch": 0.35199440167949614, "grad_norm": 0.4178660809993744, "kl": 0.05700720101594925, "learning_rate": 3.9521853389932017e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 503 }, { "completion_length": 189.21429443359375, "epoch": 0.35269419174247724, "grad_norm": 0.7499402761459351, "kl": 0.07649481296539307, "learning_rate": 3.9473672068858227e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 504 }, { "completion_length": 170.35714721679688, "epoch": 0.35339398180545833, "grad_norm": 1.9082164764404297, "kl": 0.08220606297254562, "learning_rate": 3.942540974832485e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 505 }, { "completion_length": 122.21429443359375, "epoch": 0.3540937718684395, "grad_norm": 0.008437473326921463, "kl": 0.09470897167921066, "learning_rate": 3.937706669842381e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 506 }, { "completion_length": 162.1428680419922, "epoch": 0.3547935619314206, "grad_norm": 0.0049139354377985, "kl": 0.07826903462409973, "learning_rate": 3.9328643189698816e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 507 }, { "completion_length": 165.7857208251953, "epoch": 0.3554933519944017, "grad_norm": 0.4739343225955963, "kl": 0.048730507493019104, "learning_rate": 3.928013949314385e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 508 }, { "completion_length": 189.50001525878906, "epoch": 0.3561931420573828, "grad_norm": 0.006271854974329472, "kl": 0.08321522176265717, "learning_rate": 3.923155588020165e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 509 }, { "completion_length": 193.92857360839844, "epoch": 0.3568929321203639, "grad_norm": 0.9291309714317322, "kl": 0.05537025257945061, "learning_rate": 3.9182892622762195e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 510 }, { "completion_length": 158.5, "epoch": 0.357592722183345, "grad_norm": 1.744038462638855, "kl": 0.08976755291223526, "learning_rate": 3.9134149993161183e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 511 }, { "completion_length": 128.7857208251953, "epoch": 0.3582925122463261, "grad_norm": 1.1412746906280518, "kl": 0.108291395008564, "learning_rate": 3.90853282641785e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 512 }, { "completion_length": 175.6428680419922, "epoch": 0.35899230230930723, "grad_norm": 1.1098885536193848, "kl": 0.06192021071910858, "learning_rate": 3.90364277090367e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 513 }, { "completion_length": 152.5, "epoch": 0.35969209237228833, "grad_norm": 1.4871734380722046, "kl": 0.1344575732946396, "learning_rate": 3.898744860139949e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 514 }, { "completion_length": 125.00000762939453, "epoch": 0.36039188243526943, "grad_norm": 1.4426348209381104, "kl": 0.15224027633666992, "learning_rate": 3.893839121537015e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 515 }, { "completion_length": 142.07144165039062, "epoch": 0.3610916724982505, "grad_norm": 2.1289639472961426, "kl": 0.10609105229377747, "learning_rate": 3.8889255825490053e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 516 }, { "completion_length": 150.0, "epoch": 0.3617914625612316, "grad_norm": 0.49952128529548645, "kl": 0.09887497872114182, "learning_rate": 3.8840042706737107e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 517 }, { "completion_length": 154.92857360839844, "epoch": 0.3624912526242127, "grad_norm": 0.009474799036979675, "kl": 0.08007441461086273, "learning_rate": 3.879075213452422e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 518 }, { "completion_length": 169.85714721679688, "epoch": 0.3631910426871938, "grad_norm": 1.1438652276992798, "kl": 0.09981156140565872, "learning_rate": 3.8741384384697743e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 519 }, { "completion_length": 188.92857360839844, "epoch": 0.363890832750175, "grad_norm": 0.003731790464371443, "kl": 0.05421052500605583, "learning_rate": 3.8691939733535946e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 520 }, { "completion_length": 172.07144165039062, "epoch": 0.3645906228131561, "grad_norm": 1.772125482559204, "kl": 0.12312778830528259, "learning_rate": 3.8642418457747453e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 521 }, { "completion_length": 148.21429443359375, "epoch": 0.36529041287613717, "grad_norm": 0.015186883509159088, "kl": 0.10217848420143127, "learning_rate": 3.859282083446973e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 522 }, { "completion_length": 157.85714721679688, "epoch": 0.36599020293911827, "grad_norm": 1.3954017162322998, "kl": 0.08463451266288757, "learning_rate": 3.8543147141267484e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 523 }, { "completion_length": 135.0, "epoch": 0.36668999300209937, "grad_norm": 0.012148164212703705, "kl": 0.10243987292051315, "learning_rate": 3.8493397656131144e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 524 }, { "completion_length": 185.85714721679688, "epoch": 0.36738978306508047, "grad_norm": 0.976691484451294, "kl": 0.11155111342668533, "learning_rate": 3.8443572657475296e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 525 }, { "completion_length": 129.7857208251953, "epoch": 0.36808957312806156, "grad_norm": 1.2827129364013672, "kl": 0.14129728078842163, "learning_rate": 3.8393672424137137e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 526 }, { "completion_length": 138.5, "epoch": 0.36878936319104266, "grad_norm": 0.004477119538933039, "kl": 0.07689778506755829, "learning_rate": 3.8343697235374867e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 527 }, { "completion_length": 159.1428680419922, "epoch": 0.3694891532540238, "grad_norm": 1.0313470363616943, "kl": 0.09582433104515076, "learning_rate": 3.82936473708662e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 528 }, { "completion_length": 170.5, "epoch": 0.3701889433170049, "grad_norm": 0.5641962289810181, "kl": 0.07037567347288132, "learning_rate": 3.8243523110706733e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 529 }, { "completion_length": 132.7857208251953, "epoch": 0.370888733379986, "grad_norm": 1.5660831928253174, "kl": 0.08661636710166931, "learning_rate": 3.819332473540843e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 530 }, { "completion_length": 203.2857208251953, "epoch": 0.3715885234429671, "grad_norm": 0.45002859830856323, "kl": 0.08430561423301697, "learning_rate": 3.8143052525898005e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 531 }, { "completion_length": 155.2857208251953, "epoch": 0.3722883135059482, "grad_norm": 0.42886802554130554, "kl": 0.06451994180679321, "learning_rate": 3.809270676351538e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 532 }, { "completion_length": 165.6428680419922, "epoch": 0.3729881035689293, "grad_norm": 1.1764765977859497, "kl": 0.07099022716283798, "learning_rate": 3.804228773001211e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 533 }, { "completion_length": 175.42857360839844, "epoch": 0.3736878936319104, "grad_norm": 0.99241703748703, "kl": 0.06925707310438156, "learning_rate": 3.79917957075498e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 534 }, { "completion_length": 177.07144165039062, "epoch": 0.37438768369489156, "grad_norm": 0.4666481614112854, "kl": 0.07404585182666779, "learning_rate": 3.7941230978698513e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 535 }, { "completion_length": 209.57144165039062, "epoch": 0.37508747375787266, "grad_norm": 0.003499638522043824, "kl": 0.05574522912502289, "learning_rate": 3.78905938264352e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 536 }, { "completion_length": 155.85714721679688, "epoch": 0.37578726382085376, "grad_norm": 0.6798691749572754, "kl": 0.10059011727571487, "learning_rate": 3.783988453414215e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 537 }, { "completion_length": 212.42857360839844, "epoch": 0.37648705388383485, "grad_norm": 0.34626370668411255, "kl": 0.09395918250083923, "learning_rate": 3.778910338560533e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 538 }, { "completion_length": 189.1428680419922, "epoch": 0.37718684394681595, "grad_norm": 0.32203903794288635, "kl": 0.044019315391778946, "learning_rate": 3.773825066501285e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 539 }, { "completion_length": 138.42857360839844, "epoch": 0.37788663400979705, "grad_norm": 0.009647911414504051, "kl": 0.09944557398557663, "learning_rate": 3.768732665695338e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 540 }, { "completion_length": 141.42857360839844, "epoch": 0.37858642407277815, "grad_norm": 0.745865523815155, "kl": 0.08834861218929291, "learning_rate": 3.7636331646414523e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 541 }, { "completion_length": 118.64286041259766, "epoch": 0.37928621413575925, "grad_norm": 0.012071012519299984, "kl": 0.11386111378669739, "learning_rate": 3.758526591878123e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 542 }, { "completion_length": 193.07144165039062, "epoch": 0.3799860041987404, "grad_norm": 1.077238917350769, "kl": 0.04925287887454033, "learning_rate": 3.753412975983421e-07, "loss": 0.0, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 543 }, { "completion_length": 158.92857360839844, "epoch": 0.3806857942617215, "grad_norm": 0.9441022872924805, "kl": 0.08041739463806152, "learning_rate": 3.7482923455748347e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 544 }, { "completion_length": 196.92857360839844, "epoch": 0.3813855843247026, "grad_norm": 0.5632001757621765, "kl": 0.08155044913291931, "learning_rate": 3.7431647293091075e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 545 }, { "completion_length": 218.50001525878906, "epoch": 0.3820853743876837, "grad_norm": 0.5315311551094055, "kl": 0.06944762915372849, "learning_rate": 3.738030155882074e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 546 }, { "completion_length": 154.1428680419922, "epoch": 0.3827851644506648, "grad_norm": 0.8924309611320496, "kl": 0.06932975351810455, "learning_rate": 3.73288865402851e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 547 }, { "completion_length": 185.92857360839844, "epoch": 0.3834849545136459, "grad_norm": 0.01033222209662199, "kl": 0.08086119592189789, "learning_rate": 3.7277402525219597e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 548 }, { "completion_length": 158.0, "epoch": 0.384184744576627, "grad_norm": 0.006359913852065802, "kl": 0.08321193605661392, "learning_rate": 3.722584980174583e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 549 }, { "completion_length": 189.85714721679688, "epoch": 0.38488453463960814, "grad_norm": 0.8907008767127991, "kl": 0.07143823802471161, "learning_rate": 3.71742286583699e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 550 }, { "completion_length": 203.85714721679688, "epoch": 0.38558432470258924, "grad_norm": 0.9854149222373962, "kl": 0.05446535348892212, "learning_rate": 3.712253938398081e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 551 }, { "completion_length": 180.71429443359375, "epoch": 0.38628411476557034, "grad_norm": 0.011824723333120346, "kl": 0.09022179991006851, "learning_rate": 3.707078226784885e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 552 }, { "completion_length": 173.00001525878906, "epoch": 0.38698390482855144, "grad_norm": 0.5999806523323059, "kl": 0.07996483892202377, "learning_rate": 3.7018957599623966e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 553 }, { "completion_length": 126.14286041259766, "epoch": 0.38768369489153254, "grad_norm": 0.8349685668945312, "kl": 0.06610356271266937, "learning_rate": 3.696706566933416e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 554 }, { "completion_length": 152.71429443359375, "epoch": 0.38838348495451364, "grad_norm": 0.003661968046799302, "kl": 0.06520053744316101, "learning_rate": 3.691510676738387e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 555 }, { "completion_length": 126.35714721679688, "epoch": 0.38908327501749473, "grad_norm": 1.1900231838226318, "kl": 0.1030726209282875, "learning_rate": 3.6863081184552276e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 556 }, { "completion_length": 131.5, "epoch": 0.38978306508047583, "grad_norm": 1.0486801862716675, "kl": 0.07672800868749619, "learning_rate": 3.681098921199177e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 557 }, { "completion_length": 185.1428680419922, "epoch": 0.390482855143457, "grad_norm": 1.160973310470581, "kl": 0.0711272731423378, "learning_rate": 3.675883114122629e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 558 }, { "completion_length": 171.42857360839844, "epoch": 0.3911826452064381, "grad_norm": 0.6586304306983948, "kl": 0.06277971714735031, "learning_rate": 3.6706607264149644e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 559 }, { "completion_length": 183.85714721679688, "epoch": 0.3918824352694192, "grad_norm": 0.6972989439964294, "kl": 0.05722213536500931, "learning_rate": 3.665431787302393e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 560 }, { "completion_length": 192.07144165039062, "epoch": 0.3925822253324003, "grad_norm": 0.6393983960151672, "kl": 0.10029980540275574, "learning_rate": 3.660196326047792e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 561 }, { "completion_length": 163.5, "epoch": 0.3932820153953814, "grad_norm": 0.7198165655136108, "kl": 0.09041617810726166, "learning_rate": 3.654954371950532e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 562 }, { "completion_length": 183.57144165039062, "epoch": 0.3939818054583625, "grad_norm": 0.29942193627357483, "kl": 0.05989314243197441, "learning_rate": 3.649705954346324e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 563 }, { "completion_length": 228.6428680419922, "epoch": 0.3946815955213436, "grad_norm": 0.6346117854118347, "kl": 0.04510258883237839, "learning_rate": 3.6444511026070515e-07, "loss": 0.0, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 564 }, { "completion_length": 153.42857360839844, "epoch": 0.39538138558432473, "grad_norm": 0.019877756014466286, "kl": 0.1297934353351593, "learning_rate": 3.639189846140604e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 565 }, { "completion_length": 135.21429443359375, "epoch": 0.3960811756473058, "grad_norm": 1.1314646005630493, "kl": 0.09674588590860367, "learning_rate": 3.633922214390712e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 566 }, { "completion_length": 167.5, "epoch": 0.3967809657102869, "grad_norm": 1.6090608835220337, "kl": 0.08364291489124298, "learning_rate": 3.6286482368367887e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 567 }, { "completion_length": 196.1428680419922, "epoch": 0.397480755773268, "grad_norm": 0.8878325819969177, "kl": 0.05023238807916641, "learning_rate": 3.623367942993757e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 568 }, { "completion_length": 125.00000762939453, "epoch": 0.3981805458362491, "grad_norm": 1.8332659006118774, "kl": 0.10462432354688644, "learning_rate": 3.618081362411889e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 569 }, { "completion_length": 146.71429443359375, "epoch": 0.3988803358992302, "grad_norm": 0.8163487911224365, "kl": 0.0895436480641365, "learning_rate": 3.612788524676639e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 570 }, { "completion_length": 123.28572082519531, "epoch": 0.3995801259622113, "grad_norm": 1.6905393600463867, "kl": 0.11094065010547638, "learning_rate": 3.60748945940848e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 571 }, { "completion_length": 188.50001525878906, "epoch": 0.4002799160251924, "grad_norm": 1.1494596004486084, "kl": 0.06669045984745026, "learning_rate": 3.602184196262735e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 572 }, { "completion_length": 178.2857208251953, "epoch": 0.40097970608817357, "grad_norm": 1.0010849237442017, "kl": 0.08969301730394363, "learning_rate": 3.596872764929413e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 573 }, { "completion_length": 171.35714721679688, "epoch": 0.40167949615115467, "grad_norm": 0.0049744355492293835, "kl": 0.08287398517131805, "learning_rate": 3.5915551951330397e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 574 }, { "completion_length": 253.50001525878906, "epoch": 0.40237928621413577, "grad_norm": 0.0023723295889794827, "kl": 0.0318870022892952, "learning_rate": 3.586231516632498e-07, "loss": 0.0, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 575 }, { "completion_length": 122.71429443359375, "epoch": 0.40307907627711687, "grad_norm": 0.0052050878293812275, "kl": 0.09065335243940353, "learning_rate": 3.5809017592208536e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 576 }, { "completion_length": 222.7857208251953, "epoch": 0.40377886634009796, "grad_norm": 0.44019705057144165, "kl": 0.08172738552093506, "learning_rate": 3.5755659527251926e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 577 }, { "completion_length": 160.5, "epoch": 0.40447865640307906, "grad_norm": 1.277113914489746, "kl": 0.11303502321243286, "learning_rate": 3.570224127006456e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 578 }, { "completion_length": 180.92857360839844, "epoch": 0.40517844646606016, "grad_norm": 1.0300122499465942, "kl": 0.07609526067972183, "learning_rate": 3.5648763119592684e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 579 }, { "completion_length": 171.2857208251953, "epoch": 0.4058782365290413, "grad_norm": 1.2583365440368652, "kl": 0.07199601083993912, "learning_rate": 3.559522537511771e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 580 }, { "completion_length": 167.71429443359375, "epoch": 0.4065780265920224, "grad_norm": 0.5407224893569946, "kl": 0.08278383314609528, "learning_rate": 3.55416283362546e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 581 }, { "completion_length": 218.7857208251953, "epoch": 0.4072778166550035, "grad_norm": 0.6477207541465759, "kl": 0.06366579234600067, "learning_rate": 3.548797230295011e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 582 }, { "completion_length": 179.07144165039062, "epoch": 0.4079776067179846, "grad_norm": 1.5613583326339722, "kl": 0.07433398813009262, "learning_rate": 3.5434257575481166e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 583 }, { "completion_length": 185.1428680419922, "epoch": 0.4086773967809657, "grad_norm": 0.9865217208862305, "kl": 0.11372844874858856, "learning_rate": 3.5380484454453154e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 584 }, { "completion_length": 183.71429443359375, "epoch": 0.4093771868439468, "grad_norm": 0.49564844369888306, "kl": 0.1376696676015854, "learning_rate": 3.5326653240798274e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 585 }, { "completion_length": 253.35714721679688, "epoch": 0.4100769769069279, "grad_norm": 0.6566737294197083, "kl": 0.027259845286607742, "learning_rate": 3.52727642357738e-07, "loss": 0.0, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 586 }, { "completion_length": 142.85714721679688, "epoch": 0.410776766969909, "grad_norm": 0.3752553462982178, "kl": 0.07509765028953552, "learning_rate": 3.5218817740960447e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 587 }, { "completion_length": 169.5, "epoch": 0.41147655703289016, "grad_norm": 0.5774813294410706, "kl": 0.07071255147457123, "learning_rate": 3.516481405826066e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 588 }, { "completion_length": 148.7857208251953, "epoch": 0.41217634709587125, "grad_norm": 1.2628300189971924, "kl": 0.06514041870832443, "learning_rate": 3.511075348989692e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 589 }, { "completion_length": 171.71429443359375, "epoch": 0.41287613715885235, "grad_norm": 1.2177170515060425, "kl": 0.08499522507190704, "learning_rate": 3.505663633841006e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 590 }, { "completion_length": 164.71429443359375, "epoch": 0.41357592722183345, "grad_norm": 1.2109373807907104, "kl": 0.07347279042005539, "learning_rate": 3.5002462906657575e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 591 }, { "completion_length": 156.07144165039062, "epoch": 0.41427571728481455, "grad_norm": 1.1151254177093506, "kl": 0.12254299968481064, "learning_rate": 3.4948233497811916e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 592 }, { "completion_length": 186.35714721679688, "epoch": 0.41497550734779565, "grad_norm": 1.2969390153884888, "kl": 0.06270726770162582, "learning_rate": 3.4893948415358797e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 593 }, { "completion_length": 153.6428680419922, "epoch": 0.41567529741077675, "grad_norm": 0.23953630030155182, "kl": 0.07134068012237549, "learning_rate": 3.483960796309552e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 594 }, { "completion_length": 165.42857360839844, "epoch": 0.4163750874737579, "grad_norm": 0.32146522402763367, "kl": 0.06677267700433731, "learning_rate": 3.4785212445129244e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 595 }, { "completion_length": 154.92857360839844, "epoch": 0.417074877536739, "grad_norm": 0.8877961039543152, "kl": 0.08647654950618744, "learning_rate": 3.473076216587528e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 596 }, { "completion_length": 135.07144165039062, "epoch": 0.4177746675997201, "grad_norm": 0.020083080977201462, "kl": 0.14254732429981232, "learning_rate": 3.467625743005543e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 597 }, { "completion_length": 140.57144165039062, "epoch": 0.4184744576627012, "grad_norm": 1.3831158876419067, "kl": 0.09030907601118088, "learning_rate": 3.4621698542696216e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 598 }, { "completion_length": 187.00001525878906, "epoch": 0.4191742477256823, "grad_norm": 0.96335768699646, "kl": 0.1670377105474472, "learning_rate": 3.4567085809127245e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 599 }, { "completion_length": 194.07144165039062, "epoch": 0.4198740377886634, "grad_norm": 1.0578110218048096, "kl": 0.060925018042325974, "learning_rate": 3.451241953497944e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 600 }, { "completion_length": 145.92857360839844, "epoch": 0.4205738278516445, "grad_norm": 1.1632345914840698, "kl": 0.1145569309592247, "learning_rate": 3.445770002618337e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 601 }, { "completion_length": 165.35714721679688, "epoch": 0.4212736179146256, "grad_norm": 0.3005192279815674, "kl": 0.07536184042692184, "learning_rate": 3.4402927588967535e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 602 }, { "completion_length": 161.92857360839844, "epoch": 0.42197340797760674, "grad_norm": 1.048603892326355, "kl": 0.07374842464923859, "learning_rate": 3.43481025298566e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 603 }, { "completion_length": 156.71429443359375, "epoch": 0.42267319804058784, "grad_norm": 0.005877501331269741, "kl": 0.07264689356088638, "learning_rate": 3.4293225155669745e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 604 }, { "completion_length": 155.0, "epoch": 0.42337298810356894, "grad_norm": 0.0045142630115151405, "kl": 0.09070201963186264, "learning_rate": 3.4238295773518924e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 605 }, { "completion_length": 172.85714721679688, "epoch": 0.42407277816655004, "grad_norm": 0.5130675435066223, "kl": 0.06578822433948517, "learning_rate": 3.4183314690807143e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 606 }, { "completion_length": 203.21429443359375, "epoch": 0.42477256822953113, "grad_norm": 0.7278613448143005, "kl": 0.05420098453760147, "learning_rate": 3.412828221522673e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 607 }, { "completion_length": 132.71429443359375, "epoch": 0.42547235829251223, "grad_norm": 1.81039297580719, "kl": 0.10995319485664368, "learning_rate": 3.407319865475766e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 608 }, { "completion_length": 201.71429443359375, "epoch": 0.42617214835549333, "grad_norm": 0.006350979674607515, "kl": 0.0654965415596962, "learning_rate": 3.401806431766574e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 609 }, { "completion_length": 169.7857208251953, "epoch": 0.4268719384184745, "grad_norm": 0.7703147530555725, "kl": 0.07624612748622894, "learning_rate": 3.396287951250098e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 610 }, { "completion_length": 115.5714340209961, "epoch": 0.4275717284814556, "grad_norm": 1.578274130821228, "kl": 0.13712795078754425, "learning_rate": 3.3907644548095813e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 611 }, { "completion_length": 126.28572082519531, "epoch": 0.4282715185444367, "grad_norm": 1.4740409851074219, "kl": 0.114041268825531, "learning_rate": 3.385235973356339e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 612 }, { "completion_length": 127.85714721679688, "epoch": 0.4289713086074178, "grad_norm": 0.006609841249883175, "kl": 0.13091127574443817, "learning_rate": 3.3797025378295826e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 613 }, { "completion_length": 131.0, "epoch": 0.4296710986703989, "grad_norm": 1.025025725364685, "kl": 0.08791717141866684, "learning_rate": 3.3741641791962493e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 614 }, { "completion_length": 161.07144165039062, "epoch": 0.43037088873338, "grad_norm": 0.6503204703330994, "kl": 0.08773945271968842, "learning_rate": 3.368620928450826e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 615 }, { "completion_length": 216.07144165039062, "epoch": 0.4310706787963611, "grad_norm": 0.0037097164895385504, "kl": 0.07662048935890198, "learning_rate": 3.36307281661518e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 616 }, { "completion_length": 211.92857360839844, "epoch": 0.43177046885934217, "grad_norm": 1.239851713180542, "kl": 0.08453672379255295, "learning_rate": 3.3575198747383815e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 617 }, { "completion_length": 219.07144165039062, "epoch": 0.4324702589223233, "grad_norm": 1.0168261528015137, "kl": 0.08302226662635803, "learning_rate": 3.3519621338965306e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 618 }, { "completion_length": 165.1428680419922, "epoch": 0.4331700489853044, "grad_norm": 1.0173319578170776, "kl": 0.8803849816322327, "learning_rate": 3.3463996251925836e-07, "loss": 0.0009, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 619 }, { "completion_length": 140.85714721679688, "epoch": 0.4338698390482855, "grad_norm": 0.4436799883842468, "kl": 0.08320984244346619, "learning_rate": 3.340832379756183e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 620 }, { "completion_length": 193.2857208251953, "epoch": 0.4345696291112666, "grad_norm": 0.9798906445503235, "kl": 0.07030647248029709, "learning_rate": 3.3352604287434747e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 621 }, { "completion_length": 148.92857360839844, "epoch": 0.4352694191742477, "grad_norm": 1.6037248373031616, "kl": 0.12679950892925262, "learning_rate": 3.3296838033369413e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 622 }, { "completion_length": 149.7857208251953, "epoch": 0.4359692092372288, "grad_norm": 1.0563029050827026, "kl": 0.08229831606149673, "learning_rate": 3.324102534745225e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 623 }, { "completion_length": 175.71429443359375, "epoch": 0.4366689993002099, "grad_norm": 1.1535431146621704, "kl": 0.08126385509967804, "learning_rate": 3.3185166542029526e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 624 }, { "completion_length": 133.1428680419922, "epoch": 0.43736878936319107, "grad_norm": 1.5946522951126099, "kl": 0.1409570723772049, "learning_rate": 3.312926192970559e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 625 }, { "completion_length": 141.92857360839844, "epoch": 0.43806857942617217, "grad_norm": 0.3123253881931305, "kl": 0.1362275332212448, "learning_rate": 3.307331182334117e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 626 }, { "completion_length": 211.6428680419922, "epoch": 0.43876836948915326, "grad_norm": 0.27792471647262573, "kl": 0.08928539603948593, "learning_rate": 3.3017316536051574e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 627 }, { "completion_length": 160.5, "epoch": 0.43946815955213436, "grad_norm": 0.732595682144165, "kl": 0.06787537783384323, "learning_rate": 3.2961276381204965e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 628 }, { "completion_length": 155.85714721679688, "epoch": 0.44016794961511546, "grad_norm": 0.933816134929657, "kl": 0.1013822928071022, "learning_rate": 3.29051916724206e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 629 }, { "completion_length": 113.5714340209961, "epoch": 0.44086773967809656, "grad_norm": 2.7310433387756348, "kl": 0.16081348061561584, "learning_rate": 3.2849062723567066e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 630 }, { "completion_length": 149.6428680419922, "epoch": 0.44156752974107766, "grad_norm": 0.9071423411369324, "kl": 0.1442834734916687, "learning_rate": 3.279288984876055e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 631 }, { "completion_length": 155.0, "epoch": 0.44226731980405876, "grad_norm": 0.008171296678483486, "kl": 0.11737126857042313, "learning_rate": 3.2736673362363045e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 632 }, { "completion_length": 139.71429443359375, "epoch": 0.4429671098670399, "grad_norm": 2.173647880554199, "kl": 0.08801593631505966, "learning_rate": 3.268041357898062e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 633 }, { "completion_length": 203.85714721679688, "epoch": 0.443666899930021, "grad_norm": 1.1349865198135376, "kl": 0.09640464186668396, "learning_rate": 3.2624110813461644e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 634 }, { "completion_length": 146.6428680419922, "epoch": 0.4443666899930021, "grad_norm": 1.2135082483291626, "kl": 0.08870533108711243, "learning_rate": 3.2567765380895024e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 635 }, { "completion_length": 133.92857360839844, "epoch": 0.4450664800559832, "grad_norm": 1.1322942972183228, "kl": 0.10924336314201355, "learning_rate": 3.2511377596608445e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 636 }, { "completion_length": 162.07144165039062, "epoch": 0.4457662701189643, "grad_norm": 0.7404430508613586, "kl": 0.06720083206892014, "learning_rate": 3.2454947776166636e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 637 }, { "completion_length": 173.92857360839844, "epoch": 0.4464660601819454, "grad_norm": 1.1222801208496094, "kl": 0.10577866435050964, "learning_rate": 3.239847623536953e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 638 }, { "completion_length": 139.42857360839844, "epoch": 0.4471658502449265, "grad_norm": 1.3313777446746826, "kl": 0.09354926645755768, "learning_rate": 3.234196329025056e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 639 }, { "completion_length": 157.85714721679688, "epoch": 0.44786564030790765, "grad_norm": 0.7976996898651123, "kl": 0.14111362397670746, "learning_rate": 3.2285409257074883e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 640 }, { "completion_length": 180.6428680419922, "epoch": 0.44856543037088875, "grad_norm": 0.010826917365193367, "kl": 0.10925174504518509, "learning_rate": 3.2228814452337583e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 641 }, { "completion_length": 147.1428680419922, "epoch": 0.44926522043386985, "grad_norm": 0.003285217797383666, "kl": 0.07662175595760345, "learning_rate": 3.2172179192761917e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 642 }, { "completion_length": 157.5, "epoch": 0.44996501049685095, "grad_norm": 0.0029764294158667326, "kl": 0.06789866089820862, "learning_rate": 3.2115503795297543e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 643 }, { "completion_length": 124.14286041259766, "epoch": 0.45066480055983205, "grad_norm": 1.153645396232605, "kl": 0.12138595432043076, "learning_rate": 3.2058788577118754e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 644 }, { "completion_length": 158.92857360839844, "epoch": 0.45136459062281314, "grad_norm": 1.6021900177001953, "kl": 0.10063384473323822, "learning_rate": 3.200203385562268e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 645 }, { "completion_length": 138.7857208251953, "epoch": 0.45206438068579424, "grad_norm": 0.5161796808242798, "kl": 0.0738629549741745, "learning_rate": 3.194523994842751e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 646 }, { "completion_length": 186.71429443359375, "epoch": 0.45276417074877534, "grad_norm": 0.6150098443031311, "kl": 0.11273457109928131, "learning_rate": 3.1888407173370767e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 647 }, { "completion_length": 197.21429443359375, "epoch": 0.4534639608117565, "grad_norm": 1.627325177192688, "kl": 0.2841565012931824, "learning_rate": 3.1831535848507476e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 648 }, { "completion_length": 137.5, "epoch": 0.4541637508747376, "grad_norm": 0.9735137224197388, "kl": 0.10142991691827774, "learning_rate": 3.1774626292108373e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 649 }, { "completion_length": 191.7857208251953, "epoch": 0.4548635409377187, "grad_norm": 0.010576860047876835, "kl": 0.10462839901447296, "learning_rate": 3.171767882265819e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 650 }, { "completion_length": 161.57144165039062, "epoch": 0.4555633310006998, "grad_norm": 1.0209048986434937, "kl": 0.08857616782188416, "learning_rate": 3.1660693758853806e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 651 }, { "completion_length": 210.71429443359375, "epoch": 0.4562631210636809, "grad_norm": 0.00469197379425168, "kl": 0.06558775156736374, "learning_rate": 3.160367141960251e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 652 }, { "completion_length": 133.07144165039062, "epoch": 0.456962911126662, "grad_norm": 0.007951144129037857, "kl": 0.11750414222478867, "learning_rate": 3.1546612124020167e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 653 }, { "completion_length": 166.07144165039062, "epoch": 0.4576627011896431, "grad_norm": 1.5879830121994019, "kl": 0.07737138122320175, "learning_rate": 3.14895161914295e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 654 }, { "completion_length": 135.21429443359375, "epoch": 0.45836249125262424, "grad_norm": 0.011608157306909561, "kl": 0.15805354714393616, "learning_rate": 3.1432383941358245e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 655 }, { "completion_length": 162.35714721679688, "epoch": 0.45906228131560534, "grad_norm": 0.006215684115886688, "kl": 0.11150137335062027, "learning_rate": 3.137521569353737e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 656 }, { "completion_length": 156.07144165039062, "epoch": 0.45976207137858643, "grad_norm": 0.9788245558738708, "kl": 0.11876959353685379, "learning_rate": 3.131801176789933e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 657 }, { "completion_length": 146.6428680419922, "epoch": 0.46046186144156753, "grad_norm": 0.8113498091697693, "kl": 0.1853734701871872, "learning_rate": 3.1260772484576213e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 658 }, { "completion_length": 188.57144165039062, "epoch": 0.46116165150454863, "grad_norm": 1.2978044748306274, "kl": 0.1329558938741684, "learning_rate": 3.1203498163898004e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 659 }, { "completion_length": 149.6428680419922, "epoch": 0.46186144156752973, "grad_norm": 1.6993205547332764, "kl": 0.13925708830356598, "learning_rate": 3.114618912639075e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 660 }, { "completion_length": 109.78572082519531, "epoch": 0.4625612316305108, "grad_norm": 1.8763138055801392, "kl": 0.28969159722328186, "learning_rate": 3.1088845692774795e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 661 }, { "completion_length": 194.6428680419922, "epoch": 0.4632610216934919, "grad_norm": 0.8858591318130493, "kl": 0.0812605544924736, "learning_rate": 3.103146818396299e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 662 }, { "completion_length": 144.0, "epoch": 0.4639608117564731, "grad_norm": 1.3499542474746704, "kl": 0.10175241529941559, "learning_rate": 3.097405692105884e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 663 }, { "completion_length": 143.07144165039062, "epoch": 0.4646606018194542, "grad_norm": 1.2323224544525146, "kl": 0.10948854684829712, "learning_rate": 3.091661222535479e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 664 }, { "completion_length": 136.57144165039062, "epoch": 0.4653603918824353, "grad_norm": 2.3692777156829834, "kl": 0.14500729739665985, "learning_rate": 3.085913441833037e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 665 }, { "completion_length": 158.7857208251953, "epoch": 0.4660601819454164, "grad_norm": 0.01958485133945942, "kl": 0.1482079029083252, "learning_rate": 3.0801623821650415e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 666 }, { "completion_length": 154.71429443359375, "epoch": 0.4667599720083975, "grad_norm": 2.0009067058563232, "kl": 0.14186322689056396, "learning_rate": 3.0744080757163243e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 667 }, { "completion_length": 148.0, "epoch": 0.46745976207137857, "grad_norm": 2.462761640548706, "kl": 0.1825760006904602, "learning_rate": 3.068650554689892e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 668 }, { "completion_length": 203.50001525878906, "epoch": 0.46815955213435967, "grad_norm": 1.1733652353286743, "kl": 0.11574812233448029, "learning_rate": 3.062889851306735e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 669 }, { "completion_length": 241.00001525878906, "epoch": 0.4688593421973408, "grad_norm": 0.3351525068283081, "kl": 0.08379165828227997, "learning_rate": 3.0571259978056575e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 670 }, { "completion_length": 155.57144165039062, "epoch": 0.4695591322603219, "grad_norm": 2.2182371616363525, "kl": 0.11414230614900589, "learning_rate": 3.0513590264430917e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 671 }, { "completion_length": 129.57144165039062, "epoch": 0.470258922323303, "grad_norm": 1.2790042161941528, "kl": 0.23131538927555084, "learning_rate": 3.045588969492918e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 672 }, { "completion_length": 185.57144165039062, "epoch": 0.4709587123862841, "grad_norm": 1.1393929719924927, "kl": 0.13166576623916626, "learning_rate": 3.039815859246284e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 673 }, { "completion_length": 127.35714721679688, "epoch": 0.4716585024492652, "grad_norm": 1.289710283279419, "kl": 0.15005119144916534, "learning_rate": 3.034039728011427e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 674 }, { "completion_length": 169.21429443359375, "epoch": 0.4723582925122463, "grad_norm": 0.010627866722643375, "kl": 0.16326935589313507, "learning_rate": 3.0282606081134885e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 675 }, { "completion_length": 167.42857360839844, "epoch": 0.4730580825752274, "grad_norm": 1.030028223991394, "kl": 0.12199504673480988, "learning_rate": 3.022478531894336e-07, "loss": 0.0001, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 676 }, { "completion_length": 146.92857360839844, "epoch": 0.4737578726382085, "grad_norm": 0.006876370403915644, "kl": 0.15125451982021332, "learning_rate": 3.016693531712382e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 677 }, { "completion_length": 178.85714721679688, "epoch": 0.47445766270118966, "grad_norm": 1.6082566976547241, "kl": 0.10764802992343903, "learning_rate": 3.010905639942403e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 678 }, { "completion_length": 207.6428680419922, "epoch": 0.47515745276417076, "grad_norm": 0.010926097631454468, "kl": 0.11946795880794525, "learning_rate": 3.005114888975356e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 679 }, { "completion_length": 150.1428680419922, "epoch": 0.47585724282715186, "grad_norm": 0.7955170273780823, "kl": 0.13336814939975739, "learning_rate": 2.9993213112182024e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 680 }, { "completion_length": 125.92857360839844, "epoch": 0.47655703289013296, "grad_norm": 1.1400665044784546, "kl": 0.18289294838905334, "learning_rate": 2.993524939093718e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 681 }, { "completion_length": 134.57144165039062, "epoch": 0.47725682295311406, "grad_norm": 1.3634496927261353, "kl": 0.145736962556839, "learning_rate": 2.987725805040321e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 682 }, { "completion_length": 164.21429443359375, "epoch": 0.47795661301609516, "grad_norm": 1.834984302520752, "kl": 0.13639846444129944, "learning_rate": 2.9819239415118845e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 683 }, { "completion_length": 174.35714721679688, "epoch": 0.47865640307907625, "grad_norm": 0.6305102705955505, "kl": 0.14860427379608154, "learning_rate": 2.976119380977558e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 684 }, { "completion_length": 114.5714340209961, "epoch": 0.4793561931420574, "grad_norm": 2.749418020248413, "kl": 0.38339167833328247, "learning_rate": 2.970312155921584e-07, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 685 }, { "completion_length": 168.92857360839844, "epoch": 0.4800559832050385, "grad_norm": 1.3380252122879028, "kl": 0.12004484236240387, "learning_rate": 2.964502298843116e-07, "loss": 0.0001, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 686 }, { "completion_length": 169.6428680419922, "epoch": 0.4807557732680196, "grad_norm": 1.4518190622329712, "kl": 0.15931259095668793, "learning_rate": 2.958689842256035e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 687 }, { "completion_length": 153.92857360839844, "epoch": 0.4814555633310007, "grad_norm": 1.4006552696228027, "kl": 0.12549205124378204, "learning_rate": 2.952874818688774e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 688 }, { "completion_length": 173.85714721679688, "epoch": 0.4821553533939818, "grad_norm": 1.09505295753479, "kl": 0.12138742208480835, "learning_rate": 2.947057260684129e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 689 }, { "completion_length": 176.42857360839844, "epoch": 0.4828551434569629, "grad_norm": 0.8625134825706482, "kl": 0.18156056106090546, "learning_rate": 2.941237200799081e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 690 }, { "completion_length": 147.71429443359375, "epoch": 0.483554933519944, "grad_norm": 1.3153311014175415, "kl": 0.1469305008649826, "learning_rate": 2.93541467160461e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 691 }, { "completion_length": 191.35714721679688, "epoch": 0.4842547235829251, "grad_norm": 0.8037663698196411, "kl": 0.10981877893209457, "learning_rate": 2.9295897056855183e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 692 }, { "completion_length": 175.07144165039062, "epoch": 0.48495451364590625, "grad_norm": 1.4569430351257324, "kl": 0.12230105698108673, "learning_rate": 2.9237623356402417e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 693 }, { "completion_length": 156.1428680419922, "epoch": 0.48565430370888735, "grad_norm": 0.01559736579656601, "kl": 0.19703175127506256, "learning_rate": 2.9179325940806724e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 694 }, { "completion_length": 152.42857360839844, "epoch": 0.48635409377186845, "grad_norm": 0.9599546790122986, "kl": 0.1494840830564499, "learning_rate": 2.9121005136319735e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 695 }, { "completion_length": 163.5, "epoch": 0.48705388383484954, "grad_norm": 1.1406296491622925, "kl": 0.15769533812999725, "learning_rate": 2.9062661269323987e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 696 }, { "completion_length": 163.92857360839844, "epoch": 0.48775367389783064, "grad_norm": 1.3594409227371216, "kl": 0.22166045010089874, "learning_rate": 2.900429466633106e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 697 }, { "completion_length": 148.71429443359375, "epoch": 0.48845346396081174, "grad_norm": 1.4635483026504517, "kl": 0.19263698160648346, "learning_rate": 2.8945905653979786e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 698 }, { "completion_length": 185.07144165039062, "epoch": 0.48915325402379284, "grad_norm": 0.016811292618513107, "kl": 0.1698264479637146, "learning_rate": 2.8887494559034396e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 699 }, { "completion_length": 143.71429443359375, "epoch": 0.489853044086774, "grad_norm": 1.3379565477371216, "kl": 0.1874609738588333, "learning_rate": 2.882906170838273e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 700 }, { "completion_length": 177.85714721679688, "epoch": 0.4905528341497551, "grad_norm": 1.3974772691726685, "kl": 0.16976402699947357, "learning_rate": 2.877060742903435e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 701 }, { "completion_length": 198.21429443359375, "epoch": 0.4912526242127362, "grad_norm": 1.0022296905517578, "kl": 0.12503929436206818, "learning_rate": 2.871213204811874e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 702 }, { "completion_length": 148.92857360839844, "epoch": 0.4919524142757173, "grad_norm": 0.9043747186660767, "kl": 0.17095284163951874, "learning_rate": 2.8653635892883515e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 703 }, { "completion_length": 176.21429443359375, "epoch": 0.4926522043386984, "grad_norm": 1.2036052942276, "kl": 0.2543278634548187, "learning_rate": 2.859511929069249e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 704 }, { "completion_length": 135.0, "epoch": 0.4933519944016795, "grad_norm": 1.292114019393921, "kl": 0.19982536137104034, "learning_rate": 2.853658256902396e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 705 }, { "completion_length": 182.42857360839844, "epoch": 0.4940517844646606, "grad_norm": 1.3373770713806152, "kl": 0.14790092408657074, "learning_rate": 2.847802605546879e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 706 }, { "completion_length": 171.07144165039062, "epoch": 0.4947515745276417, "grad_norm": 1.1665095090866089, "kl": 0.12240372598171234, "learning_rate": 2.841945007772861e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 707 }, { "completion_length": 172.6428680419922, "epoch": 0.49545136459062283, "grad_norm": 1.5502398014068604, "kl": 0.13512901961803436, "learning_rate": 2.8360854963613986e-07, "loss": 0.0001, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 708 }, { "completion_length": 177.42857360839844, "epoch": 0.49615115465360393, "grad_norm": 0.5891575217247009, "kl": 0.1244434118270874, "learning_rate": 2.8302241041042564e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 709 }, { "completion_length": 156.57144165039062, "epoch": 0.49685094471658503, "grad_norm": 0.7724350690841675, "kl": 0.13233307003974915, "learning_rate": 2.8243608638037245e-07, "loss": 0.0001, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 710 }, { "completion_length": 152.2857208251953, "epoch": 0.49755073477956613, "grad_norm": 1.9077880382537842, "kl": 0.16397586464881897, "learning_rate": 2.818495808272439e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 711 }, { "completion_length": 153.35714721679688, "epoch": 0.4982505248425472, "grad_norm": 1.4308534860610962, "kl": 0.15561728179454803, "learning_rate": 2.812628970333189e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 712 }, { "completion_length": 146.42857360839844, "epoch": 0.4989503149055283, "grad_norm": 1.9188984632492065, "kl": 0.15746258199214935, "learning_rate": 2.8067603828187443e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 713 }, { "completion_length": 180.1428680419922, "epoch": 0.4996501049685094, "grad_norm": 0.4946199357509613, "kl": 0.16803774237632751, "learning_rate": 2.800890078571661e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 714 }, { "completion_length": 114.21429443359375, "epoch": 0.5003498950314905, "grad_norm": 2.6544110774993896, "kl": 0.3250288963317871, "learning_rate": 2.7950180904441055e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 715 }, { "completion_length": 189.42857360839844, "epoch": 0.5010496850944717, "grad_norm": 1.5207462310791016, "kl": 0.10813356935977936, "learning_rate": 2.7891444512976675e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 716 }, { "completion_length": 139.07144165039062, "epoch": 0.5017494751574527, "grad_norm": 0.030628575012087822, "kl": 0.17998436093330383, "learning_rate": 2.783269194003175e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 717 }, { "completion_length": 166.5, "epoch": 0.5024492652204339, "grad_norm": 0.9601883888244629, "kl": 0.1395782232284546, "learning_rate": 2.777392351440512e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 718 }, { "completion_length": 133.1428680419922, "epoch": 0.503149055283415, "grad_norm": 1.0115604400634766, "kl": 0.1677847057580948, "learning_rate": 2.7715139564984357e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 719 }, { "completion_length": 165.71429443359375, "epoch": 0.5038488453463961, "grad_norm": 1.812052607536316, "kl": 0.20493188500404358, "learning_rate": 2.7656340420743895e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 720 }, { "completion_length": 144.57144165039062, "epoch": 0.5045486354093772, "grad_norm": 1.2718943357467651, "kl": 0.201004758477211, "learning_rate": 2.7597526410743214e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 721 }, { "completion_length": 174.21429443359375, "epoch": 0.5052484254723583, "grad_norm": 0.8097813725471497, "kl": 0.19658099114894867, "learning_rate": 2.753869786412497e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 722 }, { "completion_length": 167.7857208251953, "epoch": 0.5059482155353394, "grad_norm": 1.9228341579437256, "kl": 0.24919047951698303, "learning_rate": 2.7479855110113185e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 723 }, { "completion_length": 155.1428680419922, "epoch": 0.5066480055983205, "grad_norm": 1.7304898500442505, "kl": 0.16656360030174255, "learning_rate": 2.742099847801139e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 724 }, { "completion_length": 165.5, "epoch": 0.5073477956613016, "grad_norm": 0.7819796204566956, "kl": 0.1699274331331253, "learning_rate": 2.736212829720078e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 725 }, { "completion_length": 167.0, "epoch": 0.5080475857242828, "grad_norm": 0.496711790561676, "kl": 0.15047401189804077, "learning_rate": 2.730324489713837e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 726 }, { "completion_length": 204.92857360839844, "epoch": 0.5087473757872638, "grad_norm": 1.3005293607711792, "kl": 0.14505939185619354, "learning_rate": 2.724434860735516e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 727 }, { "completion_length": 188.71429443359375, "epoch": 0.509447165850245, "grad_norm": 0.41962334513664246, "kl": 0.14153024554252625, "learning_rate": 2.7185439757454277e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 728 }, { "completion_length": 186.92857360839844, "epoch": 0.510146955913226, "grad_norm": 0.008359805680811405, "kl": 0.15726059675216675, "learning_rate": 2.712651867710914e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 729 }, { "completion_length": 133.1428680419922, "epoch": 0.5108467459762072, "grad_norm": 1.0693659782409668, "kl": 0.21974904835224152, "learning_rate": 2.706758569606163e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 730 }, { "completion_length": 158.85714721679688, "epoch": 0.5115465360391882, "grad_norm": 1.9266133308410645, "kl": 0.19451983273029327, "learning_rate": 2.7008641144120205e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 731 }, { "completion_length": 214.92857360839844, "epoch": 0.5122463261021694, "grad_norm": 0.3528324365615845, "kl": 0.10781332850456238, "learning_rate": 2.694968535115809e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 732 }, { "completion_length": 179.07144165039062, "epoch": 0.5129461161651504, "grad_norm": 2.4797956943511963, "kl": 0.1654929220676422, "learning_rate": 2.689071864711142e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 733 }, { "completion_length": 152.85714721679688, "epoch": 0.5136459062281316, "grad_norm": 1.4804738759994507, "kl": 0.21136166155338287, "learning_rate": 2.6831741361977384e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 734 }, { "completion_length": 146.71429443359375, "epoch": 0.5143456962911127, "grad_norm": 1.0583168268203735, "kl": 0.1778593212366104, "learning_rate": 2.6772753825812397e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 735 }, { "completion_length": 125.64286041259766, "epoch": 0.5150454863540938, "grad_norm": 0.9237343072891235, "kl": 0.2732640206813812, "learning_rate": 2.6713756368730217e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 736 }, { "completion_length": 147.71429443359375, "epoch": 0.5157452764170749, "grad_norm": 1.6291470527648926, "kl": 0.2336374968290329, "learning_rate": 2.6654749320900165e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 737 }, { "completion_length": 194.00001525878906, "epoch": 0.516445066480056, "grad_norm": 2.4692771434783936, "kl": 0.18203742802143097, "learning_rate": 2.659573301254521e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 738 }, { "completion_length": 180.6428680419922, "epoch": 0.5171448565430371, "grad_norm": 1.6918152570724487, "kl": 0.21276965737342834, "learning_rate": 2.653670777394013e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 739 }, { "completion_length": 173.92857360839844, "epoch": 0.5178446466060181, "grad_norm": 0.0516674667596817, "kl": 0.26975953578948975, "learning_rate": 2.647767393540971e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 740 }, { "completion_length": 185.85714721679688, "epoch": 0.5185444366689993, "grad_norm": 0.6655036211013794, "kl": 0.15261439979076385, "learning_rate": 2.641863182732685e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 741 }, { "completion_length": 128.5, "epoch": 0.5192442267319805, "grad_norm": 2.0728025436401367, "kl": 0.25508400797843933, "learning_rate": 2.6359581780110727e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 742 }, { "completion_length": 180.50001525878906, "epoch": 0.5199440167949615, "grad_norm": 0.9152865409851074, "kl": 0.17936374247074127, "learning_rate": 2.6300524124224943e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 743 }, { "completion_length": 174.42857360839844, "epoch": 0.5206438068579426, "grad_norm": 1.058858036994934, "kl": 0.24119043350219727, "learning_rate": 2.62414591901757e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 744 }, { "completion_length": 154.6428680419922, "epoch": 0.5213435969209237, "grad_norm": 1.310758113861084, "kl": 0.1945667415857315, "learning_rate": 2.6182387308509925e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 745 }, { "completion_length": 163.42857360839844, "epoch": 0.5220433869839048, "grad_norm": 0.899238646030426, "kl": 0.1935267150402069, "learning_rate": 2.6123308809813395e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 746 }, { "completion_length": 140.1428680419922, "epoch": 0.5227431770468859, "grad_norm": 1.7584214210510254, "kl": 0.2770542502403259, "learning_rate": 2.606422402470896e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 747 }, { "completion_length": 146.2857208251953, "epoch": 0.523442967109867, "grad_norm": 1.9394080638885498, "kl": 0.24726396799087524, "learning_rate": 2.600513328385463e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 748 }, { "completion_length": 165.7857208251953, "epoch": 0.5241427571728482, "grad_norm": 1.1644587516784668, "kl": 0.2679723799228668, "learning_rate": 2.594603691794176e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 749 }, { "completion_length": 189.1428680419922, "epoch": 0.5248425472358292, "grad_norm": 0.7964421510696411, "kl": 0.1589353382587433, "learning_rate": 2.588693525769315e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 750 }, { "completion_length": 147.6428680419922, "epoch": 0.5255423372988104, "grad_norm": 1.1175942420959473, "kl": 0.24483604729175568, "learning_rate": 2.582782863386129e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 751 }, { "completion_length": 160.21429443359375, "epoch": 0.5262421273617914, "grad_norm": 0.007799674291163683, "kl": 0.19534777104854584, "learning_rate": 2.576871737722638e-07, "loss": 0.0002, "reward": 1.3571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 752 }, { "completion_length": 148.5, "epoch": 0.5269419174247726, "grad_norm": 1.3509938716888428, "kl": 0.2288237065076828, "learning_rate": 2.570960181859458e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 753 }, { "completion_length": 133.92857360839844, "epoch": 0.5276417074877536, "grad_norm": 2.783106565475464, "kl": 0.2117948681116104, "learning_rate": 2.5650482288796134e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 754 }, { "completion_length": 195.50001525878906, "epoch": 0.5283414975507348, "grad_norm": 0.8224034309387207, "kl": 0.17428778111934662, "learning_rate": 2.559135911868349e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 755 }, { "completion_length": 143.42857360839844, "epoch": 0.5290412876137159, "grad_norm": 1.2765090465545654, "kl": 0.2426329404115677, "learning_rate": 2.553223263912949e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 756 }, { "completion_length": 180.07144165039062, "epoch": 0.529741077676697, "grad_norm": 0.761875331401825, "kl": 0.16167525947093964, "learning_rate": 2.5473103181025476e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 757 }, { "completion_length": 209.42857360839844, "epoch": 0.5304408677396781, "grad_norm": 0.7743602395057678, "kl": 0.21465730667114258, "learning_rate": 2.541397107527947e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 758 }, { "completion_length": 152.1428680419922, "epoch": 0.5311406578026592, "grad_norm": 1.3296393156051636, "kl": 0.24849344789981842, "learning_rate": 2.5354836652814293e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 759 }, { "completion_length": 179.92857360839844, "epoch": 0.5318404478656403, "grad_norm": 1.6183704137802124, "kl": 0.20579315721988678, "learning_rate": 2.529570024456576e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 760 }, { "completion_length": 191.35714721679688, "epoch": 0.5325402379286214, "grad_norm": 0.9989681243896484, "kl": 0.1664944440126419, "learning_rate": 2.523656218148079e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 761 }, { "completion_length": 181.71429443359375, "epoch": 0.5332400279916025, "grad_norm": 0.01457426231354475, "kl": 0.1968359351158142, "learning_rate": 2.5177422794515554e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 762 }, { "completion_length": 155.57144165039062, "epoch": 0.5339398180545836, "grad_norm": 1.0912683010101318, "kl": 0.13575726747512817, "learning_rate": 2.5118282414633635e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 763 }, { "completion_length": 197.21429443359375, "epoch": 0.5346396081175647, "grad_norm": 1.3180564641952515, "kl": 0.14729838073253632, "learning_rate": 2.5059141372804167e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 764 }, { "completion_length": 231.2857208251953, "epoch": 0.5353393981805459, "grad_norm": 1.2307592630386353, "kl": 0.1371045708656311, "learning_rate": 2.5e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 765 }, { "completion_length": 194.2857208251953, "epoch": 0.5360391882435269, "grad_norm": 0.5052676200866699, "kl": 0.13933691382408142, "learning_rate": 2.4940858627195836e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 766 }, { "completion_length": 181.85714721679688, "epoch": 0.5367389783065081, "grad_norm": 1.1939496994018555, "kl": 0.20680053532123566, "learning_rate": 2.488171758536637e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 767 }, { "completion_length": 144.6428680419922, "epoch": 0.5374387683694891, "grad_norm": 1.9086456298828125, "kl": 0.18679527938365936, "learning_rate": 2.4822577205484444e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 768 }, { "completion_length": 175.6428680419922, "epoch": 0.5381385584324703, "grad_norm": 1.4252856969833374, "kl": 0.18739019334316254, "learning_rate": 2.4763437818519205e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 769 }, { "completion_length": 196.92857360839844, "epoch": 0.5388383484954513, "grad_norm": 1.1461747884750366, "kl": 0.13078981637954712, "learning_rate": 2.4704299755434234e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 770 }, { "completion_length": 127.85714721679688, "epoch": 0.5395381385584325, "grad_norm": 4.226094722747803, "kl": 0.21880467236042023, "learning_rate": 2.464516334718571e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 771 }, { "completion_length": 143.07144165039062, "epoch": 0.5402379286214136, "grad_norm": 0.775764524936676, "kl": 0.20816804468631744, "learning_rate": 2.458602892472054e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 772 }, { "completion_length": 175.07144165039062, "epoch": 0.5409377186843947, "grad_norm": 1.5386402606964111, "kl": 0.17499686777591705, "learning_rate": 2.452689681897453e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 773 }, { "completion_length": 135.0, "epoch": 0.5416375087473758, "grad_norm": 1.0938345193862915, "kl": 0.22121502459049225, "learning_rate": 2.446776736087051e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 774 }, { "completion_length": 178.71429443359375, "epoch": 0.5423372988103569, "grad_norm": 1.667083740234375, "kl": 0.128261536359787, "learning_rate": 2.440864088131651e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 775 }, { "completion_length": 156.85714721679688, "epoch": 0.543037088873338, "grad_norm": 0.008071779273450375, "kl": 0.1659298688173294, "learning_rate": 2.434951771120387e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 776 }, { "completion_length": 161.07144165039062, "epoch": 0.5437368789363191, "grad_norm": 0.007303138263523579, "kl": 0.1527920812368393, "learning_rate": 2.429039818140542e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 777 }, { "completion_length": 173.21429443359375, "epoch": 0.5444366689993002, "grad_norm": 0.9514021873474121, "kl": 0.1167156770825386, "learning_rate": 2.4231282622773623e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 778 }, { "completion_length": 197.21429443359375, "epoch": 0.5451364590622814, "grad_norm": 1.1039326190948486, "kl": 0.18756325542926788, "learning_rate": 2.4172171366138714e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 779 }, { "completion_length": 193.6428680419922, "epoch": 0.5458362491252624, "grad_norm": 0.9844845533370972, "kl": 0.1270662099123001, "learning_rate": 2.4113064742306845e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 780 }, { "completion_length": 126.0714340209961, "epoch": 0.5465360391882436, "grad_norm": 2.517832040786743, "kl": 0.21809427440166473, "learning_rate": 2.4053963082058243e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 781 }, { "completion_length": 159.42857360839844, "epoch": 0.5472358292512246, "grad_norm": 1.8299363851547241, "kl": 0.14245009422302246, "learning_rate": 2.3994866716145365e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 782 }, { "completion_length": 163.42857360839844, "epoch": 0.5479356193142058, "grad_norm": 0.732767641544342, "kl": 0.18744169175624847, "learning_rate": 2.3935775975291045e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 783 }, { "completion_length": 137.2857208251953, "epoch": 0.5486354093771868, "grad_norm": 4.943038463592529, "kl": 0.2658807039260864, "learning_rate": 2.387669119018661e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 784 }, { "completion_length": 186.57144165039062, "epoch": 0.549335199440168, "grad_norm": 0.005315029993653297, "kl": 0.11508393287658691, "learning_rate": 2.3817612691490086e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 785 }, { "completion_length": 163.92857360839844, "epoch": 0.5500349895031491, "grad_norm": 0.8293732404708862, "kl": 0.18727651238441467, "learning_rate": 2.37585408098243e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 786 }, { "completion_length": 142.71429443359375, "epoch": 0.5507347795661302, "grad_norm": 1.1716316938400269, "kl": 0.25468987226486206, "learning_rate": 2.3699475875775057e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 787 }, { "completion_length": 221.21429443359375, "epoch": 0.5514345696291113, "grad_norm": 0.023676056414842606, "kl": 0.1703471541404724, "learning_rate": 2.3640418219889274e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 788 }, { "completion_length": 168.7857208251953, "epoch": 0.5521343596920923, "grad_norm": 0.007940025068819523, "kl": 0.14952266216278076, "learning_rate": 2.3581368172673148e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 789 }, { "completion_length": 211.2857208251953, "epoch": 0.5528341497550735, "grad_norm": 0.4833106994628906, "kl": 0.14444862306118011, "learning_rate": 2.352232606459029e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 790 }, { "completion_length": 173.50001525878906, "epoch": 0.5535339398180545, "grad_norm": 0.8782841563224792, "kl": 0.11432904750108719, "learning_rate": 2.346329222605987e-07, "loss": 0.0001, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 791 }, { "completion_length": 163.35714721679688, "epoch": 0.5542337298810357, "grad_norm": 1.0878326892852783, "kl": 0.14284256100654602, "learning_rate": 2.3404266987454788e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 792 }, { "completion_length": 160.21429443359375, "epoch": 0.5549335199440167, "grad_norm": 1.8105785846710205, "kl": 0.2008804976940155, "learning_rate": 2.3345250679099828e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 793 }, { "completion_length": 145.0, "epoch": 0.5556333100069979, "grad_norm": 1.4133714437484741, "kl": 0.17985039949417114, "learning_rate": 2.3286243631269778e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 794 }, { "completion_length": 194.50001525878906, "epoch": 0.556333100069979, "grad_norm": 0.860791802406311, "kl": 0.14414426684379578, "learning_rate": 2.3227246174187614e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 795 }, { "completion_length": 136.85714721679688, "epoch": 0.5570328901329601, "grad_norm": 1.6177887916564941, "kl": 0.22329559922218323, "learning_rate": 2.316825863802262e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 796 }, { "completion_length": 186.57144165039062, "epoch": 0.5577326801959412, "grad_norm": 1.6921424865722656, "kl": 0.1834661364555359, "learning_rate": 2.310928135288859e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 797 }, { "completion_length": 150.1428680419922, "epoch": 0.5584324702589223, "grad_norm": 0.03219272941350937, "kl": 0.2264881134033203, "learning_rate": 2.305031464884191e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 798 }, { "completion_length": 143.0, "epoch": 0.5591322603219034, "grad_norm": 1.8323851823806763, "kl": 0.21593062579631805, "learning_rate": 2.2991358855879798e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 799 }, { "completion_length": 146.85714721679688, "epoch": 0.5598320503848845, "grad_norm": 0.7887182235717773, "kl": 0.19635961949825287, "learning_rate": 2.2932414303938374e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 800 }, { "completion_length": 129.85714721679688, "epoch": 0.5605318404478656, "grad_norm": 1.2324298620224, "kl": 0.2163587212562561, "learning_rate": 2.2873481322890862e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 801 }, { "completion_length": 177.42857360839844, "epoch": 0.5612316305108468, "grad_norm": 0.4080323576927185, "kl": 0.15288673341274261, "learning_rate": 2.2814560242545723e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 802 }, { "completion_length": 195.57144165039062, "epoch": 0.5619314205738278, "grad_norm": 0.9656206965446472, "kl": 0.14835630357265472, "learning_rate": 2.2755651392644839e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 803 }, { "completion_length": 156.21429443359375, "epoch": 0.562631210636809, "grad_norm": 2.2364025115966797, "kl": 0.2269601970911026, "learning_rate": 2.2696755102861628e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 804 }, { "completion_length": 175.07144165039062, "epoch": 0.56333100069979, "grad_norm": 0.47596055269241333, "kl": 0.1697089821100235, "learning_rate": 2.2637871702799219e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 805 }, { "completion_length": 148.42857360839844, "epoch": 0.5640307907627712, "grad_norm": 2.0827200412750244, "kl": 0.1916750967502594, "learning_rate": 2.2579001521988603e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 806 }, { "completion_length": 213.71429443359375, "epoch": 0.5647305808257522, "grad_norm": 0.012787840329110622, "kl": 0.14254257082939148, "learning_rate": 2.2520144889886818e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 807 }, { "completion_length": 230.00001525878906, "epoch": 0.5654303708887334, "grad_norm": 0.8881184458732605, "kl": 0.083969347178936, "learning_rate": 2.2461302135875032e-07, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 808 }, { "completion_length": 180.1428680419922, "epoch": 0.5661301609517145, "grad_norm": 0.9786560535430908, "kl": 0.1263761669397354, "learning_rate": 2.240247358925679e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 809 }, { "completion_length": 132.71429443359375, "epoch": 0.5668299510146956, "grad_norm": 1.1269447803497314, "kl": 0.17150788009166718, "learning_rate": 2.2343659579256106e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 810 }, { "completion_length": 165.21429443359375, "epoch": 0.5675297410776767, "grad_norm": 2.079000473022461, "kl": 0.18628434836864471, "learning_rate": 2.2284860435015646e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 811 }, { "completion_length": 126.21429443359375, "epoch": 0.5682295311406578, "grad_norm": 1.7653608322143555, "kl": 0.1837475448846817, "learning_rate": 2.2226076485594876e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 812 }, { "completion_length": 206.50001525878906, "epoch": 0.5689293212036389, "grad_norm": 0.5196667313575745, "kl": 0.1711001843214035, "learning_rate": 2.2167308059968255e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 813 }, { "completion_length": 252.00001525878906, "epoch": 0.56962911126662, "grad_norm": 1.0780601501464844, "kl": 0.10021327435970306, "learning_rate": 2.2108555487023325e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 814 }, { "completion_length": 186.71429443359375, "epoch": 0.5703289013296011, "grad_norm": 1.6806303262710571, "kl": 0.19783681631088257, "learning_rate": 2.2049819095558943e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 815 }, { "completion_length": 173.85714721679688, "epoch": 0.5710286913925823, "grad_norm": 1.1859450340270996, "kl": 0.1451978087425232, "learning_rate": 2.1991099214283386e-07, "loss": 0.0001, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 816 }, { "completion_length": 197.57144165039062, "epoch": 0.5717284814555633, "grad_norm": 0.005376486107707024, "kl": 0.11479634046554565, "learning_rate": 2.1932396171812557e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 817 }, { "completion_length": 196.35714721679688, "epoch": 0.5724282715185445, "grad_norm": 0.5410878658294678, "kl": 0.11745567619800568, "learning_rate": 2.1873710296668102e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 818 }, { "completion_length": 187.2857208251953, "epoch": 0.5731280615815255, "grad_norm": 0.4786253571510315, "kl": 0.23107686638832092, "learning_rate": 2.1815041917275617e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 819 }, { "completion_length": 175.7857208251953, "epoch": 0.5738278516445067, "grad_norm": 1.0638198852539062, "kl": 0.11271245032548904, "learning_rate": 2.1756391361962755e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 820 }, { "completion_length": 188.00001525878906, "epoch": 0.5745276417074877, "grad_norm": 1.1761832237243652, "kl": 0.15078361332416534, "learning_rate": 2.1697758958957447e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 821 }, { "completion_length": 160.2857208251953, "epoch": 0.5752274317704689, "grad_norm": 1.535273790359497, "kl": 0.20061665773391724, "learning_rate": 2.1639145036386017e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 822 }, { "completion_length": 187.7857208251953, "epoch": 0.57592722183345, "grad_norm": 0.8115896582603455, "kl": 0.16780106723308563, "learning_rate": 2.1580549922271387e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 823 }, { "completion_length": 132.21429443359375, "epoch": 0.5766270118964311, "grad_norm": 1.233547329902649, "kl": 0.21519875526428223, "learning_rate": 2.1521973944531208e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 824 }, { "completion_length": 140.5, "epoch": 0.5773268019594122, "grad_norm": 1.8561373949050903, "kl": 0.24315886199474335, "learning_rate": 2.1463417430976038e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 825 }, { "completion_length": 173.2857208251953, "epoch": 0.5780265920223933, "grad_norm": 1.0453978776931763, "kl": 0.1836603432893753, "learning_rate": 2.1404880709307505e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 826 }, { "completion_length": 157.42857360839844, "epoch": 0.5787263820853744, "grad_norm": 1.165291428565979, "kl": 0.17072586715221405, "learning_rate": 2.1346364107116488e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 827 }, { "completion_length": 176.2857208251953, "epoch": 0.5794261721483555, "grad_norm": 1.318305253982544, "kl": 0.22624251246452332, "learning_rate": 2.1287867951881256e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 828 }, { "completion_length": 102.35714721679688, "epoch": 0.5801259622113366, "grad_norm": 2.365300178527832, "kl": 0.27987831830978394, "learning_rate": 2.1229392570965654e-07, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 829 }, { "completion_length": 199.71429443359375, "epoch": 0.5808257522743177, "grad_norm": 0.012346138246357441, "kl": 0.15743298828601837, "learning_rate": 2.1170938291617266e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 830 }, { "completion_length": 148.7857208251953, "epoch": 0.5815255423372988, "grad_norm": 1.0664339065551758, "kl": 0.24007420241832733, "learning_rate": 2.1112505440965607e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 831 }, { "completion_length": 155.35714721679688, "epoch": 0.58222533240028, "grad_norm": 0.39355382323265076, "kl": 0.22916604578495026, "learning_rate": 2.1054094346020217e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 832 }, { "completion_length": 176.92857360839844, "epoch": 0.582925122463261, "grad_norm": 0.012202534824609756, "kl": 0.227097749710083, "learning_rate": 2.0995705333668944e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 833 }, { "completion_length": 160.1428680419922, "epoch": 0.5836249125262422, "grad_norm": 1.7501753568649292, "kl": 0.2093331515789032, "learning_rate": 2.0937338730676016e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 834 }, { "completion_length": 207.1428680419922, "epoch": 0.5843247025892232, "grad_norm": 1.0226585865020752, "kl": 0.15040700137615204, "learning_rate": 2.0878994863680263e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 835 }, { "completion_length": 128.85714721679688, "epoch": 0.5850244926522044, "grad_norm": 3.3700737953186035, "kl": 0.29976505041122437, "learning_rate": 2.0820674059193274e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 836 }, { "completion_length": 144.21429443359375, "epoch": 0.5857242827151854, "grad_norm": 1.5458950996398926, "kl": 0.20280548930168152, "learning_rate": 2.076237664359758e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 837 }, { "completion_length": 158.92857360839844, "epoch": 0.5864240727781665, "grad_norm": 0.011991043575108051, "kl": 0.2772923409938812, "learning_rate": 2.0704102943144818e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 838 }, { "completion_length": 215.1428680419922, "epoch": 0.5871238628411477, "grad_norm": 0.004565827548503876, "kl": 0.14662621915340424, "learning_rate": 2.0645853283953896e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 839 }, { "completion_length": 161.7857208251953, "epoch": 0.5878236529041287, "grad_norm": 1.6561652421951294, "kl": 0.17183317244052887, "learning_rate": 2.0587627992009188e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 840 }, { "completion_length": 226.85714721679688, "epoch": 0.5885234429671099, "grad_norm": 0.4825284481048584, "kl": 0.17638690769672394, "learning_rate": 2.0529427393158703e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 841 }, { "completion_length": 185.1428680419922, "epoch": 0.5892232330300909, "grad_norm": 0.9059503078460693, "kl": 0.17314548790454865, "learning_rate": 2.0471251813112257e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 842 }, { "completion_length": 165.57144165039062, "epoch": 0.5899230230930721, "grad_norm": 1.532495379447937, "kl": 0.18971356749534607, "learning_rate": 2.0413101577439653e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 843 }, { "completion_length": 176.92857360839844, "epoch": 0.5906228131560531, "grad_norm": 1.5365586280822754, "kl": 0.2407262623310089, "learning_rate": 2.035497701156885e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 844 }, { "completion_length": 176.35714721679688, "epoch": 0.5913226032190343, "grad_norm": 0.8651817440986633, "kl": 0.20362547039985657, "learning_rate": 2.029687844078416e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 845 }, { "completion_length": 185.92857360839844, "epoch": 0.5920223932820154, "grad_norm": 0.8897268176078796, "kl": 0.16057699918746948, "learning_rate": 2.0238806190224418e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 846 }, { "completion_length": 256.0, "epoch": 0.5927221833449965, "grad_norm": 0.008452930487692356, "kl": 0.1544729322195053, "learning_rate": 2.018076058488115e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 847 }, { "completion_length": 152.07144165039062, "epoch": 0.5934219734079776, "grad_norm": 2.349240303039551, "kl": 0.24645240604877472, "learning_rate": 2.0122741949596793e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 848 }, { "completion_length": 130.6428680419922, "epoch": 0.5941217634709587, "grad_norm": 1.8523670434951782, "kl": 0.22494810819625854, "learning_rate": 2.0064750609062826e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 849 }, { "completion_length": 179.7857208251953, "epoch": 0.5948215535339398, "grad_norm": 1.866913080215454, "kl": 0.19455549120903015, "learning_rate": 2.000678688781798e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 850 }, { "completion_length": 240.07144165039062, "epoch": 0.5955213435969209, "grad_norm": 1.1706783771514893, "kl": 0.1493280827999115, "learning_rate": 1.9948851110246427e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 851 }, { "completion_length": 131.6428680419922, "epoch": 0.596221133659902, "grad_norm": 1.4381402730941772, "kl": 0.33166804909706116, "learning_rate": 1.9890943600575966e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 852 }, { "completion_length": 149.7857208251953, "epoch": 0.5969209237228832, "grad_norm": 0.6219565272331238, "kl": 0.20014925301074982, "learning_rate": 1.9833064682876175e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 853 }, { "completion_length": 159.6428680419922, "epoch": 0.5976207137858642, "grad_norm": 0.8595949411392212, "kl": 0.19390232861042023, "learning_rate": 1.9775214681056642e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 854 }, { "completion_length": 186.92857360839844, "epoch": 0.5983205038488454, "grad_norm": 0.7010074853897095, "kl": 0.1672981232404709, "learning_rate": 1.971739391886512e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 855 }, { "completion_length": 175.85714721679688, "epoch": 0.5990202939118264, "grad_norm": 1.0448294878005981, "kl": 0.227269247174263, "learning_rate": 1.9659602719885736e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 856 }, { "completion_length": 186.92857360839844, "epoch": 0.5997200839748076, "grad_norm": 0.6769022941589355, "kl": 0.16444669663906097, "learning_rate": 1.9601841407537155e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 857 }, { "completion_length": 169.42857360839844, "epoch": 0.6004198740377886, "grad_norm": 0.008497945964336395, "kl": 0.17197445034980774, "learning_rate": 1.9544110305070825e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 858 }, { "completion_length": 165.85714721679688, "epoch": 0.6011196641007698, "grad_norm": 1.0927510261535645, "kl": 0.18087631464004517, "learning_rate": 1.9486409735569084e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 859 }, { "completion_length": 166.6428680419922, "epoch": 0.6018194541637508, "grad_norm": 1.02528715133667, "kl": 0.19241982698440552, "learning_rate": 1.9428740021943425e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 860 }, { "completion_length": 214.71429443359375, "epoch": 0.602519244226732, "grad_norm": 1.0351279973983765, "kl": 0.149065300822258, "learning_rate": 1.937110148693265e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 861 }, { "completion_length": 209.35714721679688, "epoch": 0.6032190342897131, "grad_norm": 1.3731406927108765, "kl": 0.1712058186531067, "learning_rate": 1.931349445310108e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 862 }, { "completion_length": 194.7857208251953, "epoch": 0.6039188243526942, "grad_norm": 1.7344189882278442, "kl": 0.16901424527168274, "learning_rate": 1.9255919242836752e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 863 }, { "completion_length": 120.5714340209961, "epoch": 0.6046186144156753, "grad_norm": 0.026887858286499977, "kl": 0.3052825331687927, "learning_rate": 1.919837617834959e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 864 }, { "completion_length": 172.00001525878906, "epoch": 0.6053184044786564, "grad_norm": 0.9924876689910889, "kl": 0.16669687628746033, "learning_rate": 1.9140865581669623e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 865 }, { "completion_length": 168.21429443359375, "epoch": 0.6060181945416375, "grad_norm": 1.088887333869934, "kl": 0.17015387117862701, "learning_rate": 1.9083387774645216e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 866 }, { "completion_length": 165.71429443359375, "epoch": 0.6067179846046186, "grad_norm": 0.0055878073908388615, "kl": 0.1800890862941742, "learning_rate": 1.9025943078941163e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 867 }, { "completion_length": 224.7857208251953, "epoch": 0.6074177746675997, "grad_norm": 1.0890886783599854, "kl": 0.1475372165441513, "learning_rate": 1.8968531816037016e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 868 }, { "completion_length": 164.0, "epoch": 0.6081175647305809, "grad_norm": 1.215153455734253, "kl": 0.21680432558059692, "learning_rate": 1.8911154307225203e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 869 }, { "completion_length": 144.0, "epoch": 0.6088173547935619, "grad_norm": 1.794198751449585, "kl": 0.303621768951416, "learning_rate": 1.8853810873609253e-07, "loss": 0.0003, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 870 }, { "completion_length": 158.21429443359375, "epoch": 0.6095171448565431, "grad_norm": 1.3698910474777222, "kl": 0.20273838937282562, "learning_rate": 1.8796501836101996e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 871 }, { "completion_length": 98.71428680419922, "epoch": 0.6102169349195241, "grad_norm": 1.2380337715148926, "kl": 0.3376390039920807, "learning_rate": 1.8739227515423782e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 872 }, { "completion_length": 152.85714721679688, "epoch": 0.6109167249825053, "grad_norm": 1.6009806394577026, "kl": 0.18066494166851044, "learning_rate": 1.8681988232100672e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 873 }, { "completion_length": 171.1428680419922, "epoch": 0.6116165150454863, "grad_norm": 0.717288613319397, "kl": 0.21399514377117157, "learning_rate": 1.8624784306462626e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 874 }, { "completion_length": 232.6428680419922, "epoch": 0.6123163051084675, "grad_norm": 0.37405624985694885, "kl": 0.15109014511108398, "learning_rate": 1.8567616058641753e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 875 }, { "completion_length": 191.1428680419922, "epoch": 0.6130160951714486, "grad_norm": 1.0392940044403076, "kl": 0.2378709316253662, "learning_rate": 1.8510483808570492e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 876 }, { "completion_length": 118.71429443359375, "epoch": 0.6137158852344297, "grad_norm": 1.1159281730651855, "kl": 0.3038044571876526, "learning_rate": 1.845338787597983e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 877 }, { "completion_length": 174.50001525878906, "epoch": 0.6144156752974108, "grad_norm": 0.8659864068031311, "kl": 0.1927037537097931, "learning_rate": 1.8396328580397503e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 878 }, { "completion_length": 175.50001525878906, "epoch": 0.6151154653603919, "grad_norm": 0.008233739994466305, "kl": 0.16364799439907074, "learning_rate": 1.83393062411462e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 879 }, { "completion_length": 222.4285888671875, "epoch": 0.615815255423373, "grad_norm": 0.9414530992507935, "kl": 0.13570350408554077, "learning_rate": 1.828232117734182e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 880 }, { "completion_length": 167.5, "epoch": 0.616515045486354, "grad_norm": 0.9398598670959473, "kl": 0.17222319543361664, "learning_rate": 1.8225373707891627e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 881 }, { "completion_length": 144.2857208251953, "epoch": 0.6172148355493352, "grad_norm": 1.8485746383666992, "kl": 0.2285045087337494, "learning_rate": 1.816846415149253e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 882 }, { "completion_length": 134.7857208251953, "epoch": 0.6179146256123164, "grad_norm": 1.7787970304489136, "kl": 0.2089158594608307, "learning_rate": 1.8111592826629233e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 883 }, { "completion_length": 199.85714721679688, "epoch": 0.6186144156752974, "grad_norm": 1.3203638792037964, "kl": 0.15567763149738312, "learning_rate": 1.8054760051572492e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 884 }, { "completion_length": 162.0, "epoch": 0.6193142057382786, "grad_norm": 0.7157139778137207, "kl": 0.19189192354679108, "learning_rate": 1.7997966144377326e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 885 }, { "completion_length": 151.2857208251953, "epoch": 0.6200139958012596, "grad_norm": 1.7402904033660889, "kl": 0.20553071796894073, "learning_rate": 1.7941211422881243e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 886 }, { "completion_length": 142.7857208251953, "epoch": 0.6207137858642408, "grad_norm": 1.029819130897522, "kl": 0.23870159685611725, "learning_rate": 1.7884496204702455e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 887 }, { "completion_length": 151.92857360839844, "epoch": 0.6214135759272218, "grad_norm": 2.511532783508301, "kl": 0.20830222964286804, "learning_rate": 1.7827820807238083e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 888 }, { "completion_length": 148.07144165039062, "epoch": 0.622113365990203, "grad_norm": 1.6617085933685303, "kl": 0.22244317829608917, "learning_rate": 1.7771185547662414e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 889 }, { "completion_length": 167.92857360839844, "epoch": 0.622813156053184, "grad_norm": 0.4730815291404724, "kl": 0.13272641599178314, "learning_rate": 1.7714590742925123e-07, "loss": 0.0001, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 890 }, { "completion_length": 226.2857208251953, "epoch": 0.6235129461161651, "grad_norm": 0.04165325313806534, "kl": 0.2448219656944275, "learning_rate": 1.765803670974944e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 891 }, { "completion_length": 162.42857360839844, "epoch": 0.6242127361791463, "grad_norm": 1.5527950525283813, "kl": 0.19318516552448273, "learning_rate": 1.7601523764630474e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 892 }, { "completion_length": 169.07144165039062, "epoch": 0.6249125262421273, "grad_norm": 1.2021851539611816, "kl": 0.19472841918468475, "learning_rate": 1.7545052223833367e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 893 }, { "completion_length": 137.07144165039062, "epoch": 0.6256123163051085, "grad_norm": 1.1029552221298218, "kl": 0.20346197485923767, "learning_rate": 1.748862240339155e-07, "loss": 0.0002, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 894 }, { "completion_length": 240.07144165039062, "epoch": 0.6263121063680895, "grad_norm": 1.951439619064331, "kl": 0.1709141880273819, "learning_rate": 1.7432234619104977e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 895 }, { "completion_length": 167.6428680419922, "epoch": 0.6270118964310707, "grad_norm": 0.6033363938331604, "kl": 0.24781446158885956, "learning_rate": 1.7375889186538356e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 896 }, { "completion_length": 161.2857208251953, "epoch": 0.6277116864940517, "grad_norm": 1.191978096961975, "kl": 0.19380377233028412, "learning_rate": 1.7319586421019381e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 897 }, { "completion_length": 196.57144165039062, "epoch": 0.6284114765570329, "grad_norm": 0.006588555872440338, "kl": 0.15645967423915863, "learning_rate": 1.7263326637636955e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 898 }, { "completion_length": 156.42857360839844, "epoch": 0.629111266620014, "grad_norm": 1.2757123708724976, "kl": 0.21630361676216125, "learning_rate": 1.7207110151239445e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 899 }, { "completion_length": 180.85714721679688, "epoch": 0.6298110566829951, "grad_norm": 0.9167988896369934, "kl": 0.22591190040111542, "learning_rate": 1.715093727643293e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 900 }, { "completion_length": 159.07144165039062, "epoch": 0.6305108467459762, "grad_norm": 1.544127106666565, "kl": 0.17687027156352997, "learning_rate": 1.70948083275794e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 901 }, { "completion_length": 189.35714721679688, "epoch": 0.6312106368089573, "grad_norm": 0.010835451073944569, "kl": 0.18956896662712097, "learning_rate": 1.7038723618795038e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 902 }, { "completion_length": 137.0, "epoch": 0.6319104268719384, "grad_norm": 0.018081799149513245, "kl": 0.23992720246315002, "learning_rate": 1.698268346394843e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 903 }, { "completion_length": 164.5, "epoch": 0.6326102169349195, "grad_norm": 1.500994324684143, "kl": 0.15611298382282257, "learning_rate": 1.6926688176658834e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 904 }, { "completion_length": 131.07144165039062, "epoch": 0.6333100069979006, "grad_norm": 1.0094021558761597, "kl": 0.24395494163036346, "learning_rate": 1.687073807029441e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 905 }, { "completion_length": 170.7857208251953, "epoch": 0.6340097970608818, "grad_norm": 1.2373005151748657, "kl": 0.1788647472858429, "learning_rate": 1.681483345797048e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 906 }, { "completion_length": 121.78572082519531, "epoch": 0.6347095871238628, "grad_norm": 1.374475359916687, "kl": 0.24508827924728394, "learning_rate": 1.6758974652547747e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 907 }, { "completion_length": 172.21429443359375, "epoch": 0.635409377186844, "grad_norm": 0.7142678499221802, "kl": 0.16253429651260376, "learning_rate": 1.6703161966630587e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 908 }, { "completion_length": 258.9285888671875, "epoch": 0.636109167249825, "grad_norm": 0.019807111471891403, "kl": 0.14491768181324005, "learning_rate": 1.6647395712565254e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 909 }, { "completion_length": 181.57144165039062, "epoch": 0.6368089573128062, "grad_norm": 0.9091986417770386, "kl": 0.1761297881603241, "learning_rate": 1.659167620243817e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 910 }, { "completion_length": 159.71429443359375, "epoch": 0.6375087473757872, "grad_norm": 0.7970837950706482, "kl": 0.15495167672634125, "learning_rate": 1.653600374807416e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 911 }, { "completion_length": 155.42857360839844, "epoch": 0.6382085374387684, "grad_norm": 1.8701930046081543, "kl": 0.25284096598625183, "learning_rate": 1.64803786610347e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 912 }, { "completion_length": 172.1428680419922, "epoch": 0.6389083275017495, "grad_norm": 1.6203675270080566, "kl": 0.15912678837776184, "learning_rate": 1.6424801252616183e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 913 }, { "completion_length": 176.92857360839844, "epoch": 0.6396081175647306, "grad_norm": 1.2082090377807617, "kl": 0.31424060463905334, "learning_rate": 1.6369271833848203e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 914 }, { "completion_length": 235.6428680419922, "epoch": 0.6403079076277117, "grad_norm": 0.6299129128456116, "kl": 0.10698655992746353, "learning_rate": 1.6313790715491737e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 915 }, { "completion_length": 112.00000762939453, "epoch": 0.6410076976906928, "grad_norm": 2.1483614444732666, "kl": 0.2961471676826477, "learning_rate": 1.625835820803751e-07, "loss": 0.0003, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 916 }, { "completion_length": 156.42857360839844, "epoch": 0.6417074877536739, "grad_norm": 0.010090836323797703, "kl": 0.16118846833705902, "learning_rate": 1.6202974621704174e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 917 }, { "completion_length": 208.57144165039062, "epoch": 0.642407277816655, "grad_norm": 0.007181811146438122, "kl": 0.14046595990657806, "learning_rate": 1.6147640266436612e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 918 }, { "completion_length": 137.71429443359375, "epoch": 0.6431070678796361, "grad_norm": 1.5873363018035889, "kl": 0.19496656954288483, "learning_rate": 1.6092355451904182e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 919 }, { "completion_length": 131.1428680419922, "epoch": 0.6438068579426172, "grad_norm": 0.058042287826538086, "kl": 0.31744757294654846, "learning_rate": 1.6037120487499022e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 920 }, { "completion_length": 193.07144165039062, "epoch": 0.6445066480055983, "grad_norm": 0.484791100025177, "kl": 0.19058968126773834, "learning_rate": 1.5981935682334263e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 921 }, { "completion_length": 182.50001525878906, "epoch": 0.6452064380685795, "grad_norm": 0.013481308706104755, "kl": 0.1722410023212433, "learning_rate": 1.5926801345242345e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 922 }, { "completion_length": 185.21429443359375, "epoch": 0.6459062281315605, "grad_norm": 1.4077775478363037, "kl": 0.2380852848291397, "learning_rate": 1.5871717784773258e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 923 }, { "completion_length": 200.2857208251953, "epoch": 0.6466060181945417, "grad_norm": 1.437928557395935, "kl": 0.18210062384605408, "learning_rate": 1.5816685309192858e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 924 }, { "completion_length": 121.71429443359375, "epoch": 0.6473058082575227, "grad_norm": 1.5237213373184204, "kl": 0.22129568457603455, "learning_rate": 1.5761704226481076e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 925 }, { "completion_length": 193.1428680419922, "epoch": 0.6480055983205039, "grad_norm": 0.0045374599285423756, "kl": 0.1180194839835167, "learning_rate": 1.570677484433026e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 926 }, { "completion_length": 147.35714721679688, "epoch": 0.6487053883834849, "grad_norm": 1.773646593093872, "kl": 0.20480181276798248, "learning_rate": 1.565189747014341e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 927 }, { "completion_length": 163.85714721679688, "epoch": 0.6494051784464661, "grad_norm": 2.0437304973602295, "kl": 0.21040508151054382, "learning_rate": 1.5597072411032468e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 928 }, { "completion_length": 171.35714721679688, "epoch": 0.6501049685094472, "grad_norm": 1.009155035018921, "kl": 0.15763044357299805, "learning_rate": 1.5542299973816626e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 929 }, { "completion_length": 150.92857360839844, "epoch": 0.6508047585724283, "grad_norm": 0.9501484036445618, "kl": 0.22952969372272491, "learning_rate": 1.5487580465020557e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 930 }, { "completion_length": 148.35714721679688, "epoch": 0.6515045486354094, "grad_norm": 0.017871538177132607, "kl": 0.23398879170417786, "learning_rate": 1.5432914190872756e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 931 }, { "completion_length": 207.7857208251953, "epoch": 0.6522043386983905, "grad_norm": 2.1803815364837646, "kl": 0.1370982676744461, "learning_rate": 1.5378301457303782e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 932 }, { "completion_length": 188.57144165039062, "epoch": 0.6529041287613716, "grad_norm": 0.0056706503964960575, "kl": 0.14348086714744568, "learning_rate": 1.532374256994457e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 933 }, { "completion_length": 201.50001525878906, "epoch": 0.6536039188243526, "grad_norm": 0.008086386136710644, "kl": 0.1556430459022522, "learning_rate": 1.5269237834124712e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 934 }, { "completion_length": 175.21429443359375, "epoch": 0.6543037088873338, "grad_norm": 1.2199071645736694, "kl": 0.15192537009716034, "learning_rate": 1.521478755487075e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 935 }, { "completion_length": 177.50001525878906, "epoch": 0.655003498950315, "grad_norm": 0.8560691475868225, "kl": 0.1556873768568039, "learning_rate": 1.5160392036904472e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 936 }, { "completion_length": 109.42857360839844, "epoch": 0.655703289013296, "grad_norm": 1.2047864198684692, "kl": 0.2379571944475174, "learning_rate": 1.5106051584641204e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 937 }, { "completion_length": 138.42857360839844, "epoch": 0.6564030790762772, "grad_norm": 0.6178500056266785, "kl": 0.18617750704288483, "learning_rate": 1.505176650218809e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 938 }, { "completion_length": 165.2857208251953, "epoch": 0.6571028691392582, "grad_norm": 0.966590404510498, "kl": 0.22101904451847076, "learning_rate": 1.4997537093342434e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 939 }, { "completion_length": 179.71429443359375, "epoch": 0.6578026592022393, "grad_norm": 1.4209133386611938, "kl": 0.15065574645996094, "learning_rate": 1.4943363661589935e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 940 }, { "completion_length": 209.7857208251953, "epoch": 0.6585024492652204, "grad_norm": 0.007459959015250206, "kl": 0.15067200362682343, "learning_rate": 1.4889246510103075e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 941 }, { "completion_length": 167.0, "epoch": 0.6592022393282015, "grad_norm": 0.6800208687782288, "kl": 0.1823139786720276, "learning_rate": 1.4835185941739337e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 942 }, { "completion_length": 248.2857208251953, "epoch": 0.6599020293911827, "grad_norm": 0.0087742293253541, "kl": 0.1312844455242157, "learning_rate": 1.478118225903955e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 943 }, { "completion_length": 171.57144165039062, "epoch": 0.6606018194541637, "grad_norm": 0.9539096355438232, "kl": 0.17217165231704712, "learning_rate": 1.4727235764226199e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 944 }, { "completion_length": 147.57144165039062, "epoch": 0.6613016095171449, "grad_norm": 2.052114725112915, "kl": 0.19877298176288605, "learning_rate": 1.4673346759201726e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 945 }, { "completion_length": 146.07144165039062, "epoch": 0.6620013995801259, "grad_norm": 0.7366251349449158, "kl": 0.19033154845237732, "learning_rate": 1.4619515545546846e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 946 }, { "completion_length": 172.50001525878906, "epoch": 0.6627011896431071, "grad_norm": 0.5749490857124329, "kl": 0.21772313117980957, "learning_rate": 1.456574242451884e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 947 }, { "completion_length": 132.21429443359375, "epoch": 0.6634009797060881, "grad_norm": 0.010821877047419548, "kl": 0.20041915774345398, "learning_rate": 1.451202769704989e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 948 }, { "completion_length": 151.71429443359375, "epoch": 0.6641007697690693, "grad_norm": 2.190380811691284, "kl": 0.22342224419116974, "learning_rate": 1.44583716637454e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 949 }, { "completion_length": 132.0, "epoch": 0.6648005598320503, "grad_norm": 1.7269536256790161, "kl": 0.28627052903175354, "learning_rate": 1.4404774624882294e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 950 }, { "completion_length": 149.5, "epoch": 0.6655003498950315, "grad_norm": 2.3918309211730957, "kl": 0.19576948881149292, "learning_rate": 1.435123688040732e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 951 }, { "completion_length": 176.07144165039062, "epoch": 0.6662001399580126, "grad_norm": 0.017122935503721237, "kl": 0.21910613775253296, "learning_rate": 1.4297758729935432e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 952 }, { "completion_length": 197.2857208251953, "epoch": 0.6668999300209937, "grad_norm": 0.7992007732391357, "kl": 0.17327755689620972, "learning_rate": 1.4244340472748074e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 953 }, { "completion_length": 195.50001525878906, "epoch": 0.6675997200839748, "grad_norm": 1.2242493629455566, "kl": 0.20231805741786957, "learning_rate": 1.4190982407791467e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 954 }, { "completion_length": 153.85714721679688, "epoch": 0.6682995101469559, "grad_norm": 0.4692648649215698, "kl": 0.186329647898674, "learning_rate": 1.413768483367503e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 955 }, { "completion_length": 184.6428680419922, "epoch": 0.668999300209937, "grad_norm": 0.04933379963040352, "kl": 0.2289222776889801, "learning_rate": 1.40844480486696e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 956 }, { "completion_length": 174.21429443359375, "epoch": 0.6696990902729181, "grad_norm": 0.9510395526885986, "kl": 0.22124411165714264, "learning_rate": 1.403127235070587e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 957 }, { "completion_length": 188.1428680419922, "epoch": 0.6703988803358992, "grad_norm": 1.1093510389328003, "kl": 0.17723356187343597, "learning_rate": 1.3978158037372646e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.0, "step": 958 }, { "completion_length": 134.07144165039062, "epoch": 0.6710986703988804, "grad_norm": 0.8156931400299072, "kl": 0.227275550365448, "learning_rate": 1.3925105405915188e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 959 }, { "completion_length": 200.57144165039062, "epoch": 0.6717984604618614, "grad_norm": 0.009073573164641857, "kl": 0.1601400375366211, "learning_rate": 1.3872114753233595e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 960 }, { "completion_length": 129.35714721679688, "epoch": 0.6724982505248426, "grad_norm": 2.2486445903778076, "kl": 0.2627028524875641, "learning_rate": 1.3819186375881116e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 961 }, { "completion_length": 152.35714721679688, "epoch": 0.6731980405878236, "grad_norm": 1.1180939674377441, "kl": 0.21694982051849365, "learning_rate": 1.3766320570062434e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 962 }, { "completion_length": 159.21429443359375, "epoch": 0.6738978306508048, "grad_norm": 1.7788225412368774, "kl": 0.2097286432981491, "learning_rate": 1.371351763163211e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 963 }, { "completion_length": 202.35714721679688, "epoch": 0.6745976207137858, "grad_norm": 1.1609289646148682, "kl": 0.2902553677558899, "learning_rate": 1.3660777856092882e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 964 }, { "completion_length": 167.35714721679688, "epoch": 0.675297410776767, "grad_norm": 1.7155505418777466, "kl": 0.1652243584394455, "learning_rate": 1.3608101538593964e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 965 }, { "completion_length": 154.71429443359375, "epoch": 0.6759972008397481, "grad_norm": 1.3016905784606934, "kl": 0.20431198179721832, "learning_rate": 1.3555488973929474e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 966 }, { "completion_length": 190.50001525878906, "epoch": 0.6766969909027292, "grad_norm": 0.006406620144844055, "kl": 0.17844071984291077, "learning_rate": 1.3502940456536754e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 967 }, { "completion_length": 182.2857208251953, "epoch": 0.6773967809657103, "grad_norm": 0.8308671116828918, "kl": 0.17164650559425354, "learning_rate": 1.345045628049468e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 968 }, { "completion_length": 152.42857360839844, "epoch": 0.6780965710286914, "grad_norm": 1.9635697603225708, "kl": 0.4182888865470886, "learning_rate": 1.3398036739522086e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 969 }, { "completion_length": 220.4285888671875, "epoch": 0.6787963610916725, "grad_norm": 1.027940273284912, "kl": 0.14217722415924072, "learning_rate": 1.334568212697606e-07, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 970 }, { "completion_length": 155.2857208251953, "epoch": 0.6794961511546536, "grad_norm": 1.299883484840393, "kl": 0.17374972999095917, "learning_rate": 1.3293392735850354e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 971 }, { "completion_length": 177.57144165039062, "epoch": 0.6801959412176347, "grad_norm": 0.7912220358848572, "kl": 0.22420614957809448, "learning_rate": 1.3241168858773712e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 972 }, { "completion_length": 146.71429443359375, "epoch": 0.6808957312806159, "grad_norm": 1.396368145942688, "kl": 0.20293959975242615, "learning_rate": 1.318901078800823e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 973 }, { "completion_length": 166.5, "epoch": 0.6815955213435969, "grad_norm": 1.7856972217559814, "kl": 0.21109966933727264, "learning_rate": 1.3136918815447727e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 974 }, { "completion_length": 180.42857360839844, "epoch": 0.6822953114065781, "grad_norm": 0.9459384679794312, "kl": 0.2076464742422104, "learning_rate": 1.308489323261614e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 975 }, { "completion_length": 251.9285888671875, "epoch": 0.6829951014695591, "grad_norm": 0.006227850914001465, "kl": 0.12587934732437134, "learning_rate": 1.3032934330665835e-07, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 976 }, { "completion_length": 150.42857360839844, "epoch": 0.6836948915325403, "grad_norm": 1.2627391815185547, "kl": 0.2202000916004181, "learning_rate": 1.298104240037603e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 977 }, { "completion_length": 165.42857360839844, "epoch": 0.6843946815955213, "grad_norm": 0.9425470232963562, "kl": 0.2140873223543167, "learning_rate": 1.2929217732151158e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 978 }, { "completion_length": 174.71429443359375, "epoch": 0.6850944716585025, "grad_norm": 0.9264910817146301, "kl": 0.23042811453342438, "learning_rate": 1.2877460616019192e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 979 }, { "completion_length": 191.92857360839844, "epoch": 0.6857942617214835, "grad_norm": 0.00905297789722681, "kl": 0.18348918855190277, "learning_rate": 1.2825771341630094e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 980 }, { "completion_length": 162.35714721679688, "epoch": 0.6864940517844647, "grad_norm": 0.00970553606748581, "kl": 0.1959858238697052, "learning_rate": 1.277415019825417e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 981 }, { "completion_length": 143.71429443359375, "epoch": 0.6871938418474458, "grad_norm": 0.008567526936531067, "kl": 0.21772439777851105, "learning_rate": 1.2722597474780398e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 982 }, { "completion_length": 172.21429443359375, "epoch": 0.6878936319104269, "grad_norm": 0.0204619150608778, "kl": 0.25340214371681213, "learning_rate": 1.2671113459714903e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 983 }, { "completion_length": 218.07144165039062, "epoch": 0.688593421973408, "grad_norm": 0.006225613411515951, "kl": 0.1314661204814911, "learning_rate": 1.2619698441179255e-07, "loss": 0.0001, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 984 }, { "completion_length": 166.21429443359375, "epoch": 0.689293212036389, "grad_norm": 1.8639769554138184, "kl": 0.26866239309310913, "learning_rate": 1.2568352706908936e-07, "loss": 0.0003, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 985 }, { "completion_length": 163.0, "epoch": 0.6899930020993702, "grad_norm": 2.256131172180176, "kl": 0.2243950068950653, "learning_rate": 1.2517076544251648e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 986 }, { "completion_length": 154.2857208251953, "epoch": 0.6906927921623512, "grad_norm": 1.1380915641784668, "kl": 0.32689398527145386, "learning_rate": 1.2465870240165792e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 987 }, { "completion_length": 155.85714721679688, "epoch": 0.6913925822253324, "grad_norm": 0.6734870076179504, "kl": 0.19095809757709503, "learning_rate": 1.2414734081218775e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 988 }, { "completion_length": 141.42857360839844, "epoch": 0.6920923722883136, "grad_norm": 0.40393969416618347, "kl": 0.18952377140522003, "learning_rate": 1.2363668353585485e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 989 }, { "completion_length": 163.5, "epoch": 0.6927921623512946, "grad_norm": 0.007434159517288208, "kl": 0.2066141664981842, "learning_rate": 1.231267334304662e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 990 }, { "completion_length": 162.7857208251953, "epoch": 0.6934919524142757, "grad_norm": 0.91849684715271, "kl": 0.2436225712299347, "learning_rate": 1.2261749334987147e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 991 }, { "completion_length": 211.42857360839844, "epoch": 0.6941917424772568, "grad_norm": 0.010118327103555202, "kl": 0.1718306690454483, "learning_rate": 1.2210896614394675e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 992 }, { "completion_length": 211.21429443359375, "epoch": 0.6948915325402379, "grad_norm": 0.7302444577217102, "kl": 0.17798253893852234, "learning_rate": 1.2160115465857847e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 993 }, { "completion_length": 158.7857208251953, "epoch": 0.695591322603219, "grad_norm": 0.9647810459136963, "kl": 0.246633842587471, "learning_rate": 1.2109406173564787e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 994 }, { "completion_length": 169.07144165039062, "epoch": 0.6962911126662001, "grad_norm": 0.9758308529853821, "kl": 0.23313549160957336, "learning_rate": 1.2058769021301487e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 995 }, { "completion_length": 193.71429443359375, "epoch": 0.6969909027291813, "grad_norm": 0.9888157248497009, "kl": 0.1746714860200882, "learning_rate": 1.2008204292450197e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 996 }, { "completion_length": 173.35714721679688, "epoch": 0.6976906927921623, "grad_norm": 1.8949143886566162, "kl": 0.1747930347919464, "learning_rate": 1.1957712269987887e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.6060914993286133, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 997 }, { "completion_length": 148.21429443359375, "epoch": 0.6983904828551435, "grad_norm": 0.008470345288515091, "kl": 0.21822790801525116, "learning_rate": 1.1907293236484625e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 998 }, { "completion_length": 173.6428680419922, "epoch": 0.6990902729181245, "grad_norm": 0.8820856213569641, "kl": 0.21209432184696198, "learning_rate": 1.1856947474102e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 999 }, { "completion_length": 190.92857360839844, "epoch": 0.6997900629811057, "grad_norm": 1.1691746711730957, "kl": 0.17540523409843445, "learning_rate": 1.180667526459157e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1000 }, { "completion_length": 190.71429443359375, "epoch": 0.7004898530440867, "grad_norm": 0.0091703487560153, "kl": 0.18992015719413757, "learning_rate": 1.1756476889293268e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1001 }, { "completion_length": 181.1428680419922, "epoch": 0.7011896431070679, "grad_norm": 1.2779645919799805, "kl": 0.16875801980495453, "learning_rate": 1.1706352629133801e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1002 }, { "completion_length": 188.1428680419922, "epoch": 0.701889433170049, "grad_norm": 1.1230698823928833, "kl": 0.22026388347148895, "learning_rate": 1.1656302764625137e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1003 }, { "completion_length": 172.2857208251953, "epoch": 0.7025892232330301, "grad_norm": 0.6152905225753784, "kl": 0.20545285940170288, "learning_rate": 1.1606327575862868e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1004 }, { "completion_length": 178.35714721679688, "epoch": 0.7032890132960112, "grad_norm": 0.6955828666687012, "kl": 0.22097721695899963, "learning_rate": 1.1556427342524696e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1005 }, { "completion_length": 145.71429443359375, "epoch": 0.7039888033589923, "grad_norm": 0.7483034729957581, "kl": 0.2220589518547058, "learning_rate": 1.1506602343868856e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1006 }, { "completion_length": 216.42857360839844, "epoch": 0.7046885934219734, "grad_norm": 0.007535002660006285, "kl": 0.1627509593963623, "learning_rate": 1.1456852858732513e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1007 }, { "completion_length": 171.42857360839844, "epoch": 0.7053883834849545, "grad_norm": 0.9205120205879211, "kl": 0.19327817857265472, "learning_rate": 1.1407179165530265e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1008 }, { "completion_length": 165.6428680419922, "epoch": 0.7060881735479356, "grad_norm": 0.9747820496559143, "kl": 0.2033904641866684, "learning_rate": 1.1357581542252554e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1009 }, { "completion_length": 179.71429443359375, "epoch": 0.7067879636109167, "grad_norm": 0.010711228474974632, "kl": 0.23487378656864166, "learning_rate": 1.1308060266464062e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1010 }, { "completion_length": 197.35714721679688, "epoch": 0.7074877536738978, "grad_norm": 0.8751528263092041, "kl": 0.17783333361148834, "learning_rate": 1.1258615615302256e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1011 }, { "completion_length": 145.5, "epoch": 0.708187543736879, "grad_norm": 1.082824468612671, "kl": 0.2301623821258545, "learning_rate": 1.1209247865475783e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1012 }, { "completion_length": 142.2857208251953, "epoch": 0.70888733379986, "grad_norm": 1.3315398693084717, "kl": 0.2126808613538742, "learning_rate": 1.1159957293262887e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1013 }, { "completion_length": 177.6428680419922, "epoch": 0.7095871238628412, "grad_norm": 0.7687322497367859, "kl": 0.23538866639137268, "learning_rate": 1.1110744174509951e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1014 }, { "completion_length": 149.7857208251953, "epoch": 0.7102869139258222, "grad_norm": 1.6101713180541992, "kl": 0.23341014981269836, "learning_rate": 1.1061608784629853e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1015 }, { "completion_length": 151.35714721679688, "epoch": 0.7109867039888034, "grad_norm": 1.005165696144104, "kl": 0.2624002993106842, "learning_rate": 1.1012551398600508e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1016 }, { "completion_length": 120.28572082519531, "epoch": 0.7116864940517844, "grad_norm": 1.2380473613739014, "kl": 0.30483269691467285, "learning_rate": 1.0963572290963297e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1017 }, { "completion_length": 150.7857208251953, "epoch": 0.7123862841147656, "grad_norm": 0.015446364879608154, "kl": 0.2904357612133026, "learning_rate": 1.0914671735821496e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1018 }, { "completion_length": 180.35714721679688, "epoch": 0.7130860741777467, "grad_norm": 0.8005344867706299, "kl": 0.19870442152023315, "learning_rate": 1.0865850006838811e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1019 }, { "completion_length": 129.35714721679688, "epoch": 0.7137858642407278, "grad_norm": 0.9900004267692566, "kl": 0.285817950963974, "learning_rate": 1.0817107377237813e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1020 }, { "completion_length": 175.50001525878906, "epoch": 0.7144856543037089, "grad_norm": 1.6074936389923096, "kl": 0.1556868851184845, "learning_rate": 1.0768444119798356e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1021 }, { "completion_length": 154.07144165039062, "epoch": 0.71518544436669, "grad_norm": 0.7815014123916626, "kl": 0.212956964969635, "learning_rate": 1.0719860506856151e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1022 }, { "completion_length": 201.7857208251953, "epoch": 0.7158852344296711, "grad_norm": 0.0077285743318498135, "kl": 0.19681662321090698, "learning_rate": 1.0671356810301188e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1023 }, { "completion_length": 144.6428680419922, "epoch": 0.7165850244926522, "grad_norm": 1.0257624387741089, "kl": 0.22791138291358948, "learning_rate": 1.062293330157619e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1024 }, { "completion_length": 208.00001525878906, "epoch": 0.7172848145556333, "grad_norm": 0.5651824474334717, "kl": 0.19887785613536835, "learning_rate": 1.0574590251675144e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1025 }, { "completion_length": 153.1428680419922, "epoch": 0.7179846046186145, "grad_norm": 0.008925264701247215, "kl": 0.23088547587394714, "learning_rate": 1.0526327931141774e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1026 }, { "completion_length": 215.1428680419922, "epoch": 0.7186843946815955, "grad_norm": 0.006793211679905653, "kl": 0.17881551384925842, "learning_rate": 1.0478146610067978e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1027 }, { "completion_length": 155.21429443359375, "epoch": 0.7193841847445767, "grad_norm": 0.011401226744055748, "kl": 0.19790159165859222, "learning_rate": 1.0430046558092397e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1028 }, { "completion_length": 181.2857208251953, "epoch": 0.7200839748075577, "grad_norm": 1.4518769979476929, "kl": 0.1697734147310257, "learning_rate": 1.0382028044398822e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1029 }, { "completion_length": 142.6428680419922, "epoch": 0.7207837648705389, "grad_norm": 1.0903130769729614, "kl": 0.23007123172283173, "learning_rate": 1.0334091337714761e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1030 }, { "completion_length": 142.7857208251953, "epoch": 0.7214835549335199, "grad_norm": 1.9411664009094238, "kl": 0.23579557240009308, "learning_rate": 1.0286236706309897e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1031 }, { "completion_length": 174.07144165039062, "epoch": 0.722183344996501, "grad_norm": 0.03770539537072182, "kl": 0.26109400391578674, "learning_rate": 1.0238464417994583e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1032 }, { "completion_length": 169.0, "epoch": 0.7228831350594822, "grad_norm": 0.692987859249115, "kl": 0.35571086406707764, "learning_rate": 1.019077474011834e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1033 }, { "completion_length": 155.21429443359375, "epoch": 0.7235829251224632, "grad_norm": 0.4788121283054352, "kl": 0.20701532065868378, "learning_rate": 1.0143167939568417e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1034 }, { "completion_length": 167.21429443359375, "epoch": 0.7242827151854444, "grad_norm": 0.01393273938447237, "kl": 0.2405117303133011, "learning_rate": 1.0095644282768204e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1035 }, { "completion_length": 169.5, "epoch": 0.7249825052484254, "grad_norm": 1.8390034437179565, "kl": 0.3016516864299774, "learning_rate": 1.0048204035675825e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1036 }, { "completion_length": 168.21429443359375, "epoch": 0.7256822953114066, "grad_norm": 0.6504518985748291, "kl": 0.171627938747406, "learning_rate": 1.0000847463782614e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1037 }, { "completion_length": 178.71429443359375, "epoch": 0.7263820853743876, "grad_norm": 0.5257077813148499, "kl": 0.20457741618156433, "learning_rate": 9.953574832111602e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1038 }, { "completion_length": 136.92857360839844, "epoch": 0.7270818754373688, "grad_norm": 1.5041779279708862, "kl": 0.2566531300544739, "learning_rate": 9.906386405216097e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1039 }, { "completion_length": 180.1428680419922, "epoch": 0.72778166550035, "grad_norm": 0.01437221746891737, "kl": 0.24935373663902283, "learning_rate": 9.85928244717816e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1040 }, { "completion_length": 157.0, "epoch": 0.728481455563331, "grad_norm": 0.808308482170105, "kl": 0.1823839247226715, "learning_rate": 9.812263221607112e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1041 }, { "completion_length": 204.85714721679688, "epoch": 0.7291812456263121, "grad_norm": 0.4334215819835663, "kl": 0.1695767343044281, "learning_rate": 9.765328991638125e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1042 }, { "completion_length": 166.92857360839844, "epoch": 0.7298810356892932, "grad_norm": 1.0468811988830566, "kl": 0.21447373926639557, "learning_rate": 9.718480019930661e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1043 }, { "completion_length": 175.2857208251953, "epoch": 0.7305808257522743, "grad_norm": 0.7147412896156311, "kl": 0.18650026619434357, "learning_rate": 9.671716568667096e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1044 }, { "completion_length": 186.71429443359375, "epoch": 0.7312806158152554, "grad_norm": 0.007167481351643801, "kl": 0.17807652056217194, "learning_rate": 9.625038899551161e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1045 }, { "completion_length": 156.92857360839844, "epoch": 0.7319804058782365, "grad_norm": 0.6537052989006042, "kl": 0.21359063684940338, "learning_rate": 9.578447273806558e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1046 }, { "completion_length": 169.6428680419922, "epoch": 0.7326801959412176, "grad_norm": 1.030092716217041, "kl": 0.19438377022743225, "learning_rate": 9.531941952175434e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1047 }, { "completion_length": 256.14288330078125, "epoch": 0.7333799860041987, "grad_norm": 0.8888174295425415, "kl": 0.14532265067100525, "learning_rate": 9.485523194916976e-08, "loss": 0.0001, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1048 }, { "completion_length": 156.35714721679688, "epoch": 0.7340797760671799, "grad_norm": 1.3874386548995972, "kl": 0.2217440903186798, "learning_rate": 9.439191261805893e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1049 }, { "completion_length": 166.7857208251953, "epoch": 0.7347795661301609, "grad_norm": 0.006922485772520304, "kl": 0.1924188733100891, "learning_rate": 9.392946412131033e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1050 }, { "completion_length": 201.1428680419922, "epoch": 0.7354793561931421, "grad_norm": 0.4025443494319916, "kl": 0.20718589425086975, "learning_rate": 9.34678890469389e-08, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1051 }, { "completion_length": 160.7857208251953, "epoch": 0.7361791462561231, "grad_norm": 1.24512779712677, "kl": 0.18339328467845917, "learning_rate": 9.30071899780713e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1052 }, { "completion_length": 214.2857208251953, "epoch": 0.7368789363191043, "grad_norm": 1.297933578491211, "kl": 0.2691657841205597, "learning_rate": 9.254736949293215e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1053 }, { "completion_length": 162.35714721679688, "epoch": 0.7375787263820853, "grad_norm": 1.1341309547424316, "kl": 0.19209903478622437, "learning_rate": 9.208843016482914e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1054 }, { "completion_length": 187.6428680419922, "epoch": 0.7382785164450665, "grad_norm": 0.8116550445556641, "kl": 0.2059214562177658, "learning_rate": 9.163037456213851e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1055 }, { "completion_length": 198.6428680419922, "epoch": 0.7389783065080476, "grad_norm": 0.8020315766334534, "kl": 0.17613746225833893, "learning_rate": 9.11732052482912e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1056 }, { "completion_length": 142.85714721679688, "epoch": 0.7396780965710287, "grad_norm": 1.6413614749908447, "kl": 0.23158229887485504, "learning_rate": 9.071692478175788e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1057 }, { "completion_length": 185.07144165039062, "epoch": 0.7403778866340098, "grad_norm": 1.2444591522216797, "kl": 0.19675491750240326, "learning_rate": 9.026153571603504e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1058 }, { "completion_length": 160.57144165039062, "epoch": 0.7410776766969909, "grad_norm": 0.7727208733558655, "kl": 0.23629315197467804, "learning_rate": 8.980704059963066e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1059 }, { "completion_length": 126.35714721679688, "epoch": 0.741777466759972, "grad_norm": 1.4181745052337646, "kl": 0.25063177943229675, "learning_rate": 8.935344197604991e-08, "loss": 0.0003, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1060 }, { "completion_length": 190.1428680419922, "epoch": 0.7424772568229531, "grad_norm": 1.1808803081512451, "kl": 0.18802045285701752, "learning_rate": 8.890074238378073e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1061 }, { "completion_length": 154.21429443359375, "epoch": 0.7431770468859342, "grad_norm": 1.5520745515823364, "kl": 0.2316076159477234, "learning_rate": 8.844894435628e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1062 }, { "completion_length": 200.6428680419922, "epoch": 0.7438768369489154, "grad_norm": 0.01017883699387312, "kl": 0.2200857400894165, "learning_rate": 8.79980504219589e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1063 }, { "completion_length": 160.5, "epoch": 0.7445766270118964, "grad_norm": 1.8919148445129395, "kl": 0.20429299771785736, "learning_rate": 8.754806310416915e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1064 }, { "completion_length": 177.7857208251953, "epoch": 0.7452764170748776, "grad_norm": 0.011788447387516499, "kl": 0.23186638951301575, "learning_rate": 8.709898492118883e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1065 }, { "completion_length": 173.21429443359375, "epoch": 0.7459762071378586, "grad_norm": 0.0077976989559829235, "kl": 0.19231411814689636, "learning_rate": 8.665081838620794e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1066 }, { "completion_length": 157.21429443359375, "epoch": 0.7466759972008398, "grad_norm": 1.3523648977279663, "kl": 0.22446668148040771, "learning_rate": 8.620356600731485e-08, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1067 }, { "completion_length": 143.07144165039062, "epoch": 0.7473757872638208, "grad_norm": 1.4021105766296387, "kl": 0.22770735621452332, "learning_rate": 8.575723028748202e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1068 }, { "completion_length": 150.35714721679688, "epoch": 0.748075577326802, "grad_norm": 1.552819848060608, "kl": 0.22746649384498596, "learning_rate": 8.53118137245516e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1069 }, { "completion_length": 149.42857360839844, "epoch": 0.7487753673897831, "grad_norm": 0.010056705214083195, "kl": 0.22622084617614746, "learning_rate": 8.486731881122225e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1070 }, { "completion_length": 151.42857360839844, "epoch": 0.7494751574527642, "grad_norm": 1.314794659614563, "kl": 0.24155405163764954, "learning_rate": 8.442374803503469e-08, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1071 }, { "completion_length": 146.42857360839844, "epoch": 0.7501749475157453, "grad_norm": 2.1845922470092773, "kl": 0.2684437036514282, "learning_rate": 8.398110387835766e-08, "loss": 0.0003, "reward": 1.4642857313156128, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1072 }, { "completion_length": 185.85714721679688, "epoch": 0.7508747375787264, "grad_norm": 0.6714044809341431, "kl": 0.16590483486652374, "learning_rate": 8.353938881837444e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1073 }, { "completion_length": 160.5, "epoch": 0.7515745276417075, "grad_norm": 0.8972455859184265, "kl": 0.2132020890712738, "learning_rate": 8.309860532706881e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1074 }, { "completion_length": 182.92857360839844, "epoch": 0.7522743177046886, "grad_norm": 0.6655084490776062, "kl": 0.20527049899101257, "learning_rate": 8.26587558712109e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1075 }, { "completion_length": 139.35714721679688, "epoch": 0.7529741077676697, "grad_norm": 1.6883835792541504, "kl": 0.24122567474842072, "learning_rate": 8.221984291234404e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1076 }, { "completion_length": 143.6428680419922, "epoch": 0.7536738978306508, "grad_norm": 0.8895137906074524, "kl": 0.2152090221643448, "learning_rate": 8.178186890677027e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1077 }, { "completion_length": 146.7857208251953, "epoch": 0.7543736878936319, "grad_norm": 5.121913433074951, "kl": 0.21037350594997406, "learning_rate": 8.134483630553712e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1078 }, { "completion_length": 155.35714721679688, "epoch": 0.7550734779566131, "grad_norm": 0.006447081454098225, "kl": 0.20492307841777802, "learning_rate": 8.090874755442381e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1079 }, { "completion_length": 128.85714721679688, "epoch": 0.7557732680195941, "grad_norm": 0.010179801844060421, "kl": 0.28304314613342285, "learning_rate": 8.047360509392725e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1080 }, { "completion_length": 160.92857360839844, "epoch": 0.7564730580825753, "grad_norm": 1.3864848613739014, "kl": 0.2240566909313202, "learning_rate": 8.003941135924858e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1081 }, { "completion_length": 158.2857208251953, "epoch": 0.7571728481455563, "grad_norm": 1.2483444213867188, "kl": 0.1892787516117096, "learning_rate": 7.960616878027981e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1082 }, { "completion_length": 189.1428680419922, "epoch": 0.7578726382085375, "grad_norm": 1.2969609498977661, "kl": 0.20024828612804413, "learning_rate": 7.917387978158968e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1083 }, { "completion_length": 186.85714721679688, "epoch": 0.7585724282715185, "grad_norm": 0.015368812717497349, "kl": 0.2436189353466034, "learning_rate": 7.874254678241054e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1084 }, { "completion_length": 170.7857208251953, "epoch": 0.7592722183344996, "grad_norm": 0.6496590971946716, "kl": 0.19450576603412628, "learning_rate": 7.831217219662478e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1085 }, { "completion_length": 172.00001525878906, "epoch": 0.7599720083974808, "grad_norm": 0.7505138516426086, "kl": 0.26276451349258423, "learning_rate": 7.78827584327508e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1086 }, { "completion_length": 221.85714721679688, "epoch": 0.7606717984604618, "grad_norm": 0.006493070628494024, "kl": 0.17950274050235748, "learning_rate": 7.745430789393026e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1087 }, { "completion_length": 180.1428680419922, "epoch": 0.761371588523443, "grad_norm": 0.9771869778633118, "kl": 0.1948787271976471, "learning_rate": 7.70268229779143e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1088 }, { "completion_length": 177.6428680419922, "epoch": 0.762071378586424, "grad_norm": 1.0726661682128906, "kl": 0.17724980413913727, "learning_rate": 7.660030607704979e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1089 }, { "completion_length": 178.7857208251953, "epoch": 0.7627711686494052, "grad_norm": 1.2632213830947876, "kl": 0.27130839228630066, "learning_rate": 7.617475957826668e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1090 }, { "completion_length": 229.2857208251953, "epoch": 0.7634709587123862, "grad_norm": 0.4676748514175415, "kl": 0.19139538705348969, "learning_rate": 7.575018586306389e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1091 }, { "completion_length": 184.00001525878906, "epoch": 0.7641707487753674, "grad_norm": 1.584816575050354, "kl": 0.18049544095993042, "learning_rate": 7.532658730749655e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1092 }, { "completion_length": 153.35714721679688, "epoch": 0.7648705388383485, "grad_norm": 0.009945012629032135, "kl": 0.27313506603240967, "learning_rate": 7.490396628216236e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1093 }, { "completion_length": 178.92857360839844, "epoch": 0.7655703289013296, "grad_norm": 1.7595086097717285, "kl": 0.22992803156375885, "learning_rate": 7.448232515218839e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1094 }, { "completion_length": 117.21429443359375, "epoch": 0.7662701189643107, "grad_norm": 2.2487285137176514, "kl": 0.34105950593948364, "learning_rate": 7.406166627721803e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1095 }, { "completion_length": 171.85714721679688, "epoch": 0.7669699090272918, "grad_norm": 0.8847843408584595, "kl": 0.2094418704509735, "learning_rate": 7.364199201139763e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1096 }, { "completion_length": 150.6428680419922, "epoch": 0.7676696990902729, "grad_norm": 2.2001891136169434, "kl": 0.3334418833255768, "learning_rate": 7.322330470336313e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1097 }, { "completion_length": 165.92857360839844, "epoch": 0.768369489153254, "grad_norm": 1.4109801054000854, "kl": 0.21212869882583618, "learning_rate": 7.280560669622742e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1098 }, { "completion_length": 134.92857360839844, "epoch": 0.7690692792162351, "grad_norm": 1.4650187492370605, "kl": 0.2684599757194519, "learning_rate": 7.238890032756684e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1099 }, { "completion_length": 203.42857360839844, "epoch": 0.7697690692792163, "grad_norm": 0.010684155859053135, "kl": 0.1764412522315979, "learning_rate": 7.197318792940804e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1100 }, { "completion_length": 208.21429443359375, "epoch": 0.7704688593421973, "grad_norm": 0.006470104213804007, "kl": 0.20753923058509827, "learning_rate": 7.155847182821522e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1101 }, { "completion_length": 240.9285888671875, "epoch": 0.7711686494051785, "grad_norm": 0.008687240071594715, "kl": 0.14837592840194702, "learning_rate": 7.114475434487708e-08, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1102 }, { "completion_length": 159.7857208251953, "epoch": 0.7718684394681595, "grad_norm": 0.8867753148078918, "kl": 0.22225448489189148, "learning_rate": 7.073203779469347e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1103 }, { "completion_length": 148.57144165039062, "epoch": 0.7725682295311407, "grad_norm": 2.1539390087127686, "kl": 0.24582090973854065, "learning_rate": 7.032032448736278e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1104 }, { "completion_length": 151.57144165039062, "epoch": 0.7732680195941217, "grad_norm": 1.4536081552505493, "kl": 0.21733474731445312, "learning_rate": 6.990961672696908e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1105 }, { "completion_length": 193.71429443359375, "epoch": 0.7739678096571029, "grad_norm": 0.5715238451957703, "kl": 0.22165124118328094, "learning_rate": 6.949991681196877e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1106 }, { "completion_length": 122.42857360839844, "epoch": 0.7746675997200839, "grad_norm": 1.8503363132476807, "kl": 0.2978855073451996, "learning_rate": 6.909122703517836e-08, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1107 }, { "completion_length": 104.5714340209961, "epoch": 0.7753673897830651, "grad_norm": 0.016594173386693, "kl": 0.3288668394088745, "learning_rate": 6.868354968376097e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1108 }, { "completion_length": 175.00001525878906, "epoch": 0.7760671798460462, "grad_norm": 1.6050766706466675, "kl": 0.22226838767528534, "learning_rate": 6.827688703921405e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1109 }, { "completion_length": 133.57144165039062, "epoch": 0.7767669699090273, "grad_norm": 2.0014398097991943, "kl": 0.294974148273468, "learning_rate": 6.787124137735647e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1110 }, { "completion_length": 117.21429443359375, "epoch": 0.7774667599720084, "grad_norm": 1.6581988334655762, "kl": 0.2920078635215759, "learning_rate": 6.746661496831543e-08, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1111 }, { "completion_length": 154.07144165039062, "epoch": 0.7781665500349895, "grad_norm": 0.6985070109367371, "kl": 0.2321082055568695, "learning_rate": 6.706301007651433e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1112 }, { "completion_length": 168.21429443359375, "epoch": 0.7788663400979706, "grad_norm": 1.3603790998458862, "kl": 0.22756832838058472, "learning_rate": 6.666042896065982e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1113 }, { "completion_length": 177.21429443359375, "epoch": 0.7795661301609517, "grad_norm": 0.011437885463237762, "kl": 0.18410103023052216, "learning_rate": 6.625887387372891e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1114 }, { "completion_length": 175.1428680419922, "epoch": 0.7802659202239328, "grad_norm": 1.6120368242263794, "kl": 0.24572807550430298, "learning_rate": 6.585834706295695e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1115 }, { "completion_length": 147.35714721679688, "epoch": 0.780965710286914, "grad_norm": 0.01100208330899477, "kl": 0.2827099859714508, "learning_rate": 6.545885076982444e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1116 }, { "completion_length": 188.7857208251953, "epoch": 0.781665500349895, "grad_norm": 1.478312373161316, "kl": 0.20310620963573456, "learning_rate": 6.506038723004483e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1117 }, { "completion_length": 153.1428680419922, "epoch": 0.7823652904128762, "grad_norm": 0.9191446900367737, "kl": 0.31532570719718933, "learning_rate": 6.466295867355203e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1118 }, { "completion_length": 148.21429443359375, "epoch": 0.7830650804758572, "grad_norm": 1.0591237545013428, "kl": 0.25294220447540283, "learning_rate": 6.426656732448785e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1119 }, { "completion_length": 174.6428680419922, "epoch": 0.7837648705388384, "grad_norm": 1.3166435956954956, "kl": 0.23963408172130585, "learning_rate": 6.387121540118936e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1120 }, { "completion_length": 191.71429443359375, "epoch": 0.7844646606018194, "grad_norm": 1.2181185483932495, "kl": 0.20837150514125824, "learning_rate": 6.347690511617693e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1121 }, { "completion_length": 194.85714721679688, "epoch": 0.7851644506648006, "grad_norm": 0.5446606874465942, "kl": 0.19041968882083893, "learning_rate": 6.308363867614125e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1122 }, { "completion_length": 145.2857208251953, "epoch": 0.7858642407277817, "grad_norm": 1.1802663803100586, "kl": 0.2160012423992157, "learning_rate": 6.269141828193159e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1123 }, { "completion_length": 180.42857360839844, "epoch": 0.7865640307907628, "grad_norm": 0.010205530561506748, "kl": 0.20438823103904724, "learning_rate": 6.230024612854318e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1124 }, { "completion_length": 176.50001525878906, "epoch": 0.7872638208537439, "grad_norm": 0.46397289633750916, "kl": 0.2814481556415558, "learning_rate": 6.191012440510468e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1125 }, { "completion_length": 134.7857208251953, "epoch": 0.787963610916725, "grad_norm": 0.008362777531147003, "kl": 0.26523175835609436, "learning_rate": 6.152105529486648e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1126 }, { "completion_length": 170.1428680419922, "epoch": 0.7886634009797061, "grad_norm": 2.0450756549835205, "kl": 0.2036227583885193, "learning_rate": 6.113304097518823e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1127 }, { "completion_length": 193.7857208251953, "epoch": 0.7893631910426872, "grad_norm": 1.3295738697052002, "kl": 0.21037621796131134, "learning_rate": 6.074608361752622e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1128 }, { "completion_length": 231.71429443359375, "epoch": 0.7900629811056683, "grad_norm": 0.5518699288368225, "kl": 0.19303752481937408, "learning_rate": 6.036018538742207e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1129 }, { "completion_length": 151.2857208251953, "epoch": 0.7907627711686495, "grad_norm": 1.2919535636901855, "kl": 0.2545446455478668, "learning_rate": 5.997534844449001e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1130 }, { "completion_length": 161.57144165039062, "epoch": 0.7914625612316305, "grad_norm": 0.965070366859436, "kl": 0.195675328373909, "learning_rate": 5.9591574942404885e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1131 }, { "completion_length": 186.92857360839844, "epoch": 0.7921623512946117, "grad_norm": 1.750278353691101, "kl": 0.19543519616127014, "learning_rate": 5.920886702889025e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1132 }, { "completion_length": 159.7857208251953, "epoch": 0.7928621413575927, "grad_norm": 0.9532786011695862, "kl": 0.24044682085514069, "learning_rate": 5.8827226845706375e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1133 }, { "completion_length": 153.21429443359375, "epoch": 0.7935619314205739, "grad_norm": 1.212231993675232, "kl": 0.2419227808713913, "learning_rate": 5.8446656528637865e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1134 }, { "completion_length": 177.2857208251953, "epoch": 0.7942617214835549, "grad_norm": 1.2694288492202759, "kl": 0.18155987560749054, "learning_rate": 5.806715820748237e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1135 }, { "completion_length": 189.2857208251953, "epoch": 0.794961511546536, "grad_norm": 0.9698265194892883, "kl": 0.18346425890922546, "learning_rate": 5.768873400603791e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1136 }, { "completion_length": 183.2857208251953, "epoch": 0.7956613016095171, "grad_norm": 1.3478466272354126, "kl": 0.19630123674869537, "learning_rate": 5.731138604209168e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1137 }, { "completion_length": 187.92857360839844, "epoch": 0.7963610916724982, "grad_norm": 0.4501980245113373, "kl": 0.19250676035881042, "learning_rate": 5.6935116427407866e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1138 }, { "completion_length": 127.85714721679688, "epoch": 0.7970608817354794, "grad_norm": 0.7858787178993225, "kl": 0.28234609961509705, "learning_rate": 5.655992726771566e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1139 }, { "completion_length": 175.57144165039062, "epoch": 0.7977606717984604, "grad_norm": 0.009066554717719555, "kl": 0.19913817942142487, "learning_rate": 5.618582066269775e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1140 }, { "completion_length": 207.21429443359375, "epoch": 0.7984604618614416, "grad_norm": 0.6075648665428162, "kl": 0.19196923077106476, "learning_rate": 5.581279870597866e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1141 }, { "completion_length": 136.1428680419922, "epoch": 0.7991602519244226, "grad_norm": 0.009156833402812481, "kl": 0.24330392479896545, "learning_rate": 5.5440863485112596e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1142 }, { "completion_length": 162.21429443359375, "epoch": 0.7998600419874038, "grad_norm": 2.1215896606445312, "kl": 0.2636703550815582, "learning_rate": 5.507001708157219e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1143 }, { "completion_length": 184.92857360839844, "epoch": 0.8005598320503848, "grad_norm": 0.7196187973022461, "kl": 0.19072465598583221, "learning_rate": 5.470026157073679e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1144 }, { "completion_length": 246.35714721679688, "epoch": 0.801259622113366, "grad_norm": 0.9996775984764099, "kl": 0.16130535304546356, "learning_rate": 5.433159902188042e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1145 }, { "completion_length": 150.35714721679688, "epoch": 0.8019594121763471, "grad_norm": 1.8043763637542725, "kl": 0.2661333978176117, "learning_rate": 5.396403149816087e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1146 }, { "completion_length": 122.85714721679688, "epoch": 0.8026592022393282, "grad_norm": 1.6296087503433228, "kl": 0.30425775051116943, "learning_rate": 5.3597561056607646e-08, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1147 }, { "completion_length": 197.6428680419922, "epoch": 0.8033589923023093, "grad_norm": 0.007470742799341679, "kl": 0.1895866096019745, "learning_rate": 5.3232189748110584e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1148 }, { "completion_length": 208.85714721679688, "epoch": 0.8040587823652904, "grad_norm": 0.5293805003166199, "kl": 0.18043257296085358, "learning_rate": 5.2867919617408553e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1149 }, { "completion_length": 115.64286041259766, "epoch": 0.8047585724282715, "grad_norm": 0.8847793936729431, "kl": 0.25427746772766113, "learning_rate": 5.2504752703077666e-08, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1150 }, { "completion_length": 246.71429443359375, "epoch": 0.8054583624912526, "grad_norm": 0.006377246230840683, "kl": 0.14152392745018005, "learning_rate": 5.214269103752028e-08, "loss": 0.0001, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1151 }, { "completion_length": 159.92857360839844, "epoch": 0.8061581525542337, "grad_norm": 0.5868619680404663, "kl": 0.2678522765636444, "learning_rate": 5.178173664695323e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1152 }, { "completion_length": 208.50001525878906, "epoch": 0.8068579426172149, "grad_norm": 0.982399582862854, "kl": 0.17481723427772522, "learning_rate": 5.142189155139684e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1153 }, { "completion_length": 193.50001525878906, "epoch": 0.8075577326801959, "grad_norm": 0.5438684225082397, "kl": 0.22392788529396057, "learning_rate": 5.1063157764663274e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1154 }, { "completion_length": 175.42857360839844, "epoch": 0.8082575227431771, "grad_norm": 0.007278852630406618, "kl": 0.2030791938304901, "learning_rate": 5.070553729434565e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1155 }, { "completion_length": 194.7857208251953, "epoch": 0.8089573128061581, "grad_norm": 0.5347967147827148, "kl": 0.24053919315338135, "learning_rate": 5.0349032141806385e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1156 }, { "completion_length": 146.21429443359375, "epoch": 0.8096571028691393, "grad_norm": 0.013106808066368103, "kl": 0.24973909556865692, "learning_rate": 4.9993644302166374e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1157 }, { "completion_length": 193.42857360839844, "epoch": 0.8103568929321203, "grad_norm": 0.626917839050293, "kl": 0.19321061670780182, "learning_rate": 4.963937576429364e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1158 }, { "completion_length": 157.35714721679688, "epoch": 0.8110566829951015, "grad_norm": 0.012417580932378769, "kl": 0.2344369888305664, "learning_rate": 4.9286228510792064e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1159 }, { "completion_length": 165.2857208251953, "epoch": 0.8117564730580826, "grad_norm": 0.007472768425941467, "kl": 0.21227319538593292, "learning_rate": 4.893420451799063e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1160 }, { "completion_length": 154.6428680419922, "epoch": 0.8124562631210637, "grad_norm": 0.8728052377700806, "kl": 0.23276230692863464, "learning_rate": 4.85833057559322e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1161 }, { "completion_length": 202.00001525878906, "epoch": 0.8131560531840448, "grad_norm": 1.3674808740615845, "kl": 0.2640719413757324, "learning_rate": 4.823353418836224e-08, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1162 }, { "completion_length": 127.50000762939453, "epoch": 0.8138558432470259, "grad_norm": 0.6345393061637878, "kl": 0.23137511312961578, "learning_rate": 4.788489177271834e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1163 }, { "completion_length": 199.92857360839844, "epoch": 0.814555633310007, "grad_norm": 0.016269003972411156, "kl": 0.1905609667301178, "learning_rate": 4.7537380460118845e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1164 }, { "completion_length": 161.5, "epoch": 0.8152554233729881, "grad_norm": 1.631848931312561, "kl": 0.26761165261268616, "learning_rate": 4.719100219535194e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1165 }, { "completion_length": 188.21429443359375, "epoch": 0.8159552134359692, "grad_norm": 1.4352449178695679, "kl": 0.20568834245204926, "learning_rate": 4.684575891686515e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1166 }, { "completion_length": 121.50000762939453, "epoch": 0.8166550034989503, "grad_norm": 2.5432016849517822, "kl": 0.29240328073501587, "learning_rate": 4.6501652556754204e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1167 }, { "completion_length": 174.2857208251953, "epoch": 0.8173547935619314, "grad_norm": 0.9358087778091431, "kl": 0.24679502844810486, "learning_rate": 4.615868504075207e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1168 }, { "completion_length": 201.6428680419922, "epoch": 0.8180545836249126, "grad_norm": 1.6887545585632324, "kl": 0.21144524216651917, "learning_rate": 4.581685828821857e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1169 }, { "completion_length": 174.00001525878906, "epoch": 0.8187543736878936, "grad_norm": 1.3306705951690674, "kl": 0.22896404564380646, "learning_rate": 4.547617421212926e-08, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1170 }, { "completion_length": 217.7857208251953, "epoch": 0.8194541637508748, "grad_norm": 0.008754800073802471, "kl": 0.16183727979660034, "learning_rate": 4.513663471906507e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1171 }, { "completion_length": 200.35714721679688, "epoch": 0.8201539538138558, "grad_norm": 0.5710361003875732, "kl": 0.17051121592521667, "learning_rate": 4.4798241709201406e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1172 }, { "completion_length": 158.0, "epoch": 0.820853743876837, "grad_norm": 0.011072482913732529, "kl": 0.21519379317760468, "learning_rate": 4.44609970762975e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1173 }, { "completion_length": 188.35714721679688, "epoch": 0.821553533939818, "grad_norm": 1.1083258390426636, "kl": 0.2219666838645935, "learning_rate": 4.412490270768593e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1174 }, { "completion_length": 170.0, "epoch": 0.8222533240027992, "grad_norm": 0.6573085784912109, "kl": 0.2177230715751648, "learning_rate": 4.37899604842622e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1175 }, { "completion_length": 217.2857208251953, "epoch": 0.8229531140657803, "grad_norm": 1.1000984907150269, "kl": 0.13960961997509003, "learning_rate": 4.345617228047358e-08, "loss": 0.0001, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1176 }, { "completion_length": 195.92857360839844, "epoch": 0.8236529041287614, "grad_norm": 1.88215970993042, "kl": 0.20975305140018463, "learning_rate": 4.312353996430948e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1177 }, { "completion_length": 194.2857208251953, "epoch": 0.8243526941917425, "grad_norm": 0.009198215790092945, "kl": 0.1882149875164032, "learning_rate": 4.279206539729052e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1178 }, { "completion_length": 166.92857360839844, "epoch": 0.8250524842547236, "grad_norm": 0.8167291283607483, "kl": 0.19863860309123993, "learning_rate": 4.2461750434457935e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1179 }, { "completion_length": 177.92857360839844, "epoch": 0.8257522743177047, "grad_norm": 0.9815044403076172, "kl": 0.28455451130867004, "learning_rate": 4.213259692436366e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1180 }, { "completion_length": 183.50001525878906, "epoch": 0.8264520643806857, "grad_norm": 0.6266160011291504, "kl": 0.23878343403339386, "learning_rate": 4.180460670905977e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1181 }, { "completion_length": 150.07144165039062, "epoch": 0.8271518544436669, "grad_norm": 1.1328206062316895, "kl": 0.25891032814979553, "learning_rate": 4.1477781624087935e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1182 }, { "completion_length": 172.42857360839844, "epoch": 0.827851644506648, "grad_norm": 1.2734407186508179, "kl": 0.20092228055000305, "learning_rate": 4.115212349846961e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1183 }, { "completion_length": 237.7857208251953, "epoch": 0.8285514345696291, "grad_norm": 0.025107843801379204, "kl": 0.2145889550447464, "learning_rate": 4.082763415469534e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1184 }, { "completion_length": 125.35714721679688, "epoch": 0.8292512246326103, "grad_norm": 2.0415592193603516, "kl": 0.3111453950405121, "learning_rate": 4.050431540871499e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1185 }, { "completion_length": 177.50001525878906, "epoch": 0.8299510146955913, "grad_norm": 0.00889210682362318, "kl": 0.226608008146286, "learning_rate": 4.018216906992739e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1186 }, { "completion_length": 183.50001525878906, "epoch": 0.8306508047585724, "grad_norm": 0.2604890465736389, "kl": 0.26834410429000854, "learning_rate": 3.986119694116979e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1187 }, { "completion_length": 151.21429443359375, "epoch": 0.8313505948215535, "grad_norm": 1.1256896257400513, "kl": 0.25610828399658203, "learning_rate": 3.954140081870866e-08, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1188 }, { "completion_length": 183.00001525878906, "epoch": 0.8320503848845346, "grad_norm": 1.422790765762329, "kl": 0.1856001913547516, "learning_rate": 3.9222782492228934e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1189 }, { "completion_length": 177.50001525878906, "epoch": 0.8327501749475158, "grad_norm": 0.8109291791915894, "kl": 0.20620295405387878, "learning_rate": 3.890534374482415e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1190 }, { "completion_length": 159.6428680419922, "epoch": 0.8334499650104968, "grad_norm": 0.011064517311751842, "kl": 0.24281471967697144, "learning_rate": 3.858908635298669e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1191 }, { "completion_length": 180.71429443359375, "epoch": 0.834149755073478, "grad_norm": 0.748881995677948, "kl": 0.25378844141960144, "learning_rate": 3.827401208659761e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1192 }, { "completion_length": 147.07144165039062, "epoch": 0.834849545136459, "grad_norm": 1.466080904006958, "kl": 0.2583223581314087, "learning_rate": 3.796012270891672e-08, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1193 }, { "completion_length": 157.07144165039062, "epoch": 0.8355493351994402, "grad_norm": 1.030420184135437, "kl": 0.22054100036621094, "learning_rate": 3.764741997657292e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1194 }, { "completion_length": 195.50001525878906, "epoch": 0.8362491252624212, "grad_norm": 1.176583170890808, "kl": 0.1911887526512146, "learning_rate": 3.733590563955416e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1195 }, { "completion_length": 193.42857360839844, "epoch": 0.8369489153254024, "grad_norm": 0.0058119636960327625, "kl": 0.16394147276878357, "learning_rate": 3.70255814411978e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1196 }, { "completion_length": 167.07144165039062, "epoch": 0.8376487053883834, "grad_norm": 0.9175024628639221, "kl": 0.18412145972251892, "learning_rate": 3.671644911818084e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1197 }, { "completion_length": 171.92857360839844, "epoch": 0.8383484954513646, "grad_norm": 0.9832919239997864, "kl": 0.2076382040977478, "learning_rate": 3.6408510400510015e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1198 }, { "completion_length": 110.0714340209961, "epoch": 0.8390482855143457, "grad_norm": 1.178932785987854, "kl": 0.28909793496131897, "learning_rate": 3.610176701151224e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1199 }, { "completion_length": 228.85714721679688, "epoch": 0.8397480755773268, "grad_norm": 0.6401776671409607, "kl": 0.224664568901062, "learning_rate": 3.579622066782523e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1200 }, { "completion_length": 152.6428680419922, "epoch": 0.8404478656403079, "grad_norm": 1.522802472114563, "kl": 0.250542014837265, "learning_rate": 3.549187307938725e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1201 }, { "completion_length": 143.35714721679688, "epoch": 0.841147655703289, "grad_norm": 1.3310799598693848, "kl": 0.19353748857975006, "learning_rate": 3.518872594942826e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1202 }, { "completion_length": 167.0, "epoch": 0.8418474457662701, "grad_norm": 0.03111882321536541, "kl": 0.33703145384788513, "learning_rate": 3.488678097445999e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1203 }, { "completion_length": 154.71429443359375, "epoch": 0.8425472358292512, "grad_norm": 0.03491406887769699, "kl": 0.2930026948451996, "learning_rate": 3.458603984426634e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1204 }, { "completion_length": 158.2857208251953, "epoch": 0.8432470258922323, "grad_norm": 1.1521835327148438, "kl": 0.17690785229206085, "learning_rate": 3.4286504241894275e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1205 }, { "completion_length": 132.21429443359375, "epoch": 0.8439468159552135, "grad_norm": 1.5571599006652832, "kl": 0.2840757966041565, "learning_rate": 3.398817584364422e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1206 }, { "completion_length": 159.6428680419922, "epoch": 0.8446466060181945, "grad_norm": 1.252862811088562, "kl": 0.21614797413349152, "learning_rate": 3.3691056319060554e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1207 }, { "completion_length": 165.57144165039062, "epoch": 0.8453463960811757, "grad_norm": 1.8168431520462036, "kl": 0.19976088404655457, "learning_rate": 3.3395147330922524e-08, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1208 }, { "completion_length": 174.7857208251953, "epoch": 0.8460461861441567, "grad_norm": 1.0342302322387695, "kl": 0.2281290888786316, "learning_rate": 3.310045053523475e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1209 }, { "completion_length": 174.7857208251953, "epoch": 0.8467459762071379, "grad_norm": 1.1021450757980347, "kl": 0.21363146603107452, "learning_rate": 3.280696758121809e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1210 }, { "completion_length": 162.5, "epoch": 0.8474457662701189, "grad_norm": 0.009312832728028297, "kl": 0.21054936945438385, "learning_rate": 3.2514700111300196e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1211 }, { "completion_length": 165.2857208251953, "epoch": 0.8481455563331001, "grad_norm": 1.3904316425323486, "kl": 0.23806852102279663, "learning_rate": 3.22236497611067e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1212 }, { "completion_length": 158.21429443359375, "epoch": 0.8488453463960812, "grad_norm": 0.8142730593681335, "kl": 0.2035927027463913, "learning_rate": 3.1933818159451565e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1213 }, { "completion_length": 171.42857360839844, "epoch": 0.8495451364590623, "grad_norm": 2.200524091720581, "kl": 0.22164471447467804, "learning_rate": 3.164520692832848e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1214 }, { "completion_length": 156.0, "epoch": 0.8502449265220434, "grad_norm": 0.009058347903192043, "kl": 0.23640725016593933, "learning_rate": 3.135781768290133e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1215 }, { "completion_length": 173.1428680419922, "epoch": 0.8509447165850245, "grad_norm": 1.4817057847976685, "kl": 0.24117514491081238, "learning_rate": 3.107165203149551e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1216 }, { "completion_length": 177.57144165039062, "epoch": 0.8516445066480056, "grad_norm": 1.3294895887374878, "kl": 0.1925000548362732, "learning_rate": 3.0786711575588766e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1217 }, { "completion_length": 219.50001525878906, "epoch": 0.8523442967109867, "grad_norm": 0.6173551082611084, "kl": 0.2001303732395172, "learning_rate": 3.0502997909802067e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1218 }, { "completion_length": 168.2857208251953, "epoch": 0.8530440867739678, "grad_norm": 1.4694284200668335, "kl": 0.20262514054775238, "learning_rate": 3.02205126218911e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1219 }, { "completion_length": 228.9285888671875, "epoch": 0.853743876836949, "grad_norm": 0.00779617577791214, "kl": 0.18553800880908966, "learning_rate": 2.9939257292737055e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1220 }, { "completion_length": 226.7857208251953, "epoch": 0.85444366689993, "grad_norm": 1.0544668436050415, "kl": 0.1732667088508606, "learning_rate": 2.965923349633778e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1221 }, { "completion_length": 150.42857360839844, "epoch": 0.8551434569629112, "grad_norm": 0.8481256365776062, "kl": 0.26625412702560425, "learning_rate": 2.9380442799799175e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1222 }, { "completion_length": 214.2857208251953, "epoch": 0.8558432470258922, "grad_norm": 1.345398187637329, "kl": 0.17363551259040833, "learning_rate": 2.9102886763326256e-08, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1223 }, { "completion_length": 168.5, "epoch": 0.8565430370888734, "grad_norm": 1.2501178979873657, "kl": 0.22232374548912048, "learning_rate": 2.8826566940214375e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1224 }, { "completion_length": 170.1428680419922, "epoch": 0.8572428271518544, "grad_norm": 0.734455943107605, "kl": 0.2079610973596573, "learning_rate": 2.855148487684081e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1225 }, { "completion_length": 138.7857208251953, "epoch": 0.8579426172148356, "grad_norm": 1.7934653759002686, "kl": 0.25681307911872864, "learning_rate": 2.8277642112655874e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1226 }, { "completion_length": 157.42857360839844, "epoch": 0.8586424072778167, "grad_norm": 2.162432909011841, "kl": 0.27890822291374207, "learning_rate": 2.8005040180174216e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1227 }, { "completion_length": 257.5714416503906, "epoch": 0.8593421973407978, "grad_norm": 1.0700682401657104, "kl": 0.1525479108095169, "learning_rate": 2.7733680604966587e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1228 }, { "completion_length": 158.5, "epoch": 0.8600419874037789, "grad_norm": 0.9438952803611755, "kl": 0.19214357435703278, "learning_rate": 2.7463564905650853e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1229 }, { "completion_length": 113.92857360839844, "epoch": 0.86074177746676, "grad_norm": 0.010960743762552738, "kl": 0.28666526079177856, "learning_rate": 2.719469459388396e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1230 }, { "completion_length": 183.07144165039062, "epoch": 0.8614415675297411, "grad_norm": 1.133632779121399, "kl": 0.16968460381031036, "learning_rate": 2.6927071174353222e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1231 }, { "completion_length": 128.07144165039062, "epoch": 0.8621413575927221, "grad_norm": 0.011521591804921627, "kl": 0.2793659567832947, "learning_rate": 2.6660696144767763e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1232 }, { "completion_length": 151.0, "epoch": 0.8628411476557033, "grad_norm": 1.957326889038086, "kl": 0.2992720603942871, "learning_rate": 2.6395570995850464e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1233 }, { "completion_length": 239.4285888671875, "epoch": 0.8635409377186843, "grad_norm": 0.010318828746676445, "kl": 0.17106643319129944, "learning_rate": 2.6131697211329517e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1234 }, { "completion_length": 169.42857360839844, "epoch": 0.8642407277816655, "grad_norm": 1.2298396825790405, "kl": 0.2535679042339325, "learning_rate": 2.5869076267929752e-08, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1235 }, { "completion_length": 161.6428680419922, "epoch": 0.8649405178446467, "grad_norm": 0.6738013625144958, "kl": 0.2053150236606598, "learning_rate": 2.560770963536499e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1236 }, { "completion_length": 137.57144165039062, "epoch": 0.8656403079076277, "grad_norm": 1.1220225095748901, "kl": 0.25959500670433044, "learning_rate": 2.53475987763295e-08, "loss": 0.0003, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1237 }, { "completion_length": 133.1428680419922, "epoch": 0.8663400979706088, "grad_norm": 1.157094955444336, "kl": 0.3509102761745453, "learning_rate": 2.5088745146489592e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1238 }, { "completion_length": 189.6428680419922, "epoch": 0.8670398880335899, "grad_norm": 1.262062668800354, "kl": 0.20733730494976044, "learning_rate": 2.483115019447596e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1239 }, { "completion_length": 181.07144165039062, "epoch": 0.867739678096571, "grad_norm": 0.010919081047177315, "kl": 0.18784569203853607, "learning_rate": 2.457481536187525e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1240 }, { "completion_length": 148.42857360839844, "epoch": 0.8684394681595521, "grad_norm": 0.9293107986450195, "kl": 0.274272084236145, "learning_rate": 2.4319742083221906e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1241 }, { "completion_length": 130.42857360839844, "epoch": 0.8691392582225332, "grad_norm": 1.4843753576278687, "kl": 0.274969220161438, "learning_rate": 2.406593178599056e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1242 }, { "completion_length": 179.6428680419922, "epoch": 0.8698390482855144, "grad_norm": 1.1576449871063232, "kl": 0.18304726481437683, "learning_rate": 2.381338589058751e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1243 }, { "completion_length": 159.0, "epoch": 0.8705388383484954, "grad_norm": 1.0154595375061035, "kl": 0.22255823016166687, "learning_rate": 2.3562105810343265e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1244 }, { "completion_length": 196.42857360839844, "epoch": 0.8712386284114766, "grad_norm": 1.8300260305404663, "kl": 0.26673492789268494, "learning_rate": 2.3312092951504353e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1245 }, { "completion_length": 207.2857208251953, "epoch": 0.8719384184744576, "grad_norm": 0.007578478194773197, "kl": 0.1769677847623825, "learning_rate": 2.3063348713225433e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1246 }, { "completion_length": 187.35714721679688, "epoch": 0.8726382085374388, "grad_norm": 0.01032357756048441, "kl": 0.21311494708061218, "learning_rate": 2.281587448756153e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1247 }, { "completion_length": 191.6428680419922, "epoch": 0.8733379986004198, "grad_norm": 0.01939639262855053, "kl": 0.22273212671279907, "learning_rate": 2.256967165946047e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1248 }, { "completion_length": 207.85714721679688, "epoch": 0.874037788663401, "grad_norm": 0.9737390875816345, "kl": 0.17670346796512604, "learning_rate": 2.2324741606754628e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1249 }, { "completion_length": 140.85714721679688, "epoch": 0.8747375787263821, "grad_norm": 1.2022452354431152, "kl": 0.29929178953170776, "learning_rate": 2.2081085700153714e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1250 }, { "completion_length": 143.42857360839844, "epoch": 0.8754373687893632, "grad_norm": 1.3373767137527466, "kl": 0.2747289836406708, "learning_rate": 2.183870530323689e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1251 }, { "completion_length": 133.35714721679688, "epoch": 0.8761371588523443, "grad_norm": 2.3575704097747803, "kl": 0.25871407985687256, "learning_rate": 2.159760177244496e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1252 }, { "completion_length": 165.92857360839844, "epoch": 0.8768369489153254, "grad_norm": 1.4299527406692505, "kl": 0.22480441629886627, "learning_rate": 2.135777645707318e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1253 }, { "completion_length": 171.57144165039062, "epoch": 0.8775367389783065, "grad_norm": 1.2487300634384155, "kl": 0.2744445502758026, "learning_rate": 2.1119230699263384e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1254 }, { "completion_length": 137.7857208251953, "epoch": 0.8782365290412876, "grad_norm": 0.7264689803123474, "kl": 0.2586050033569336, "learning_rate": 2.088196583399651e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1255 }, { "completion_length": 204.85714721679688, "epoch": 0.8789363191042687, "grad_norm": 0.012499237433075905, "kl": 0.20196619629859924, "learning_rate": 2.064598318908542e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1256 }, { "completion_length": 133.71429443359375, "epoch": 0.8796361091672499, "grad_norm": 1.1827751398086548, "kl": 0.31316077709198, "learning_rate": 2.0411284085166957e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1257 }, { "completion_length": 155.5, "epoch": 0.8803358992302309, "grad_norm": 1.4089956283569336, "kl": 0.2613886594772339, "learning_rate": 2.0177869835695112e-08, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1258 }, { "completion_length": 151.92857360839844, "epoch": 0.8810356892932121, "grad_norm": 0.009331272915005684, "kl": 0.24874532222747803, "learning_rate": 1.9945741746933196e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1259 }, { "completion_length": 167.35714721679688, "epoch": 0.8817354793561931, "grad_norm": 1.3569661378860474, "kl": 0.1788841038942337, "learning_rate": 1.9714901117946853e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1260 }, { "completion_length": 225.7857208251953, "epoch": 0.8824352694191743, "grad_norm": 0.008111832663416862, "kl": 0.1696583777666092, "learning_rate": 1.948534924059661e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1261 }, { "completion_length": 167.0, "epoch": 0.8831350594821553, "grad_norm": 1.8644587993621826, "kl": 0.22662027180194855, "learning_rate": 1.925708739953086e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1262 }, { "completion_length": 214.6428680419922, "epoch": 0.8838348495451365, "grad_norm": 0.6280895471572876, "kl": 0.1796744018793106, "learning_rate": 1.9030116872178314e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1263 }, { "completion_length": 138.21429443359375, "epoch": 0.8845346396081175, "grad_norm": 0.009797152131795883, "kl": 0.27188169956207275, "learning_rate": 1.88044389287412e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1264 }, { "completion_length": 174.50001525878906, "epoch": 0.8852344296710987, "grad_norm": 1.8737318515777588, "kl": 0.2397805005311966, "learning_rate": 1.8580054832188052e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1265 }, { "completion_length": 143.6428680419922, "epoch": 0.8859342197340798, "grad_norm": 0.010810981504619122, "kl": 0.2526929974555969, "learning_rate": 1.8356965838246445e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1266 }, { "completion_length": 191.2857208251953, "epoch": 0.8866340097970609, "grad_norm": 0.7175751328468323, "kl": 0.1973368525505066, "learning_rate": 1.8135173195396285e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1267 }, { "completion_length": 157.7857208251953, "epoch": 0.887333799860042, "grad_norm": 1.3207117319107056, "kl": 0.2283293902873993, "learning_rate": 1.791467814486261e-08, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1268 }, { "completion_length": 154.85714721679688, "epoch": 0.8880335899230231, "grad_norm": 0.46074673533439636, "kl": 0.25197938084602356, "learning_rate": 1.7695481920608712e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1269 }, { "completion_length": 151.1428680419922, "epoch": 0.8887333799860042, "grad_norm": 1.3868297338485718, "kl": 0.2485383152961731, "learning_rate": 1.7477585749329172e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1270 }, { "completion_length": 157.6428680419922, "epoch": 0.8894331700489853, "grad_norm": 1.9650871753692627, "kl": 0.2657938599586487, "learning_rate": 1.7260990850443137e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1271 }, { "completion_length": 155.35714721679688, "epoch": 0.8901329601119664, "grad_norm": 1.4333562850952148, "kl": 0.19904175400733948, "learning_rate": 1.7045698436087302e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1272 }, { "completion_length": 153.0, "epoch": 0.8908327501749476, "grad_norm": 1.2434886693954468, "kl": 0.278282105922699, "learning_rate": 1.6831709711109337e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1273 }, { "completion_length": 177.7857208251953, "epoch": 0.8915325402379286, "grad_norm": 1.3807754516601562, "kl": 0.26332956552505493, "learning_rate": 1.661902587306091e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1274 }, { "completion_length": 202.35714721679688, "epoch": 0.8922323303009098, "grad_norm": 0.007266457192599773, "kl": 0.18284882605075836, "learning_rate": 1.6407648112191195e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1275 }, { "completion_length": 163.85714721679688, "epoch": 0.8929321203638908, "grad_norm": 1.9630389213562012, "kl": 0.20370155572891235, "learning_rate": 1.6197577611440167e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1276 }, { "completion_length": 152.0, "epoch": 0.893631910426872, "grad_norm": 1.3909683227539062, "kl": 0.26763638854026794, "learning_rate": 1.5988815546431806e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1277 }, { "completion_length": 194.00001525878906, "epoch": 0.894331700489853, "grad_norm": 0.9055130481719971, "kl": 0.23034092783927917, "learning_rate": 1.5781363085467763e-08, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1278 }, { "completion_length": 209.35714721679688, "epoch": 0.8950314905528342, "grad_norm": 0.01719479449093342, "kl": 0.23232384026050568, "learning_rate": 1.5575221389520694e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1279 }, { "completion_length": 181.85714721679688, "epoch": 0.8957312806158153, "grad_norm": 0.011499833315610886, "kl": 0.21650569140911102, "learning_rate": 1.5370391612227695e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1280 }, { "completion_length": 179.2857208251953, "epoch": 0.8964310706787963, "grad_norm": 0.00905691273510456, "kl": 0.2066732496023178, "learning_rate": 1.5166874899884053e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1281 }, { "completion_length": 176.6428680419922, "epoch": 0.8971308607417775, "grad_norm": 0.9170935750007629, "kl": 0.2411300241947174, "learning_rate": 1.4964672391436672e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1282 }, { "completion_length": 194.7857208251953, "epoch": 0.8978306508047585, "grad_norm": 0.01169458869844675, "kl": 0.22938889265060425, "learning_rate": 1.4763785218477625e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1283 }, { "completion_length": 137.57144165039062, "epoch": 0.8985304408677397, "grad_norm": 1.4941312074661255, "kl": 0.4233577847480774, "learning_rate": 1.4564214505238093e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1284 }, { "completion_length": 166.42857360839844, "epoch": 0.8992302309307207, "grad_norm": 1.468335509300232, "kl": 0.2325616031885147, "learning_rate": 1.4365961368581841e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1285 }, { "completion_length": 186.00001525878906, "epoch": 0.8999300209937019, "grad_norm": 1.6134655475616455, "kl": 0.2264670729637146, "learning_rate": 1.4169026917999011e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1286 }, { "completion_length": 166.71429443359375, "epoch": 0.900629811056683, "grad_norm": 0.008176122792065144, "kl": 0.18503962457180023, "learning_rate": 1.3973412255600031e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1287 }, { "completion_length": 115.21429443359375, "epoch": 0.9013296011196641, "grad_norm": 1.8215909004211426, "kl": 0.3324131965637207, "learning_rate": 1.3779118476109242e-08, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1288 }, { "completion_length": 168.2857208251953, "epoch": 0.9020293911826452, "grad_norm": 0.8763949275016785, "kl": 0.2117134928703308, "learning_rate": 1.3586146666858922e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1289 }, { "completion_length": 124.14286041259766, "epoch": 0.9027291812456263, "grad_norm": 1.4530029296875, "kl": 0.2741888463497162, "learning_rate": 1.3394497907783297e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1290 }, { "completion_length": 193.35714721679688, "epoch": 0.9034289713086074, "grad_norm": 0.010304938070476055, "kl": 0.22521556913852692, "learning_rate": 1.3204173271412156e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1291 }, { "completion_length": 187.21429443359375, "epoch": 0.9041287613715885, "grad_norm": 1.5378167629241943, "kl": 0.20625072717666626, "learning_rate": 1.3015173822865183e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1292 }, { "completion_length": 169.6428680419922, "epoch": 0.9048285514345696, "grad_norm": 1.5174146890640259, "kl": 0.21786990761756897, "learning_rate": 1.2827500619845916e-08, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1293 }, { "completion_length": 131.07144165039062, "epoch": 0.9055283414975507, "grad_norm": 2.1000311374664307, "kl": 0.31029078364372253, "learning_rate": 1.2641154712635604e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1294 }, { "completion_length": 200.71429443359375, "epoch": 0.9062281315605318, "grad_norm": 1.6551883220672607, "kl": 0.22392137348651886, "learning_rate": 1.2456137144087691e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1295 }, { "completion_length": 211.21429443359375, "epoch": 0.906927921623513, "grad_norm": 0.008668345399200916, "kl": 0.21250571310520172, "learning_rate": 1.227244894962176e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1296 }, { "completion_length": 187.71429443359375, "epoch": 0.907627711686494, "grad_norm": 0.013726574368774891, "kl": 0.23482680320739746, "learning_rate": 1.2090091157217652e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1297 }, { "completion_length": 168.85714721679688, "epoch": 0.9083275017494752, "grad_norm": 1.0472482442855835, "kl": 0.2717699408531189, "learning_rate": 1.1909064787409995e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1298 }, { "completion_length": 157.1428680419922, "epoch": 0.9090272918124562, "grad_norm": 1.5324705839157104, "kl": 0.272755891084671, "learning_rate": 1.1729370853282294e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1299 }, { "completion_length": 146.71429443359375, "epoch": 0.9097270818754374, "grad_norm": 1.9537763595581055, "kl": 0.35159483551979065, "learning_rate": 1.1551010360461244e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1300 }, { "completion_length": 202.85714721679688, "epoch": 0.9104268719384184, "grad_norm": 1.213789463043213, "kl": 0.30573272705078125, "learning_rate": 1.1373984307111228e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1301 }, { "completion_length": 167.21429443359375, "epoch": 0.9111266620013996, "grad_norm": 0.020943453535437584, "kl": 0.2514796257019043, "learning_rate": 1.1198293683928579e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1302 }, { "completion_length": 159.42857360839844, "epoch": 0.9118264520643807, "grad_norm": 0.7576661109924316, "kl": 0.32443612813949585, "learning_rate": 1.1023939474136185e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1303 }, { "completion_length": 189.35714721679688, "epoch": 0.9125262421273618, "grad_norm": 0.01495310664176941, "kl": 0.25289759039878845, "learning_rate": 1.0850922653478007e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1304 }, { "completion_length": 163.92857360839844, "epoch": 0.9132260321903429, "grad_norm": 1.457454800605774, "kl": 0.21855024993419647, "learning_rate": 1.0679244190213377e-08, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1305 }, { "completion_length": 135.07144165039062, "epoch": 0.913925822253324, "grad_norm": 0.010427487082779408, "kl": 0.2591363191604614, "learning_rate": 1.050890504511176e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1306 }, { "completion_length": 134.2857208251953, "epoch": 0.9146256123163051, "grad_norm": 1.2386841773986816, "kl": 0.3952588737010956, "learning_rate": 1.033990617144745e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1307 }, { "completion_length": 178.57144165039062, "epoch": 0.9153254023792862, "grad_norm": 0.009921030141413212, "kl": 0.2069104164838791, "learning_rate": 1.0172248514994041e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1308 }, { "completion_length": 191.6428680419922, "epoch": 0.9160251924422673, "grad_norm": 0.8973679542541504, "kl": 0.16617006063461304, "learning_rate": 1.0005933014019307e-08, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1309 }, { "completion_length": 146.35714721679688, "epoch": 0.9167249825052485, "grad_norm": 1.1217337846755981, "kl": 0.30808383226394653, "learning_rate": 9.840960599279857e-09, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1310 }, { "completion_length": 127.21429443359375, "epoch": 0.9174247725682295, "grad_norm": 0.015861397609114647, "kl": 0.2740711569786072, "learning_rate": 9.677332194015841e-09, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1311 }, { "completion_length": 113.85714721679688, "epoch": 0.9181245626312107, "grad_norm": 1.3558783531188965, "kl": 0.36586087942123413, "learning_rate": 9.515048713946067e-09, "loss": 0.0004, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1312 }, { "completion_length": 173.2857208251953, "epoch": 0.9188243526941917, "grad_norm": 0.010661440901458263, "kl": 0.17641493678092957, "learning_rate": 9.354111067262582e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1313 }, { "completion_length": 146.1428680419922, "epoch": 0.9195241427571729, "grad_norm": 2.779055118560791, "kl": 0.27099573612213135, "learning_rate": 9.194520154625684e-09, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1314 }, { "completion_length": 118.92857360839844, "epoch": 0.9202239328201539, "grad_norm": 0.010712092742323875, "kl": 0.24216696619987488, "learning_rate": 9.036276869159004e-09, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1315 }, { "completion_length": 167.42857360839844, "epoch": 0.9209237228831351, "grad_norm": 0.02462656795978546, "kl": 0.3253130316734314, "learning_rate": 8.879382096444287e-09, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1316 }, { "completion_length": 140.71429443359375, "epoch": 0.9216235129461162, "grad_norm": 1.3606350421905518, "kl": 0.29509207606315613, "learning_rate": 8.72383671451668e-09, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1317 }, { "completion_length": 205.6428680419922, "epoch": 0.9223233030090973, "grad_norm": 0.727387547492981, "kl": 0.18626686930656433, "learning_rate": 8.569641593859562e-09, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1318 }, { "completion_length": 143.7857208251953, "epoch": 0.9230230930720784, "grad_norm": 1.3535363674163818, "kl": 0.2516903579235077, "learning_rate": 8.416797597399882e-09, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1319 }, { "completion_length": 156.71429443359375, "epoch": 0.9237228831350595, "grad_norm": 0.07291042059659958, "kl": 0.3211541175842285, "learning_rate": 8.265305580503257e-09, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1320 }, { "completion_length": 163.42857360839844, "epoch": 0.9244226731980406, "grad_norm": 1.3988401889801025, "kl": 0.23229822516441345, "learning_rate": 8.115166390969125e-09, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1321 }, { "completion_length": 147.0, "epoch": 0.9251224632610217, "grad_norm": 0.010127603076398373, "kl": 0.26489800214767456, "learning_rate": 7.966380869026095e-09, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1322 }, { "completion_length": 211.2857208251953, "epoch": 0.9258222533240028, "grad_norm": 0.8250149488449097, "kl": 0.18736618757247925, "learning_rate": 7.818949847327227e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1323 }, { "completion_length": 158.5, "epoch": 0.9265220433869839, "grad_norm": 1.714956521987915, "kl": 0.30067819356918335, "learning_rate": 7.67287415094528e-09, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1324 }, { "completion_length": 138.71429443359375, "epoch": 0.927221833449965, "grad_norm": 2.128505229949951, "kl": 0.25607195496559143, "learning_rate": 7.528154597368192e-09, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1325 }, { "completion_length": 152.71429443359375, "epoch": 0.9279216235129462, "grad_norm": 1.44500732421875, "kl": 0.28000885248184204, "learning_rate": 7.384791996494527e-09, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1326 }, { "completion_length": 166.7857208251953, "epoch": 0.9286214135759272, "grad_norm": 0.008409437723457813, "kl": 0.2216724455356598, "learning_rate": 7.242787150628843e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1327 }, { "completion_length": 155.0, "epoch": 0.9293212036389084, "grad_norm": 0.5981183052062988, "kl": 0.21657848358154297, "learning_rate": 7.1021408544772175e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1328 }, { "completion_length": 194.35714721679688, "epoch": 0.9300209937018894, "grad_norm": 0.7098842263221741, "kl": 0.18943746387958527, "learning_rate": 6.962853895142923e-09, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1329 }, { "completion_length": 181.1428680419922, "epoch": 0.9307207837648706, "grad_norm": 1.1748435497283936, "kl": 0.23245880007743835, "learning_rate": 6.824927052121898e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1330 }, { "completion_length": 148.6428680419922, "epoch": 0.9314205738278516, "grad_norm": 1.26004958152771, "kl": 0.24964681267738342, "learning_rate": 6.68836109729834e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1331 }, { "completion_length": 147.07144165039062, "epoch": 0.9321203638908327, "grad_norm": 0.93543541431427, "kl": 0.23571574687957764, "learning_rate": 6.55315679494059e-09, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1332 }, { "completion_length": 169.6428680419922, "epoch": 0.9328201539538139, "grad_norm": 1.8365111351013184, "kl": 0.2561831474304199, "learning_rate": 6.4193149016966704e-09, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1333 }, { "completion_length": 217.00001525878906, "epoch": 0.933519944016795, "grad_norm": 0.9619185924530029, "kl": 0.1601485311985016, "learning_rate": 6.2868361665900926e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1334 }, { "completion_length": 172.42857360839844, "epoch": 0.9342197340797761, "grad_norm": 0.6807326674461365, "kl": 0.18687467277050018, "learning_rate": 6.155721331015745e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1335 }, { "completion_length": 160.7857208251953, "epoch": 0.9349195241427571, "grad_norm": 1.0178474187850952, "kl": 0.3675042688846588, "learning_rate": 6.0259711287355964e-09, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1336 }, { "completion_length": 172.1428680419922, "epoch": 0.9356193142057383, "grad_norm": 1.0840686559677124, "kl": 0.30300799012184143, "learning_rate": 5.89758628587475e-09, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1337 }, { "completion_length": 176.21429443359375, "epoch": 0.9363191042687193, "grad_norm": 1.017836332321167, "kl": 0.22980433702468872, "learning_rate": 5.7705675209173135e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1338 }, { "completion_length": 126.42857360839844, "epoch": 0.9370188943317005, "grad_norm": 1.6122280359268188, "kl": 0.2759954631328583, "learning_rate": 5.6449155447022835e-09, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1339 }, { "completion_length": 160.0, "epoch": 0.9377186843946816, "grad_norm": 0.009955769404768944, "kl": 0.2207319736480713, "learning_rate": 5.52063106041975e-09, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1340 }, { "completion_length": 155.0, "epoch": 0.9384184744576627, "grad_norm": 1.804633617401123, "kl": 0.24793581664562225, "learning_rate": 5.397714763606842e-09, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1341 }, { "completion_length": 189.71429443359375, "epoch": 0.9391182645206438, "grad_norm": 1.373291254043579, "kl": 0.18734394013881683, "learning_rate": 5.276167342143756e-09, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1342 }, { "completion_length": 198.42857360839844, "epoch": 0.9398180545836249, "grad_norm": 2.6510205268859863, "kl": 0.2277909815311432, "learning_rate": 5.15598947625015e-09, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1343 }, { "completion_length": 176.1428680419922, "epoch": 0.940517844646606, "grad_norm": 0.010027957148849964, "kl": 0.18944154679775238, "learning_rate": 5.037181838481147e-09, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1344 }, { "completion_length": 163.71429443359375, "epoch": 0.9412176347095871, "grad_norm": 0.008884276263415813, "kl": 0.22666656970977783, "learning_rate": 4.919745093723587e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1345 }, { "completion_length": 132.85714721679688, "epoch": 0.9419174247725682, "grad_norm": 1.0608068704605103, "kl": 0.280362993478775, "learning_rate": 4.803679899192392e-09, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1346 }, { "completion_length": 153.5, "epoch": 0.9426172148355494, "grad_norm": 1.2285606861114502, "kl": 0.2226695567369461, "learning_rate": 4.688986904426789e-09, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1347 }, { "completion_length": 182.7857208251953, "epoch": 0.9433170048985304, "grad_norm": 0.012076051905751228, "kl": 0.2336723953485489, "learning_rate": 4.575666751286733e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1348 }, { "completion_length": 138.42857360839844, "epoch": 0.9440167949615116, "grad_norm": 1.6312142610549927, "kl": 0.6449800729751587, "learning_rate": 4.463720073949351e-09, "loss": 0.0006, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1349 }, { "completion_length": 158.85714721679688, "epoch": 0.9447165850244926, "grad_norm": 1.676668405532837, "kl": 0.26072657108306885, "learning_rate": 4.353147498905224e-09, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1350 }, { "completion_length": 185.50001525878906, "epoch": 0.9454163750874738, "grad_norm": 1.5997549295425415, "kl": 0.28716832399368286, "learning_rate": 4.2439496449551416e-09, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1351 }, { "completion_length": 208.21429443359375, "epoch": 0.9461161651504548, "grad_norm": 1.3174644708633423, "kl": 0.1744014322757721, "learning_rate": 4.136127123206462e-09, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1352 }, { "completion_length": 128.6428680419922, "epoch": 0.946815955213436, "grad_norm": 0.018840759992599487, "kl": 0.2715713083744049, "learning_rate": 4.029680537069646e-09, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1353 }, { "completion_length": 164.1428680419922, "epoch": 0.947515745276417, "grad_norm": 2.114229440689087, "kl": 0.265103280544281, "learning_rate": 3.9246104822550364e-09, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1354 }, { "completion_length": 204.7857208251953, "epoch": 0.9482155353393982, "grad_norm": 1.5136994123458862, "kl": 0.24005940556526184, "learning_rate": 3.820917546769442e-09, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1355 }, { "completion_length": 198.1428680419922, "epoch": 0.9489153254023793, "grad_norm": 1.5311776399612427, "kl": 0.21294938027858734, "learning_rate": 3.7186023109128663e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1356 }, { "completion_length": 236.00001525878906, "epoch": 0.9496151154653604, "grad_norm": 0.009830577298998833, "kl": 0.170041024684906, "learning_rate": 3.6176653472752006e-09, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1357 }, { "completion_length": 208.6428680419922, "epoch": 0.9503149055283415, "grad_norm": 0.008553055115044117, "kl": 0.22487834095954895, "learning_rate": 3.518107220733174e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1358 }, { "completion_length": 146.0, "epoch": 0.9510146955913226, "grad_norm": 1.3218826055526733, "kl": 0.2508351504802704, "learning_rate": 3.4199284884469104e-09, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1359 }, { "completion_length": 150.6428680419922, "epoch": 0.9517144856543037, "grad_norm": 0.011008813045918941, "kl": 0.25399619340896606, "learning_rate": 3.3231296998572077e-09, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1360 }, { "completion_length": 120.78572082519531, "epoch": 0.9524142757172848, "grad_norm": 1.7923227548599243, "kl": 0.27266737818717957, "learning_rate": 3.2277113966820146e-09, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1361 }, { "completion_length": 157.92857360839844, "epoch": 0.9531140657802659, "grad_norm": 0.011260618455708027, "kl": 0.20497430860996246, "learning_rate": 3.13367411291382e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1362 }, { "completion_length": 191.1428680419922, "epoch": 0.9538138558432471, "grad_norm": 1.496407151222229, "kl": 0.2190258651971817, "learning_rate": 3.041018374816351e-09, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1363 }, { "completion_length": 132.92857360839844, "epoch": 0.9545136459062281, "grad_norm": 1.8555550575256348, "kl": 0.3724459707736969, "learning_rate": 2.9497447009218246e-09, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1364 }, { "completion_length": 139.35714721679688, "epoch": 0.9552134359692093, "grad_norm": 0.9099873304367065, "kl": 0.25135380029678345, "learning_rate": 2.8598536020278673e-09, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1365 }, { "completion_length": 233.57144165039062, "epoch": 0.9559132260321903, "grad_norm": 0.0288273673504591, "kl": 0.1828179806470871, "learning_rate": 2.7713455811948782e-09, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1366 }, { "completion_length": 173.00001525878906, "epoch": 0.9566130160951715, "grad_norm": 1.131606101989746, "kl": 0.18701843917369843, "learning_rate": 2.6842211337430034e-09, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1367 }, { "completion_length": 168.21429443359375, "epoch": 0.9573128061581525, "grad_norm": 1.3751652240753174, "kl": 0.26451778411865234, "learning_rate": 2.5984807472494985e-09, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1368 }, { "completion_length": 169.7857208251953, "epoch": 0.9580125962211337, "grad_norm": 1.066636323928833, "kl": 0.21851639449596405, "learning_rate": 2.514124901545983e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1369 }, { "completion_length": 150.1428680419922, "epoch": 0.9587123862841148, "grad_norm": 0.011070491746068, "kl": 0.2514040768146515, "learning_rate": 2.431154068715635e-09, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1370 }, { "completion_length": 165.0, "epoch": 0.9594121763470959, "grad_norm": 0.6672722101211548, "kl": 0.23562565445899963, "learning_rate": 2.3495687130907215e-09, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1371 }, { "completion_length": 168.7857208251953, "epoch": 0.960111966410077, "grad_norm": 0.01058859471231699, "kl": 0.2453652173280716, "learning_rate": 2.2693692912498785e-09, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1372 }, { "completion_length": 143.07144165039062, "epoch": 0.960811756473058, "grad_norm": 0.018368232995271683, "kl": 0.28745004534721375, "learning_rate": 2.1905562520156683e-09, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1373 }, { "completion_length": 154.35714721679688, "epoch": 0.9615115465360392, "grad_norm": 1.6187763214111328, "kl": 0.23598992824554443, "learning_rate": 2.1131300364518876e-09, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1374 }, { "completion_length": 173.00001525878906, "epoch": 0.9622113365990203, "grad_norm": 0.4382344186306, "kl": 0.21257831156253815, "learning_rate": 2.0370910778612913e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1375 }, { "completion_length": 156.2857208251953, "epoch": 0.9629111266620014, "grad_norm": 1.6540101766586304, "kl": 0.2726787328720093, "learning_rate": 1.962439801783067e-09, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1376 }, { "completion_length": 255.21429443359375, "epoch": 0.9636109167249826, "grad_norm": 0.007218874990940094, "kl": 0.1748599112033844, "learning_rate": 1.8891766259904185e-09, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1377 }, { "completion_length": 172.50001525878906, "epoch": 0.9643107067879636, "grad_norm": 1.5081721544265747, "kl": 0.23074223101139069, "learning_rate": 1.8173019604884044e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1378 }, { "completion_length": 148.21429443359375, "epoch": 0.9650104968509448, "grad_norm": 0.8677284121513367, "kl": 0.24074393510818481, "learning_rate": 1.7468162075113813e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1379 }, { "completion_length": 162.7857208251953, "epoch": 0.9657102869139258, "grad_norm": 1.5817475318908691, "kl": 0.2689761519432068, "learning_rate": 1.6777197615209792e-09, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1380 }, { "completion_length": 142.07144165039062, "epoch": 0.966410076976907, "grad_norm": 1.2715890407562256, "kl": 0.25541952252388, "learning_rate": 1.61001300920377e-09, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1381 }, { "completion_length": 201.2857208251953, "epoch": 0.967109867039888, "grad_norm": 1.5113792419433594, "kl": 0.17820926010608673, "learning_rate": 1.5436963294691574e-09, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1382 }, { "completion_length": 178.21429443359375, "epoch": 0.9678096571028691, "grad_norm": 2.151468276977539, "kl": 0.2177211046218872, "learning_rate": 1.4787700934472403e-09, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1383 }, { "completion_length": 145.92857360839844, "epoch": 0.9685094471658502, "grad_norm": 1.3491851091384888, "kl": 0.30592823028564453, "learning_rate": 1.415234664486703e-09, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1384 }, { "completion_length": 133.6428680419922, "epoch": 0.9692092372288313, "grad_norm": 2.111081600189209, "kl": 0.21826377511024475, "learning_rate": 1.3530903981528452e-09, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1385 }, { "completion_length": 180.21429443359375, "epoch": 0.9699090272918125, "grad_norm": 0.8409843444824219, "kl": 0.23169487714767456, "learning_rate": 1.292337642225555e-09, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1386 }, { "completion_length": 123.0714340209961, "epoch": 0.9706088173547935, "grad_norm": 2.594151258468628, "kl": 0.41949573159217834, "learning_rate": 1.2329767366973664e-09, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1387 }, { "completion_length": 132.21429443359375, "epoch": 0.9713086074177747, "grad_norm": 1.3233803510665894, "kl": 0.2518426179885864, "learning_rate": 1.1750080137715446e-09, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1388 }, { "completion_length": 131.6428680419922, "epoch": 0.9720083974807557, "grad_norm": 0.009249989874660969, "kl": 0.2377672791481018, "learning_rate": 1.1184317978602808e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1389 }, { "completion_length": 162.71429443359375, "epoch": 0.9727081875437369, "grad_norm": 2.012877941131592, "kl": 0.20712321996688843, "learning_rate": 1.0632484055827507e-09, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1390 }, { "completion_length": 183.7857208251953, "epoch": 0.9734079776067179, "grad_norm": 0.7238763570785522, "kl": 0.20208999514579773, "learning_rate": 1.0094581457635033e-09, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1391 }, { "completion_length": 173.35714721679688, "epoch": 0.9741077676696991, "grad_norm": 2.1276025772094727, "kl": 0.2521856725215912, "learning_rate": 9.570613194306577e-10, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1392 }, { "completion_length": 208.21429443359375, "epoch": 0.9748075577326802, "grad_norm": 0.005789514631032944, "kl": 0.16112348437309265, "learning_rate": 9.060582198141819e-10, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1393 }, { "completion_length": 136.35714721679688, "epoch": 0.9755073477956613, "grad_norm": 1.632681131362915, "kl": 0.25972557067871094, "learning_rate": 8.564491323443113e-10, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1394 }, { "completion_length": 184.50001525878906, "epoch": 0.9762071378586424, "grad_norm": 0.7313697338104248, "kl": 0.20892004668712616, "learning_rate": 8.082343346499099e-10, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1395 }, { "completion_length": 174.1428680419922, "epoch": 0.9769069279216235, "grad_norm": 0.9866244792938232, "kl": 0.25609859824180603, "learning_rate": 7.614140965569449e-10, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1396 }, { "completion_length": 181.85714721679688, "epoch": 0.9776067179846046, "grad_norm": 1.6926835775375366, "kl": 0.2273755818605423, "learning_rate": 7.159886800869874e-10, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1397 }, { "completion_length": 161.85714721679688, "epoch": 0.9783065080475857, "grad_norm": 2.109154224395752, "kl": 0.3050709664821625, "learning_rate": 6.719583394556861e-10, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1398 }, { "completion_length": 178.1428680419922, "epoch": 0.9790062981105668, "grad_norm": 1.1959236860275269, "kl": 0.2432025671005249, "learning_rate": 6.293233210713789e-10, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1399 }, { "completion_length": 159.42857360839844, "epoch": 0.979706088173548, "grad_norm": 0.802379310131073, "kl": 0.2260698676109314, "learning_rate": 5.880838635337892e-10, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1400 }, { "completion_length": 152.7857208251953, "epoch": 0.980405878236529, "grad_norm": 0.6966069340705872, "kl": 0.28575843572616577, "learning_rate": 5.482401976325268e-10, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1401 }, { "completion_length": 163.21429443359375, "epoch": 0.9811056682995102, "grad_norm": 1.7385739088058472, "kl": 0.2833951413631439, "learning_rate": 5.097925463459496e-10, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1402 }, { "completion_length": 183.50001525878906, "epoch": 0.9818054583624912, "grad_norm": 1.0388801097869873, "kl": 0.2027069628238678, "learning_rate": 4.727411248398317e-10, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1403 }, { "completion_length": 173.57144165039062, "epoch": 0.9825052484254724, "grad_norm": 0.007538371253758669, "kl": 0.21798591315746307, "learning_rate": 4.3708614046622516e-10, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1404 }, { "completion_length": 158.71429443359375, "epoch": 0.9832050384884534, "grad_norm": 1.3134726285934448, "kl": 0.2546008229255676, "learning_rate": 4.028277927621837e-10, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1405 }, { "completion_length": 124.71429443359375, "epoch": 0.9839048285514346, "grad_norm": 1.0072579383850098, "kl": 0.439302533864975, "learning_rate": 3.699662734487907e-10, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1406 }, { "completion_length": 154.21429443359375, "epoch": 0.9846046186144157, "grad_norm": 0.011274220421910286, "kl": 0.29214927554130554, "learning_rate": 3.3850176642996607e-10, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1407 }, { "completion_length": 160.2857208251953, "epoch": 0.9853044086773968, "grad_norm": 0.00780492601916194, "kl": 0.21676279604434967, "learning_rate": 3.084344477915224e-10, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1408 }, { "completion_length": 155.35714721679688, "epoch": 0.9860041987403779, "grad_norm": 0.7840172648429871, "kl": 0.20884963870048523, "learning_rate": 2.797644858000825e-10, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1409 }, { "completion_length": 188.21429443359375, "epoch": 0.986703988803359, "grad_norm": 0.007712265010923147, "kl": 0.1881667524576187, "learning_rate": 2.524920409023024e-10, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1410 }, { "completion_length": 167.42857360839844, "epoch": 0.9874037788663401, "grad_norm": 1.0450371503829956, "kl": 0.20845170319080353, "learning_rate": 2.2661726572378857e-10, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1411 }, { "completion_length": 149.42857360839844, "epoch": 0.9881035689293212, "grad_norm": 1.1238044500350952, "kl": 0.22389788925647736, "learning_rate": 2.02140305068349e-10, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1412 }, { "completion_length": 150.2857208251953, "epoch": 0.9888033589923023, "grad_norm": 0.8623078465461731, "kl": 0.25521120429039, "learning_rate": 1.7906129591713227e-10, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1413 }, { "completion_length": 148.35714721679688, "epoch": 0.9895031490552834, "grad_norm": 0.9464666247367859, "kl": 0.2525961995124817, "learning_rate": 1.5738036742796168e-10, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1414 }, { "completion_length": 172.7857208251953, "epoch": 0.9902029391182645, "grad_norm": 1.3495320081710815, "kl": 0.19000789523124695, "learning_rate": 1.3709764093441934e-10, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1415 }, { "completion_length": 122.14286041259766, "epoch": 0.9909027291812457, "grad_norm": 0.0093265725299716, "kl": 0.24771440029144287, "learning_rate": 1.182132299454297e-10, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1416 }, { "completion_length": 135.71429443359375, "epoch": 0.9916025192442267, "grad_norm": 0.009984687902033329, "kl": 0.25959300994873047, "learning_rate": 1.0072724014437151e-10, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1417 }, { "completion_length": 175.1428680419922, "epoch": 0.9923023093072079, "grad_norm": 0.01301694754511118, "kl": 0.2382923662662506, "learning_rate": 8.463976938860584e-11, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1418 }, { "completion_length": 184.21429443359375, "epoch": 0.9930020993701889, "grad_norm": 1.2169538736343384, "kl": 0.2526746094226837, "learning_rate": 6.995090770900436e-11, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1419 }, { "completion_length": 189.92857360839844, "epoch": 0.9937018894331701, "grad_norm": 0.00896223820745945, "kl": 0.2307371199131012, "learning_rate": 5.666073730925536e-11, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1420 }, { "completion_length": 183.35714721679688, "epoch": 0.9944016794961511, "grad_norm": 1.4747896194458008, "kl": 0.2407531440258026, "learning_rate": 4.476933256555848e-11, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1421 }, { "completion_length": 129.92857360839844, "epoch": 0.9951014695591323, "grad_norm": 1.8872802257537842, "kl": 0.2457588165998459, "learning_rate": 3.427676002615287e-11, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1422 }, { "completion_length": 227.6428680419922, "epoch": 0.9958012596221134, "grad_norm": 0.6039772629737854, "kl": 0.15836921334266663, "learning_rate": 2.518307841095635e-11, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1423 }, { "completion_length": 152.57144165039062, "epoch": 0.9965010496850945, "grad_norm": 0.010559028945863247, "kl": 0.2539207935333252, "learning_rate": 1.7488338611232334e-11, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1424 }, { "completion_length": 226.21429443359375, "epoch": 0.9972008397480756, "grad_norm": 0.8837776184082031, "kl": 0.16597408056259155, "learning_rate": 1.1192583689256796e-11, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1425 }, { "completion_length": 143.92857360839844, "epoch": 0.9979006298110566, "grad_norm": 0.021532166749238968, "kl": 0.2903480529785156, "learning_rate": 6.2958488781794664e-12, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1426 }, { "completion_length": 160.57144165039062, "epoch": 0.9986004198740378, "grad_norm": 1.9143493175506592, "kl": 0.25154876708984375, "learning_rate": 2.7981615817185277e-12, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 1427 }, { "completion_length": 101.50000762939453, "epoch": 0.9993002099370188, "grad_norm": 1.844730257987976, "kl": 0.38079506158828735, "learning_rate": 6.995413741606171e-13, "loss": 0.0004, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1428 }, { "completion_length": 159.92857360839844, "epoch": 1.0, "grad_norm": 0.008693267591297626, "kl": 0.2142767310142517, "learning_rate": 0.0, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1429 } ], "logging_steps": 1, "max_steps": 1429, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }