{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8397480755773268, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 187.95238494873047, "epoch": 0.0027991602519244225, "grad_norm": 1.0234375, "kl": 0.0, "learning_rate": 1.3888888888888888e-07, "loss": 0.0, "reward": 0.46513424068689346, "reward_std": 0.20957323536276817, "rewards/check_gptzero_func": 0.46513424068689346, "step": 1 }, { "completion_length": 204.64286041259766, "epoch": 0.005598320503848845, "grad_norm": 0.859375, "kl": 0.0, "learning_rate": 2.7777777777777776e-07, "loss": 0.0, "reward": 0.3453420288860798, "reward_std": 0.19838641211390495, "rewards/check_gptzero_func": 0.3453420288860798, "step": 2 }, { "completion_length": 172.09524154663086, "epoch": 0.008397480755773267, "grad_norm": 1.0234375, "kl": 8.45193862915039e-05, "learning_rate": 4.1666666666666667e-07, "loss": 0.0, "reward": 0.4417309910058975, "reward_std": 0.15838673152029514, "rewards/check_gptzero_func": 0.4417309910058975, "step": 3 }, { "completion_length": 182.4761962890625, "epoch": 0.01119664100769769, "grad_norm": 1.015625, "kl": 0.00014972686767578125, "learning_rate": 5.555555555555555e-07, "loss": 0.0, "reward": 0.381104938685894, "reward_std": 0.18715333193540573, "rewards/check_gptzero_func": 0.381104938685894, "step": 4 }, { "completion_length": 174.01190948486328, "epoch": 0.013995801259622114, "grad_norm": 1.078125, "kl": 0.000102996826171875, "learning_rate": 6.944444444444446e-07, "loss": 0.0, "reward": 0.47953247278928757, "reward_std": 0.1845148727297783, "rewards/check_gptzero_func": 0.47953247278928757, "step": 5 }, { "completion_length": 201.01190948486328, "epoch": 0.016794961511546535, "grad_norm": 0.91015625, "kl": 0.00011652708053588867, "learning_rate": 8.333333333333333e-07, "loss": 0.0, "reward": 0.42353298515081406, "reward_std": 0.1985670831054449, "rewards/check_gptzero_func": 0.42353298515081406, "step": 6 }, { "completion_length": 175.90476989746094, "epoch": 0.01959412176347096, "grad_norm": 1.015625, "kl": 0.00013816356658935547, "learning_rate": 9.722222222222224e-07, "loss": 0.0, "reward": 0.5062773898243904, "reward_std": 0.1975775510072708, "rewards/check_gptzero_func": 0.5062773898243904, "step": 7 }, { "completion_length": 177.42857360839844, "epoch": 0.02239328201539538, "grad_norm": 0.9765625, "kl": 0.00012969970703125, "learning_rate": 1.111111111111111e-06, "loss": 0.0, "reward": 0.3951154872775078, "reward_std": 0.1862633414566517, "rewards/check_gptzero_func": 0.3951154872775078, "step": 8 }, { "completion_length": 187.6785774230957, "epoch": 0.025192442267319804, "grad_norm": 1.1171875, "kl": 0.00012481212615966797, "learning_rate": 1.25e-06, "loss": 0.0, "reward": 0.5369215086102486, "reward_std": 0.27992387115955353, "rewards/check_gptzero_func": 0.5369215086102486, "step": 9 }, { "completion_length": 184.0595245361328, "epoch": 0.02799160251924423, "grad_norm": 0.890625, "kl": 0.00013375282287597656, "learning_rate": 1.3888888888888892e-06, "loss": 0.0, "reward": 0.4447266310453415, "reward_std": 0.25660283863544464, "rewards/check_gptzero_func": 0.4447266310453415, "step": 10 }, { "completion_length": 178.57143020629883, "epoch": 0.03079076277116865, "grad_norm": 1.109375, "kl": 0.00011909008026123047, "learning_rate": 1.527777777777778e-06, "loss": 0.0, "reward": 0.4782513678073883, "reward_std": 0.19694043323397636, "rewards/check_gptzero_func": 0.4782513678073883, "step": 11 }, { "completion_length": 166.57143020629883, "epoch": 0.03358992302309307, "grad_norm": 1.078125, "kl": 0.00013256072998046875, "learning_rate": 1.6666666666666667e-06, "loss": 0.0, "reward": 0.2991659436374903, "reward_std": 0.2037220150232315, "rewards/check_gptzero_func": 0.2991659436374903, "step": 12 }, { "completion_length": 179.64286422729492, "epoch": 0.0363890832750175, "grad_norm": 0.921875, "kl": 0.00010061264038085938, "learning_rate": 1.8055555555555557e-06, "loss": 0.0, "reward": 0.35697006061673164, "reward_std": 0.16125616803765297, "rewards/check_gptzero_func": 0.35697006061673164, "step": 13 }, { "completion_length": 195.26191329956055, "epoch": 0.03918824352694192, "grad_norm": 1.0078125, "kl": 0.00013875961303710938, "learning_rate": 1.944444444444445e-06, "loss": 0.0, "reward": 0.5023506954312325, "reward_std": 0.2251712903380394, "rewards/check_gptzero_func": 0.5023506954312325, "step": 14 }, { "completion_length": 163.0952377319336, "epoch": 0.04198740377886634, "grad_norm": 1.15625, "kl": 0.00012624263763427734, "learning_rate": 2.0833333333333334e-06, "loss": 0.0, "reward": 0.5686202421784401, "reward_std": 0.16065805964171886, "rewards/check_gptzero_func": 0.5686202421784401, "step": 15 }, { "completion_length": 180.0714340209961, "epoch": 0.04478656403079076, "grad_norm": 1.03125, "kl": 0.00016486644744873047, "learning_rate": 2.222222222222222e-06, "loss": 0.0, "reward": 0.4932379499077797, "reward_std": 0.2570475209504366, "rewards/check_gptzero_func": 0.4932379499077797, "step": 16 }, { "completion_length": 193.03571701049805, "epoch": 0.04758572428271519, "grad_norm": 1.0078125, "kl": 0.00022602081298828125, "learning_rate": 2.361111111111111e-06, "loss": 0.0, "reward": 0.34498296678066254, "reward_std": 0.23837541788816452, "rewards/check_gptzero_func": 0.34498296678066254, "step": 17 }, { "completion_length": 179.34524536132812, "epoch": 0.05038488453463961, "grad_norm": 0.94921875, "kl": 0.00022101402282714844, "learning_rate": 2.5e-06, "loss": 0.0, "reward": 0.5981053188443184, "reward_std": 0.16939585842192173, "rewards/check_gptzero_func": 0.5981053188443184, "step": 18 }, { "completion_length": 182.52381134033203, "epoch": 0.05318404478656403, "grad_norm": 0.91796875, "kl": 0.00026416778564453125, "learning_rate": 2.6388888888888893e-06, "loss": 0.0, "reward": 0.4666922390460968, "reward_std": 0.19714651349931955, "rewards/check_gptzero_func": 0.4666922390460968, "step": 19 }, { "completion_length": 172.7023811340332, "epoch": 0.05598320503848846, "grad_norm": 0.98828125, "kl": 0.0003151893615722656, "learning_rate": 2.7777777777777783e-06, "loss": 0.0, "reward": 0.5293586701154709, "reward_std": 0.1787981353700161, "rewards/check_gptzero_func": 0.5293586701154709, "step": 20 }, { "completion_length": 199.94047927856445, "epoch": 0.05878236529041288, "grad_norm": 1.0546875, "kl": 0.00033283233642578125, "learning_rate": 2.916666666666667e-06, "loss": 0.0, "reward": 0.4446847140789032, "reward_std": 0.18958063051104546, "rewards/check_gptzero_func": 0.4446847140789032, "step": 21 }, { "completion_length": 198.52381134033203, "epoch": 0.0615815255423373, "grad_norm": 0.8359375, "kl": 0.00037479400634765625, "learning_rate": 3.055555555555556e-06, "loss": 0.0, "reward": 0.5041225850582123, "reward_std": 0.1401257887482643, "rewards/check_gptzero_func": 0.5041225850582123, "step": 22 }, { "completion_length": 177.72619247436523, "epoch": 0.06438068579426172, "grad_norm": 0.99609375, "kl": 0.0007195472717285156, "learning_rate": 3.1944444444444443e-06, "loss": 0.0001, "reward": 0.472538560628891, "reward_std": 0.25678203627467155, "rewards/check_gptzero_func": 0.472538560628891, "step": 23 }, { "completion_length": 201.9761962890625, "epoch": 0.06717984604618614, "grad_norm": 0.8984375, "kl": 0.0009317398071289062, "learning_rate": 3.3333333333333333e-06, "loss": 0.0001, "reward": 0.5138305872678757, "reward_std": 0.19977859780192375, "rewards/check_gptzero_func": 0.5138305872678757, "step": 24 }, { "completion_length": 211.83333587646484, "epoch": 0.06997900629811056, "grad_norm": 0.78125, "kl": 0.0008678436279296875, "learning_rate": 3.4722222222222224e-06, "loss": 0.0001, "reward": 0.4136114977300167, "reward_std": 0.1939336434006691, "rewards/check_gptzero_func": 0.4136114977300167, "step": 25 }, { "completion_length": 173.82143020629883, "epoch": 0.072778166550035, "grad_norm": 0.9375, "kl": 0.0014667510986328125, "learning_rate": 3.6111111111111115e-06, "loss": 0.0001, "reward": 0.5154794007539749, "reward_std": 0.22610028088092804, "rewards/check_gptzero_func": 0.5154794007539749, "step": 26 }, { "completion_length": 190.48809814453125, "epoch": 0.07557732680195942, "grad_norm": 0.87890625, "kl": 0.00168609619140625, "learning_rate": 3.7500000000000005e-06, "loss": 0.0002, "reward": 0.5691990107297897, "reward_std": 0.1696779690682888, "rewards/check_gptzero_func": 0.5691990107297897, "step": 27 }, { "completion_length": 188.2738151550293, "epoch": 0.07837648705388384, "grad_norm": 0.9375, "kl": 0.00185394287109375, "learning_rate": 3.88888888888889e-06, "loss": 0.0002, "reward": 0.5436510145664215, "reward_std": 0.24924885854125023, "rewards/check_gptzero_func": 0.5436510145664215, "step": 28 }, { "completion_length": 205.52381134033203, "epoch": 0.08117564730580826, "grad_norm": 0.765625, "kl": 0.001617431640625, "learning_rate": 4.027777777777779e-06, "loss": 0.0002, "reward": 0.4470183253288269, "reward_std": 0.20528827793896198, "rewards/check_gptzero_func": 0.4470183253288269, "step": 29 }, { "completion_length": 171.84524154663086, "epoch": 0.08397480755773268, "grad_norm": 0.9609375, "kl": 0.003101348876953125, "learning_rate": 4.166666666666667e-06, "loss": 0.0003, "reward": 0.46852748841047287, "reward_std": 0.16202964074909687, "rewards/check_gptzero_func": 0.46852748841047287, "step": 30 }, { "completion_length": 180.92857360839844, "epoch": 0.0867739678096571, "grad_norm": 1.34375, "kl": 0.003253936767578125, "learning_rate": 4.305555555555556e-06, "loss": 0.0003, "reward": 0.43846940994262695, "reward_std": 0.16219847835600376, "rewards/check_gptzero_func": 0.43846940994262695, "step": 31 }, { "completion_length": 200.4285774230957, "epoch": 0.08957312806158152, "grad_norm": 0.8125, "kl": 0.00244140625, "learning_rate": 4.444444444444444e-06, "loss": 0.0003, "reward": 0.5283855870366096, "reward_std": 0.20884095132350922, "rewards/check_gptzero_func": 0.5283855870366096, "step": 32 }, { "completion_length": 178.6190528869629, "epoch": 0.09237228831350595, "grad_norm": 0.83984375, "kl": 0.00383758544921875, "learning_rate": 4.583333333333333e-06, "loss": 0.0004, "reward": 0.5132240653038025, "reward_std": 0.15939988382160664, "rewards/check_gptzero_func": 0.5132240653038025, "step": 33 }, { "completion_length": 195.8095245361328, "epoch": 0.09517144856543037, "grad_norm": 0.9609375, "kl": 0.0034942626953125, "learning_rate": 4.722222222222222e-06, "loss": 0.0003, "reward": 0.4721037596464157, "reward_std": 0.22807539626955986, "rewards/check_gptzero_func": 0.4721037596464157, "step": 34 }, { "completion_length": 187.72619247436523, "epoch": 0.0979706088173548, "grad_norm": 0.78515625, "kl": 0.004547119140625, "learning_rate": 4.861111111111111e-06, "loss": 0.0005, "reward": 0.4753117114305496, "reward_std": 0.21822157502174377, "rewards/check_gptzero_func": 0.4753117114305496, "step": 35 }, { "completion_length": 189.3452377319336, "epoch": 0.10076976906927922, "grad_norm": 1.015625, "kl": 0.005115509033203125, "learning_rate": 5e-06, "loss": 0.0005, "reward": 0.5079967528581619, "reward_std": 0.1728131715208292, "rewards/check_gptzero_func": 0.5079967528581619, "step": 36 }, { "completion_length": 182.50000381469727, "epoch": 0.10356892932120364, "grad_norm": 0.859375, "kl": 0.006103515625, "learning_rate": 4.999880271862381e-06, "loss": 0.0006, "reward": 0.5491276457905769, "reward_std": 0.29580989107489586, "rewards/check_gptzero_func": 0.5491276457905769, "step": 37 }, { "completion_length": 201.61904907226562, "epoch": 0.10636808957312806, "grad_norm": 0.7890625, "kl": 0.00431060791015625, "learning_rate": 4.999521098917384e-06, "loss": 0.0005, "reward": 0.7083312571048737, "reward_std": 0.17898299172520638, "rewards/check_gptzero_func": 0.7083312571048737, "step": 38 }, { "completion_length": 183.6904754638672, "epoch": 0.10916724982505248, "grad_norm": 0.828125, "kl": 0.00653076171875, "learning_rate": 4.998922515567496e-06, "loss": 0.0007, "reward": 0.5663967505097389, "reward_std": 0.21669673547148705, "rewards/check_gptzero_func": 0.5663967505097389, "step": 39 }, { "completion_length": 176.16667556762695, "epoch": 0.11196641007697691, "grad_norm": 0.83984375, "kl": 0.007232666015625, "learning_rate": 4.998084579146533e-06, "loss": 0.0007, "reward": 0.592326283454895, "reward_std": 0.1653369516134262, "rewards/check_gptzero_func": 0.592326283454895, "step": 40 }, { "completion_length": 191.34524154663086, "epoch": 0.11476557032890133, "grad_norm": 0.78515625, "kl": 0.006866455078125, "learning_rate": 4.997007369914149e-06, "loss": 0.0007, "reward": 0.5438110902905464, "reward_std": 0.150531854480505, "rewards/check_gptzero_func": 0.5438110902905464, "step": 41 }, { "completion_length": 173.26190567016602, "epoch": 0.11756473058082575, "grad_norm": 0.90234375, "kl": 0.00835418701171875, "learning_rate": 4.9956909910481465e-06, "loss": 0.0008, "reward": 0.496019683778286, "reward_std": 0.13957532681524754, "rewards/check_gptzero_func": 0.496019683778286, "step": 42 }, { "completion_length": 197.21428680419922, "epoch": 0.12036389083275018, "grad_norm": 0.7578125, "kl": 0.00658416748046875, "learning_rate": 4.994135568634598e-06, "loss": 0.0007, "reward": 0.5706812366843224, "reward_std": 0.22384651005268097, "rewards/check_gptzero_func": 0.5706812366843224, "step": 43 }, { "completion_length": 195.50000381469727, "epoch": 0.1231630510846746, "grad_norm": 0.8046875, "kl": 0.0059814453125, "learning_rate": 4.992341251655768e-06, "loss": 0.0006, "reward": 0.49248379468917847, "reward_std": 0.1679957453161478, "rewards/check_gptzero_func": 0.49248379468917847, "step": 44 }, { "completion_length": 174.83333206176758, "epoch": 0.12596221133659902, "grad_norm": 0.83984375, "kl": 0.00934600830078125, "learning_rate": 4.99030821197584e-06, "loss": 0.001, "reward": 0.5782680213451385, "reward_std": 0.18842186219990253, "rewards/check_gptzero_func": 0.5782680213451385, "step": 45 }, { "completion_length": 180.71428680419922, "epoch": 0.12876137158852344, "grad_norm": 0.83203125, "kl": 0.009490966796875, "learning_rate": 4.988036644324457e-06, "loss": 0.001, "reward": 0.5191970095038414, "reward_std": 0.2319345511496067, "rewards/check_gptzero_func": 0.5191970095038414, "step": 46 }, { "completion_length": 185.46429061889648, "epoch": 0.13156053184044786, "grad_norm": 0.8125, "kl": 0.00946044921875, "learning_rate": 4.9855267662780715e-06, "loss": 0.0009, "reward": 0.6461608409881592, "reward_std": 0.1471536885946989, "rewards/check_gptzero_func": 0.6461608409881592, "step": 47 }, { "completion_length": 176.53571701049805, "epoch": 0.13435969209237228, "grad_norm": 0.82421875, "kl": 0.01032257080078125, "learning_rate": 4.982778818239101e-06, "loss": 0.001, "reward": 0.49297887086868286, "reward_std": 0.1380300959572196, "rewards/check_gptzero_func": 0.49297887086868286, "step": 48 }, { "completion_length": 189.15476608276367, "epoch": 0.1371588523442967, "grad_norm": 0.9765625, "kl": 0.01055145263671875, "learning_rate": 4.979793063412909e-06, "loss": 0.0011, "reward": 0.5546858608722687, "reward_std": 0.20936554670333862, "rewards/check_gptzero_func": 0.5546858608722687, "step": 49 }, { "completion_length": 175.32143020629883, "epoch": 0.13995801259622112, "grad_norm": 0.80859375, "kl": 0.0104827880859375, "learning_rate": 4.9765697877825844e-06, "loss": 0.001, "reward": 0.5866354256868362, "reward_std": 0.1675815749913454, "rewards/check_gptzero_func": 0.5866354256868362, "step": 50 }, { "completion_length": 178.88095092773438, "epoch": 0.14275717284814557, "grad_norm": 0.921875, "kl": 0.01032257080078125, "learning_rate": 4.97310930008156e-06, "loss": 0.001, "reward": 0.6669622659683228, "reward_std": 0.21926475502550602, "rewards/check_gptzero_func": 0.6669622659683228, "step": 51 }, { "completion_length": 180.7738151550293, "epoch": 0.14555633310007, "grad_norm": 0.83984375, "kl": 0.0123748779296875, "learning_rate": 4.969411931764033e-06, "loss": 0.0012, "reward": 0.567020371556282, "reward_std": 0.17968417704105377, "rewards/check_gptzero_func": 0.567020371556282, "step": 52 }, { "completion_length": 195.5357208251953, "epoch": 0.1483554933519944, "grad_norm": 0.796875, "kl": 0.0104827880859375, "learning_rate": 4.965478036973221e-06, "loss": 0.001, "reward": 0.6985915303230286, "reward_std": 0.17377906665205956, "rewards/check_gptzero_func": 0.6985915303230286, "step": 53 }, { "completion_length": 178.00000381469727, "epoch": 0.15115465360391883, "grad_norm": 1.0234375, "kl": 0.01174163818359375, "learning_rate": 4.9613079925074435e-06, "loss": 0.0012, "reward": 0.6754837036132812, "reward_std": 0.2185358963906765, "rewards/check_gptzero_func": 0.6754837036132812, "step": 54 }, { "completion_length": 172.64286041259766, "epoch": 0.15395381385584325, "grad_norm": 0.8203125, "kl": 0.0126953125, "learning_rate": 4.956902197784025e-06, "loss": 0.0013, "reward": 0.5507477447390556, "reward_std": 0.12427662499248981, "rewards/check_gptzero_func": 0.5507477447390556, "step": 55 }, { "completion_length": 178.40476608276367, "epoch": 0.15675297410776767, "grad_norm": 0.83984375, "kl": 0.01100921630859375, "learning_rate": 4.952261074801043e-06, "loss": 0.0011, "reward": 0.7200377136468887, "reward_std": 0.1754942275583744, "rewards/check_gptzero_func": 0.7200377136468887, "step": 56 }, { "completion_length": 177.02381134033203, "epoch": 0.1595521343596921, "grad_norm": 0.859375, "kl": 0.01055908203125, "learning_rate": 4.947385068096907e-06, "loss": 0.0011, "reward": 0.7536474019289017, "reward_std": 0.12843344174325466, "rewards/check_gptzero_func": 0.7536474019289017, "step": 57 }, { "completion_length": 197.3571434020996, "epoch": 0.16235129461161651, "grad_norm": 0.8515625, "kl": 0.00899505615234375, "learning_rate": 4.942274644707778e-06, "loss": 0.0009, "reward": 0.5334034785628319, "reward_std": 0.22946524992585182, "rewards/check_gptzero_func": 0.5334034785628319, "step": 58 }, { "completion_length": 182.14286041259766, "epoch": 0.16515045486354094, "grad_norm": 0.921875, "kl": 0.01129150390625, "learning_rate": 4.936930294122838e-06, "loss": 0.0011, "reward": 0.5683771669864655, "reward_std": 0.1916387351229787, "rewards/check_gptzero_func": 0.5683771669864655, "step": 59 }, { "completion_length": 173.2023811340332, "epoch": 0.16794961511546536, "grad_norm": 0.92578125, "kl": 0.01171875, "learning_rate": 4.931352528237398e-06, "loss": 0.0012, "reward": 0.6447094231843948, "reward_std": 0.19410214200615883, "rewards/check_gptzero_func": 0.6447094231843948, "step": 60 }, { "completion_length": 185.6428565979004, "epoch": 0.17074877536738978, "grad_norm": 0.8515625, "kl": 0.00968170166015625, "learning_rate": 4.925541881303876e-06, "loss": 0.001, "reward": 0.6383133828639984, "reward_std": 0.18484976701438427, "rewards/check_gptzero_func": 0.6383133828639984, "step": 61 }, { "completion_length": 189.1785774230957, "epoch": 0.1735479356193142, "grad_norm": 1.0078125, "kl": 0.008331298828125, "learning_rate": 4.919498909880621e-06, "loss": 0.0008, "reward": 0.6155931651592255, "reward_std": 0.18777143955230713, "rewards/check_gptzero_func": 0.6155931651592255, "step": 62 }, { "completion_length": 191.80952835083008, "epoch": 0.17634709587123862, "grad_norm": 0.8046875, "kl": 0.00836181640625, "learning_rate": 4.913224192778604e-06, "loss": 0.0008, "reward": 0.6793939918279648, "reward_std": 0.09720544703304768, "rewards/check_gptzero_func": 0.6793939918279648, "step": 63 }, { "completion_length": 171.97619247436523, "epoch": 0.17914625612316304, "grad_norm": 0.88671875, "kl": 0.00908660888671875, "learning_rate": 4.906718331005979e-06, "loss": 0.0009, "reward": 0.44309166073799133, "reward_std": 0.2190867941826582, "rewards/check_gptzero_func": 0.44309166073799133, "step": 64 }, { "completion_length": 175.59524154663086, "epoch": 0.1819454163750875, "grad_norm": 0.9140625, "kl": 0.00946044921875, "learning_rate": 4.899981947710518e-06, "loss": 0.0009, "reward": 0.5109639540314674, "reward_std": 0.15947622060775757, "rewards/check_gptzero_func": 0.5109639540314674, "step": 65 }, { "completion_length": 184.76190948486328, "epoch": 0.1847445766270119, "grad_norm": 0.90234375, "kl": 0.009246826171875, "learning_rate": 4.893015688119921e-06, "loss": 0.0009, "reward": 0.5143184289336205, "reward_std": 0.2525811605155468, "rewards/check_gptzero_func": 0.5143184289336205, "step": 66 }, { "completion_length": 174.76191329956055, "epoch": 0.18754373687893633, "grad_norm": 0.890625, "kl": 0.0095367431640625, "learning_rate": 4.885820219480018e-06, "loss": 0.001, "reward": 0.6203874498605728, "reward_std": 0.20078162848949432, "rewards/check_gptzero_func": 0.6203874498605728, "step": 67 }, { "completion_length": 184.2738151550293, "epoch": 0.19034289713086075, "grad_norm": 0.98046875, "kl": 0.008697509765625, "learning_rate": 4.8783962309908564e-06, "loss": 0.0009, "reward": 0.5816539227962494, "reward_std": 0.23236144706606865, "rewards/check_gptzero_func": 0.5816539227962494, "step": 68 }, { "completion_length": 184.0, "epoch": 0.19314205738278517, "grad_norm": 0.8984375, "kl": 0.00894927978515625, "learning_rate": 4.870744433740688e-06, "loss": 0.0009, "reward": 0.6825973987579346, "reward_std": 0.17790008522570133, "rewards/check_gptzero_func": 0.6825973987579346, "step": 69 }, { "completion_length": 204.1785774230957, "epoch": 0.1959412176347096, "grad_norm": 0.8203125, "kl": 0.00753021240234375, "learning_rate": 4.8628655606378625e-06, "loss": 0.0008, "reward": 0.6512226462364197, "reward_std": 0.22282536327838898, "rewards/check_gptzero_func": 0.6512226462364197, "step": 70 }, { "completion_length": 188.47619247436523, "epoch": 0.198740377886634, "grad_norm": 0.96875, "kl": 0.00823974609375, "learning_rate": 4.854760366340619e-06, "loss": 0.0008, "reward": 0.6291620433330536, "reward_std": 0.18235865235328674, "rewards/check_gptzero_func": 0.6291620433330536, "step": 71 }, { "completion_length": 191.6428565979004, "epoch": 0.20153953813855843, "grad_norm": 1.0078125, "kl": 0.00788116455078125, "learning_rate": 4.846429627184816e-06, "loss": 0.0008, "reward": 0.5630831271409988, "reward_std": 0.20192383974790573, "rewards/check_gptzero_func": 0.5630831271409988, "step": 72 }, { "completion_length": 186.39286422729492, "epoch": 0.20433869839048285, "grad_norm": 1.0078125, "kl": 0.0083770751953125, "learning_rate": 4.837874141109557e-06, "loss": 0.0008, "reward": 0.566518671810627, "reward_std": 0.2747541069984436, "rewards/check_gptzero_func": 0.566518671810627, "step": 73 }, { "completion_length": 197.75000381469727, "epoch": 0.20713785864240727, "grad_norm": 0.9140625, "kl": 0.0078887939453125, "learning_rate": 4.829094727580775e-06, "loss": 0.0008, "reward": 0.6935234069824219, "reward_std": 0.12828794866800308, "rewards/check_gptzero_func": 0.6935234069824219, "step": 74 }, { "completion_length": 182.94047927856445, "epoch": 0.2099370188943317, "grad_norm": 1.0859375, "kl": 0.0101470947265625, "learning_rate": 4.820092227512736e-06, "loss": 0.001, "reward": 0.5632592514157295, "reward_std": 0.20659778825938702, "rewards/check_gptzero_func": 0.5632592514157295, "step": 75 }, { "completion_length": 189.26190948486328, "epoch": 0.21273617914625612, "grad_norm": 0.97265625, "kl": 0.0099639892578125, "learning_rate": 4.810867503187492e-06, "loss": 0.001, "reward": 0.6693995073437691, "reward_std": 0.15574552537873387, "rewards/check_gptzero_func": 0.6693995073437691, "step": 76 }, { "completion_length": 202.20238494873047, "epoch": 0.21553533939818054, "grad_norm": 1.046875, "kl": 0.0087127685546875, "learning_rate": 4.8014214381722945e-06, "loss": 0.0009, "reward": 0.6062769070267677, "reward_std": 0.269734937697649, "rewards/check_gptzero_func": 0.6062769070267677, "step": 77 }, { "completion_length": 201.22619247436523, "epoch": 0.21833449965010496, "grad_norm": 0.953125, "kl": 0.00868988037109375, "learning_rate": 4.791754937234962e-06, "loss": 0.0009, "reward": 0.5902510657906532, "reward_std": 0.20445209927856922, "rewards/check_gptzero_func": 0.5902510657906532, "step": 78 }, { "completion_length": 191.41666793823242, "epoch": 0.22113365990202938, "grad_norm": 0.859375, "kl": 0.0099029541015625, "learning_rate": 4.781868926257216e-06, "loss": 0.001, "reward": 0.7119332551956177, "reward_std": 0.1272009308449924, "rewards/check_gptzero_func": 0.7119332551956177, "step": 79 }, { "completion_length": 201.95238494873047, "epoch": 0.22393282015395383, "grad_norm": 0.9609375, "kl": 0.01006317138671875, "learning_rate": 4.771764352146005e-06, "loss": 0.001, "reward": 0.6730313450098038, "reward_std": 0.23449595272541046, "rewards/check_gptzero_func": 0.6730313450098038, "step": 80 }, { "completion_length": 181.53571701049805, "epoch": 0.22673198040587825, "grad_norm": 1.15625, "kl": 0.0106964111328125, "learning_rate": 4.761442182742799e-06, "loss": 0.0011, "reward": 0.5613239407539368, "reward_std": 0.21234364807605743, "rewards/check_gptzero_func": 0.5613239407539368, "step": 81 }, { "completion_length": 184.67857360839844, "epoch": 0.22953114065780267, "grad_norm": 0.9765625, "kl": 0.0101776123046875, "learning_rate": 4.750903406730895e-06, "loss": 0.001, "reward": 0.5844283923506737, "reward_std": 0.21481262892484665, "rewards/check_gptzero_func": 0.5844283923506737, "step": 82 }, { "completion_length": 169.38095474243164, "epoch": 0.2323303009097271, "grad_norm": 1.1484375, "kl": 0.0128021240234375, "learning_rate": 4.740149033540711e-06, "loss": 0.0013, "reward": 0.6525488644838333, "reward_std": 0.19818515330553055, "rewards/check_gptzero_func": 0.6525488644838333, "step": 83 }, { "completion_length": 178.3809585571289, "epoch": 0.2351294611616515, "grad_norm": 1.0859375, "kl": 0.010101318359375, "learning_rate": 4.729180093253106e-06, "loss": 0.001, "reward": 0.6054461151361465, "reward_std": 0.22233787178993225, "rewards/check_gptzero_func": 0.6054461151361465, "step": 84 }, { "completion_length": 185.47619247436523, "epoch": 0.23792862141357593, "grad_norm": 0.93359375, "kl": 0.0096588134765625, "learning_rate": 4.717997636500715e-06, "loss": 0.001, "reward": 0.5751017481088638, "reward_std": 0.19414596632122993, "rewards/check_gptzero_func": 0.5751017481088638, "step": 85 }, { "completion_length": 176.61904907226562, "epoch": 0.24072778166550035, "grad_norm": 0.90234375, "kl": 0.0117034912109375, "learning_rate": 4.706602734367314e-06, "loss": 0.0012, "reward": 0.6652742102742195, "reward_std": 0.1461728010326624, "rewards/check_gptzero_func": 0.6652742102742195, "step": 86 }, { "completion_length": 204.6785774230957, "epoch": 0.24352694191742477, "grad_norm": 0.8203125, "kl": 0.00868988037109375, "learning_rate": 4.694996478285232e-06, "loss": 0.0009, "reward": 0.5329968556761742, "reward_std": 0.23187025263905525, "rewards/check_gptzero_func": 0.5329968556761742, "step": 87 }, { "completion_length": 191.7738151550293, "epoch": 0.2463261021693492, "grad_norm": 0.9140625, "kl": 0.0092010498046875, "learning_rate": 4.683179979930808e-06, "loss": 0.0009, "reward": 0.660212829709053, "reward_std": 0.17499383352696896, "rewards/check_gptzero_func": 0.660212829709053, "step": 88 }, { "completion_length": 188.29762268066406, "epoch": 0.2491252624212736, "grad_norm": 0.85546875, "kl": 0.01043701171875, "learning_rate": 4.6711543711179155e-06, "loss": 0.001, "reward": 0.6105453222990036, "reward_std": 0.1719030626118183, "rewards/check_gptzero_func": 0.6105453222990036, "step": 89 }, { "completion_length": 185.86904907226562, "epoch": 0.25192442267319803, "grad_norm": 1.0234375, "kl": 0.01029205322265625, "learning_rate": 4.658920803689553e-06, "loss": 0.001, "reward": 0.6307502388954163, "reward_std": 0.26598427444696426, "rewards/check_gptzero_func": 0.6307502388954163, "step": 90 }, { "completion_length": 180.8571434020996, "epoch": 0.2547235829251225, "grad_norm": 0.9765625, "kl": 0.0113525390625, "learning_rate": 4.646480449407516e-06, "loss": 0.0011, "reward": 0.661887601017952, "reward_std": 0.17039467580616474, "rewards/check_gptzero_func": 0.661887601017952, "step": 91 }, { "completion_length": 175.97619247436523, "epoch": 0.2575227431770469, "grad_norm": 1.09375, "kl": 0.0103912353515625, "learning_rate": 4.633834499840164e-06, "loss": 0.001, "reward": 0.6470509469509125, "reward_std": 0.11666383501142263, "rewards/check_gptzero_func": 0.6470509469509125, "step": 92 }, { "completion_length": 196.95238494873047, "epoch": 0.2603219034289713, "grad_norm": 0.87890625, "kl": 0.00783538818359375, "learning_rate": 4.620984166248288e-06, "loss": 0.0008, "reward": 0.645782083272934, "reward_std": 0.16378989815711975, "rewards/check_gptzero_func": 0.645782083272934, "step": 93 }, { "completion_length": 184.09524536132812, "epoch": 0.2631210636808957, "grad_norm": 0.92578125, "kl": 0.00963592529296875, "learning_rate": 4.607930679469096e-06, "loss": 0.001, "reward": 0.6375631093978882, "reward_std": 0.12679122015833855, "rewards/check_gptzero_func": 0.6375631093978882, "step": 94 }, { "completion_length": 187.78571701049805, "epoch": 0.26592022393282017, "grad_norm": 0.84375, "kl": 0.009124755859375, "learning_rate": 4.594675289798317e-06, "loss": 0.0009, "reward": 0.6679813116788864, "reward_std": 0.19044114090502262, "rewards/check_gptzero_func": 0.6679813116788864, "step": 95 }, { "completion_length": 174.94048309326172, "epoch": 0.26871938418474456, "grad_norm": 0.9609375, "kl": 0.01029205322265625, "learning_rate": 4.5812192668704454e-06, "loss": 0.001, "reward": 0.5234083607792854, "reward_std": 0.2531973347067833, "rewards/check_gptzero_func": 0.5234083607792854, "step": 96 }, { "completion_length": 183.19048309326172, "epoch": 0.271518544436669, "grad_norm": 0.90234375, "kl": 0.00909423828125, "learning_rate": 4.5675638995371355e-06, "loss": 0.0009, "reward": 0.5966005846858025, "reward_std": 0.22473083063960075, "rewards/check_gptzero_func": 0.5966005846858025, "step": 97 }, { "completion_length": 183.14286041259766, "epoch": 0.2743177046885934, "grad_norm": 0.85546875, "kl": 0.0090484619140625, "learning_rate": 4.553710495743744e-06, "loss": 0.0009, "reward": 0.5813074707984924, "reward_std": 0.1640096753835678, "rewards/check_gptzero_func": 0.5813074707984924, "step": 98 }, { "completion_length": 198.46429061889648, "epoch": 0.27711686494051785, "grad_norm": 0.88671875, "kl": 0.0083465576171875, "learning_rate": 4.53966038240406e-06, "loss": 0.0008, "reward": 0.48446883261203766, "reward_std": 0.14858301915228367, "rewards/check_gptzero_func": 0.48446883261203766, "step": 99 }, { "completion_length": 158.04762268066406, "epoch": 0.27991602519244224, "grad_norm": 1.0625, "kl": 0.0104522705078125, "learning_rate": 4.525414905273208e-06, "loss": 0.001, "reward": 0.6134350448846817, "reward_std": 0.15003260038793087, "rewards/check_gptzero_func": 0.6134350448846817, "step": 100 }, { "completion_length": 178.0595245361328, "epoch": 0.2827151854443667, "grad_norm": 0.90234375, "kl": 0.0092010498046875, "learning_rate": 4.510975428818743e-06, "loss": 0.0009, "reward": 0.6400493085384369, "reward_std": 0.12321909703314304, "rewards/check_gptzero_func": 0.6400493085384369, "step": 101 }, { "completion_length": 202.76190567016602, "epoch": 0.28551434569629114, "grad_norm": 0.9140625, "kl": 0.007781982421875, "learning_rate": 4.496343336089965e-06, "loss": 0.0008, "reward": 0.5859105363488197, "reward_std": 0.12806045822799206, "rewards/check_gptzero_func": 0.5859105363488197, "step": 102 }, { "completion_length": 171.26190948486328, "epoch": 0.28831350594821553, "grad_norm": 1.1015625, "kl": 0.0096588134765625, "learning_rate": 4.481520028585445e-06, "loss": 0.001, "reward": 0.5852581560611725, "reward_std": 0.22868289425969124, "rewards/check_gptzero_func": 0.5852581560611725, "step": 103 }, { "completion_length": 180.51190948486328, "epoch": 0.29111266620014, "grad_norm": 1.0, "kl": 0.010009765625, "learning_rate": 4.466506926118782e-06, "loss": 0.001, "reward": 0.6214292347431183, "reward_std": 0.20645314827561378, "rewards/check_gptzero_func": 0.6214292347431183, "step": 104 }, { "completion_length": 195.03571701049805, "epoch": 0.2939118264520644, "grad_norm": 0.8359375, "kl": 0.00890350341796875, "learning_rate": 4.451305466682615e-06, "loss": 0.0009, "reward": 0.5131512135267258, "reward_std": 0.22282657399773598, "rewards/check_gptzero_func": 0.5131512135267258, "step": 105 }, { "completion_length": 185.69047927856445, "epoch": 0.2967109867039888, "grad_norm": 0.8515625, "kl": 0.01049041748046875, "learning_rate": 4.435917106310887e-06, "loss": 0.0011, "reward": 0.6954147666692734, "reward_std": 0.11661373171955347, "rewards/check_gptzero_func": 0.6954147666692734, "step": 106 }, { "completion_length": 184.50000381469727, "epoch": 0.2995101469559132, "grad_norm": 0.80078125, "kl": 0.0090484619140625, "learning_rate": 4.420343318939378e-06, "loss": 0.0009, "reward": 0.7220865786075592, "reward_std": 0.11773823061957955, "rewards/check_gptzero_func": 0.7220865786075592, "step": 107 }, { "completion_length": 193.82143020629883, "epoch": 0.30230930720783766, "grad_norm": 0.88671875, "kl": 0.0087432861328125, "learning_rate": 4.404585596264537e-06, "loss": 0.0009, "reward": 0.6370752304792404, "reward_std": 0.22320828214287758, "rewards/check_gptzero_func": 0.6370752304792404, "step": 108 }, { "completion_length": 184.55952835083008, "epoch": 0.30510846745976206, "grad_norm": 0.8671875, "kl": 0.009765625, "learning_rate": 4.388645447600593e-06, "loss": 0.001, "reward": 0.7004173994064331, "reward_std": 0.14368313550949097, "rewards/check_gptzero_func": 0.7004173994064331, "step": 109 }, { "completion_length": 186.66666793823242, "epoch": 0.3079076277116865, "grad_norm": 0.875, "kl": 0.01068115234375, "learning_rate": 4.372524399734998e-06, "loss": 0.0011, "reward": 0.6180136650800705, "reward_std": 0.16962039656937122, "rewards/check_gptzero_func": 0.6180136650800705, "step": 110 }, { "completion_length": 182.40476608276367, "epoch": 0.3107067879636109, "grad_norm": 0.875, "kl": 0.00982666015625, "learning_rate": 4.356223996782181e-06, "loss": 0.001, "reward": 0.6765602007508278, "reward_std": 0.15866447985172272, "rewards/check_gptzero_func": 0.6765602007508278, "step": 111 }, { "completion_length": 186.48810195922852, "epoch": 0.31350594821553535, "grad_norm": 0.92578125, "kl": 0.0094451904296875, "learning_rate": 4.339745800035652e-06, "loss": 0.0009, "reward": 0.6582833528518677, "reward_std": 0.15360314585268497, "rewards/check_gptzero_func": 0.6582833528518677, "step": 112 }, { "completion_length": 184.5238151550293, "epoch": 0.31630510846745974, "grad_norm": 0.92578125, "kl": 0.01065826416015625, "learning_rate": 4.323091387818459e-06, "loss": 0.0011, "reward": 0.5341470539569855, "reward_std": 0.23594587668776512, "rewards/check_gptzero_func": 0.5341470539569855, "step": 113 }, { "completion_length": 194.97619247436523, "epoch": 0.3191042687193842, "grad_norm": 0.83203125, "kl": 0.00930023193359375, "learning_rate": 4.306262355332006e-06, "loss": 0.0009, "reward": 0.6334607377648354, "reward_std": 0.17327153496444225, "rewards/check_gptzero_func": 0.6334607377648354, "step": 114 }, { "completion_length": 188.2857208251953, "epoch": 0.3219034289713086, "grad_norm": 0.875, "kl": 0.01202392578125, "learning_rate": 4.2892603145032684e-06, "loss": 0.0012, "reward": 0.6626207306981087, "reward_std": 0.18220025673508644, "rewards/check_gptzero_func": 0.6626207306981087, "step": 115 }, { "completion_length": 182.67857360839844, "epoch": 0.32470258922323303, "grad_norm": 0.88671875, "kl": 0.01158905029296875, "learning_rate": 4.272086893830394e-06, "loss": 0.0012, "reward": 0.6602620035409927, "reward_std": 0.15469545125961304, "rewards/check_gptzero_func": 0.6602620035409927, "step": 116 }, { "completion_length": 185.90476608276367, "epoch": 0.3275017494751575, "grad_norm": 0.83984375, "kl": 0.011932373046875, "learning_rate": 4.254743738226721e-06, "loss": 0.0012, "reward": 0.6419829577207565, "reward_std": 0.18072698265314102, "rewards/check_gptzero_func": 0.6419829577207565, "step": 117 }, { "completion_length": 189.59524154663086, "epoch": 0.33030090972708187, "grad_norm": 0.87109375, "kl": 0.01031494140625, "learning_rate": 4.237232508863226e-06, "loss": 0.001, "reward": 0.8117964118719101, "reward_std": 0.11239873245358467, "rewards/check_gptzero_func": 0.8117964118719101, "step": 118 }, { "completion_length": 179.75, "epoch": 0.3331000699790063, "grad_norm": 0.921875, "kl": 0.01169586181640625, "learning_rate": 4.219554883009412e-06, "loss": 0.0012, "reward": 0.6252808570861816, "reward_std": 0.16097365505993366, "rewards/check_gptzero_func": 0.6252808570861816, "step": 119 }, { "completion_length": 191.64286041259766, "epoch": 0.3358992302309307, "grad_norm": 0.9921875, "kl": 0.013031005859375, "learning_rate": 4.2017125538726574e-06, "loss": 0.0013, "reward": 0.656028687953949, "reward_std": 0.21221196837723255, "rewards/check_gptzero_func": 0.656028687953949, "step": 120 }, { "completion_length": 189.71429061889648, "epoch": 0.33869839048285516, "grad_norm": 0.890625, "kl": 0.010650634765625, "learning_rate": 4.183707230436032e-06, "loss": 0.0011, "reward": 0.6637793928384781, "reward_std": 0.21517397835850716, "rewards/check_gptzero_func": 0.6637793928384781, "step": 121 }, { "completion_length": 201.35714721679688, "epoch": 0.34149755073477955, "grad_norm": 0.8203125, "kl": 0.00860595703125, "learning_rate": 4.165540637294608e-06, "loss": 0.0009, "reward": 0.6951557993888855, "reward_std": 0.20356887206435204, "rewards/check_gptzero_func": 0.6951557993888855, "step": 122 }, { "completion_length": 174.3690528869629, "epoch": 0.344296710986704, "grad_norm": 1.015625, "kl": 0.0137176513671875, "learning_rate": 4.147214514490278e-06, "loss": 0.0014, "reward": 0.6053376868367195, "reward_std": 0.14120884239673615, "rewards/check_gptzero_func": 0.6053376868367195, "step": 123 }, { "completion_length": 191.35715103149414, "epoch": 0.3470958712386284, "grad_norm": 0.87890625, "kl": 0.0111083984375, "learning_rate": 4.128730617345085e-06, "loss": 0.0011, "reward": 0.6748835146427155, "reward_std": 0.15946420282125473, "rewards/check_gptzero_func": 0.6748835146427155, "step": 124 }, { "completion_length": 186.58333587646484, "epoch": 0.34989503149055284, "grad_norm": 0.8671875, "kl": 0.01071929931640625, "learning_rate": 4.110090716293093e-06, "loss": 0.0011, "reward": 0.5565099567174911, "reward_std": 0.14936872385442257, "rewards/check_gptzero_func": 0.5565099567174911, "step": 125 }, { "completion_length": 177.53571319580078, "epoch": 0.35269419174247724, "grad_norm": 0.88671875, "kl": 0.0113983154296875, "learning_rate": 4.091296596710812e-06, "loss": 0.0011, "reward": 0.6619952172040939, "reward_std": 0.1810350650921464, "rewards/check_gptzero_func": 0.6619952172040939, "step": 126 }, { "completion_length": 173.91666793823242, "epoch": 0.3554933519944017, "grad_norm": 0.90234375, "kl": 0.0118255615234375, "learning_rate": 4.072350058746193e-06, "loss": 0.0012, "reward": 0.6603459864854813, "reward_std": 0.22001322731375694, "rewards/check_gptzero_func": 0.6603459864854813, "step": 127 }, { "completion_length": 173.76190567016602, "epoch": 0.3582925122463261, "grad_norm": 0.9921875, "kl": 0.0127410888671875, "learning_rate": 4.053252917146198e-06, "loss": 0.0013, "reward": 0.7352914214134216, "reward_std": 0.1779029555618763, "rewards/check_gptzero_func": 0.7352914214134216, "step": 128 }, { "completion_length": 171.26190567016602, "epoch": 0.3610916724982505, "grad_norm": 0.84765625, "kl": 0.012054443359375, "learning_rate": 4.034007001082985e-06, "loss": 0.0012, "reward": 0.6395312622189522, "reward_std": 0.15299177914857864, "rewards/check_gptzero_func": 0.6395312622189522, "step": 129 }, { "completion_length": 179.78571701049805, "epoch": 0.363890832750175, "grad_norm": 0.92578125, "kl": 0.01220703125, "learning_rate": 4.014614153978704e-06, "loss": 0.0012, "reward": 0.6132207363843918, "reward_std": 0.21152934804558754, "rewards/check_gptzero_func": 0.6132207363843918, "step": 130 }, { "completion_length": 195.92857360839844, "epoch": 0.36668999300209937, "grad_norm": 0.78125, "kl": 0.01036834716796875, "learning_rate": 3.99507623332893e-06, "loss": 0.001, "reward": 0.7027375251054764, "reward_std": 0.15015212446451187, "rewards/check_gptzero_func": 0.7027375251054764, "step": 131 }, { "completion_length": 180.5357208251953, "epoch": 0.3694891532540238, "grad_norm": 0.828125, "kl": 0.0123291015625, "learning_rate": 3.975395110524742e-06, "loss": 0.0012, "reward": 0.6938442587852478, "reward_std": 0.13277364149689674, "rewards/check_gptzero_func": 0.6938442587852478, "step": 132 }, { "completion_length": 189.3333396911621, "epoch": 0.3722883135059482, "grad_norm": 0.890625, "kl": 0.0106048583984375, "learning_rate": 3.955572670673486e-06, "loss": 0.0011, "reward": 0.6352255120873451, "reward_std": 0.16060136631131172, "rewards/check_gptzero_func": 0.6352255120873451, "step": 133 }, { "completion_length": 180.3690528869629, "epoch": 0.37508747375787266, "grad_norm": 0.93359375, "kl": 0.0124359130859375, "learning_rate": 3.935610812418207e-06, "loss": 0.0012, "reward": 0.6548151075839996, "reward_std": 0.20537016168236732, "rewards/check_gptzero_func": 0.6548151075839996, "step": 134 }, { "completion_length": 167.08333587646484, "epoch": 0.37788663400979705, "grad_norm": 1.15625, "kl": 0.01214599609375, "learning_rate": 3.915511447755793e-06, "loss": 0.0012, "reward": 0.6178692057728767, "reward_std": 0.17171020805835724, "rewards/check_gptzero_func": 0.6178692057728767, "step": 135 }, { "completion_length": 185.71429061889648, "epoch": 0.3806857942617215, "grad_norm": 0.85546875, "kl": 0.0107269287109375, "learning_rate": 3.895276501853846e-06, "loss": 0.0011, "reward": 0.5686581507325172, "reward_std": 0.17122036777436733, "rewards/check_gptzero_func": 0.5686581507325172, "step": 136 }, { "completion_length": 195.21429061889648, "epoch": 0.3834849545136459, "grad_norm": 0.875, "kl": 0.0097503662109375, "learning_rate": 3.8749079128662715e-06, "loss": 0.001, "reward": 0.6965835765004158, "reward_std": 0.11745740473270416, "rewards/check_gptzero_func": 0.6965835765004158, "step": 137 }, { "completion_length": 192.04762649536133, "epoch": 0.38628411476557034, "grad_norm": 0.84375, "kl": 0.01324462890625, "learning_rate": 3.854407631747653e-06, "loss": 0.0013, "reward": 0.6779208928346634, "reward_std": 0.17420672625303268, "rewards/check_gptzero_func": 0.6779208928346634, "step": 138 }, { "completion_length": 186.09524154663086, "epoch": 0.38908327501749473, "grad_norm": 0.8671875, "kl": 0.0122528076171875, "learning_rate": 3.833777622066374e-06, "loss": 0.0012, "reward": 0.6140344738960266, "reward_std": 0.1967415027320385, "rewards/check_gptzero_func": 0.6140344738960266, "step": 139 }, { "completion_length": 188.20238494873047, "epoch": 0.3918824352694192, "grad_norm": 0.83203125, "kl": 0.0146331787109375, "learning_rate": 3.8130198598165447e-06, "loss": 0.0015, "reward": 0.7608011960983276, "reward_std": 0.1561479065567255, "rewards/check_gptzero_func": 0.7608011960983276, "step": 140 }, { "completion_length": 197.88095092773438, "epoch": 0.3946815955213436, "grad_norm": 0.8515625, "kl": 0.0110931396484375, "learning_rate": 3.7921363332287354e-06, "loss": 0.0011, "reward": 0.6567741185426712, "reward_std": 0.186597790569067, "rewards/check_gptzero_func": 0.6567741185426712, "step": 141 }, { "completion_length": 191.01190567016602, "epoch": 0.397480755773268, "grad_norm": 0.87109375, "kl": 0.01143646240234375, "learning_rate": 3.7711290425795453e-06, "loss": 0.0011, "reward": 0.769344687461853, "reward_std": 0.14905713684856892, "rewards/check_gptzero_func": 0.769344687461853, "step": 142 }, { "completion_length": 195.35714721679688, "epoch": 0.4002799160251924, "grad_norm": 0.91015625, "kl": 0.01220703125, "learning_rate": 3.7500000000000005e-06, "loss": 0.0012, "reward": 0.5832869336009026, "reward_std": 0.17275189980864525, "rewards/check_gptzero_func": 0.5832869336009026, "step": 143 }, { "completion_length": 187.6428565979004, "epoch": 0.40307907627711687, "grad_norm": 0.87890625, "kl": 0.0144500732421875, "learning_rate": 3.7287512292828364e-06, "loss": 0.0014, "reward": 0.6654903590679169, "reward_std": 0.12800591439008713, "rewards/check_gptzero_func": 0.6654903590679169, "step": 144 }, { "completion_length": 182.00000381469727, "epoch": 0.4058782365290413, "grad_norm": 0.87890625, "kl": 0.014068603515625, "learning_rate": 3.707384765688649e-06, "loss": 0.0014, "reward": 0.6202088594436646, "reward_std": 0.1452749650925398, "rewards/check_gptzero_func": 0.6202088594436646, "step": 145 }, { "completion_length": 183.88095474243164, "epoch": 0.4086773967809657, "grad_norm": 1.015625, "kl": 0.013336181640625, "learning_rate": 3.6859026557509525e-06, "loss": 0.0013, "reward": 0.6348527073860168, "reward_std": 0.1373548824340105, "rewards/check_gptzero_func": 0.6348527073860168, "step": 146 }, { "completion_length": 194.55952835083008, "epoch": 0.41147655703289016, "grad_norm": 1.1328125, "kl": 0.0120391845703125, "learning_rate": 3.6643069570801593e-06, "loss": 0.0012, "reward": 0.6138034015893936, "reward_std": 0.2177984118461609, "rewards/check_gptzero_func": 0.6138034015893936, "step": 147 }, { "completion_length": 203.25, "epoch": 0.41427571728481455, "grad_norm": 0.75, "kl": 0.0098419189453125, "learning_rate": 3.6425997381664955e-06, "loss": 0.001, "reward": 0.6724039763212204, "reward_std": 0.14088603854179382, "rewards/check_gptzero_func": 0.6724039763212204, "step": 148 }, { "completion_length": 180.29762268066406, "epoch": 0.417074877536739, "grad_norm": 1.078125, "kl": 0.0143280029296875, "learning_rate": 3.6207830781818753e-06, "loss": 0.0014, "reward": 0.6434099301695824, "reward_std": 0.1766284443438053, "rewards/check_gptzero_func": 0.6434099301695824, "step": 149 }, { "completion_length": 196.88095474243164, "epoch": 0.4198740377886634, "grad_norm": 0.796875, "kl": 0.0113067626953125, "learning_rate": 3.5988590667807542e-06, "loss": 0.0011, "reward": 0.6718230247497559, "reward_std": 0.13120126724243164, "rewards/check_gptzero_func": 0.6718230247497559, "step": 150 }, { "completion_length": 205.48809814453125, "epoch": 0.42267319804058784, "grad_norm": 0.8359375, "kl": 0.0093536376953125, "learning_rate": 3.576829803899976e-06, "loss": 0.0009, "reward": 0.6097076088190079, "reward_std": 0.14795276708900928, "rewards/check_gptzero_func": 0.6097076088190079, "step": 151 }, { "completion_length": 171.79762649536133, "epoch": 0.42547235829251223, "grad_norm": 1.1484375, "kl": 0.014373779296875, "learning_rate": 3.554697399557634e-06, "loss": 0.0014, "reward": 0.6360819041728973, "reward_std": 0.16829469613730907, "rewards/check_gptzero_func": 0.6360819041728973, "step": 152 }, { "completion_length": 185.02381134033203, "epoch": 0.4282715185444367, "grad_norm": 0.90234375, "kl": 0.01239013671875, "learning_rate": 3.532463973650971e-06, "loss": 0.0012, "reward": 0.4822230823338032, "reward_std": 0.1834505433216691, "rewards/check_gptzero_func": 0.4822230823338032, "step": 153 }, { "completion_length": 194.2738151550293, "epoch": 0.4310706787963611, "grad_norm": 0.84375, "kl": 0.0112152099609375, "learning_rate": 3.5101316557533293e-06, "loss": 0.0011, "reward": 0.6571109592914581, "reward_std": 0.17676730267703533, "rewards/check_gptzero_func": 0.6571109592914581, "step": 154 }, { "completion_length": 185.50000381469727, "epoch": 0.4338698390482855, "grad_norm": 0.921875, "kl": 0.0124969482421875, "learning_rate": 3.487702584910172e-06, "loss": 0.0013, "reward": 0.6433713883161545, "reward_std": 0.18067739717662334, "rewards/check_gptzero_func": 0.6433713883161545, "step": 155 }, { "completion_length": 205.00000381469727, "epoch": 0.4366689993002099, "grad_norm": 0.859375, "kl": 0.00884246826171875, "learning_rate": 3.4651789094342043e-06, "loss": 0.0009, "reward": 0.7346427142620087, "reward_std": 0.15348245482891798, "rewards/check_gptzero_func": 0.7346427142620087, "step": 156 }, { "completion_length": 174.3690528869629, "epoch": 0.43946815955213436, "grad_norm": 1.0859375, "kl": 0.01556396484375, "learning_rate": 3.4425627866996003e-06, "loss": 0.0016, "reward": 0.6470814943313599, "reward_std": 0.15813233144581318, "rewards/check_gptzero_func": 0.6470814943313599, "step": 157 }, { "completion_length": 179.40476608276367, "epoch": 0.44226731980405876, "grad_norm": 0.89453125, "kl": 0.012420654296875, "learning_rate": 3.4198563829353624e-06, "loss": 0.0012, "reward": 0.6753295511007309, "reward_std": 0.19164511188864708, "rewards/check_gptzero_func": 0.6753295511007309, "step": 158 }, { "completion_length": 196.00000381469727, "epoch": 0.4450664800559832, "grad_norm": 0.83984375, "kl": 0.0102386474609375, "learning_rate": 3.39706187301784e-06, "loss": 0.001, "reward": 0.6890220493078232, "reward_std": 0.11566946748644114, "rewards/check_gptzero_func": 0.6890220493078232, "step": 159 }, { "completion_length": 176.21428680419922, "epoch": 0.44786564030790765, "grad_norm": 0.99609375, "kl": 0.0136871337890625, "learning_rate": 3.3741814402624094e-06, "loss": 0.0014, "reward": 0.5286017879843712, "reward_std": 0.22522129118442535, "rewards/check_gptzero_func": 0.5286017879843712, "step": 160 }, { "completion_length": 194.3571434020996, "epoch": 0.45066480055983205, "grad_norm": 0.81640625, "kl": 0.0122833251953125, "learning_rate": 3.351217276214351e-06, "loss": 0.0012, "reward": 0.4715605303645134, "reward_std": 0.18703988194465637, "rewards/check_gptzero_func": 0.4715605303645134, "step": 161 }, { "completion_length": 178.90476608276367, "epoch": 0.4534639608117565, "grad_norm": 1.0078125, "kl": 0.012786865234375, "learning_rate": 3.32817158043894e-06, "loss": 0.0013, "reward": 0.5694275945425034, "reward_std": 0.24036183580756187, "rewards/check_gptzero_func": 0.5694275945425034, "step": 162 }, { "completion_length": 183.09524536132812, "epoch": 0.4562631210636809, "grad_norm": 0.98046875, "kl": 0.0117034912109375, "learning_rate": 3.305046560310766e-06, "loss": 0.0012, "reward": 0.7210999131202698, "reward_std": 0.1915903128683567, "rewards/check_gptzero_func": 0.7210999131202698, "step": 163 }, { "completion_length": 207.96428680419922, "epoch": 0.45906228131560534, "grad_norm": 0.84375, "kl": 0.00853729248046875, "learning_rate": 3.2818444308023e-06, "loss": 0.0009, "reward": 0.5639151483774185, "reward_std": 0.18663722090423107, "rewards/check_gptzero_func": 0.5639151483774185, "step": 164 }, { "completion_length": 195.96429061889648, "epoch": 0.46186144156752973, "grad_norm": 1.078125, "kl": 0.0107574462890625, "learning_rate": 3.2585674142717483e-06, "loss": 0.0011, "reward": 0.6154336631298065, "reward_std": 0.19948378019034863, "rewards/check_gptzero_func": 0.6154336631298065, "step": 165 }, { "completion_length": 196.94047927856445, "epoch": 0.4646606018194542, "grad_norm": 0.86328125, "kl": 0.012969970703125, "learning_rate": 3.2352177402501813e-06, "loss": 0.0013, "reward": 0.5297554209828377, "reward_std": 0.16210689023137093, "rewards/check_gptzero_func": 0.5297554209828377, "step": 166 }, { "completion_length": 181.3333396911621, "epoch": 0.46745976207137857, "grad_norm": 0.9609375, "kl": 0.010589599609375, "learning_rate": 3.2117976452279854e-06, "loss": 0.0011, "reward": 0.6036887094378471, "reward_std": 0.21627848595380783, "rewards/check_gptzero_func": 0.6036887094378471, "step": 167 }, { "completion_length": 198.38095474243164, "epoch": 0.470258922323303, "grad_norm": 0.875, "kl": 0.0103607177734375, "learning_rate": 3.18830937244065e-06, "loss": 0.001, "reward": 0.6530143320560455, "reward_std": 0.18831264041364193, "rewards/check_gptzero_func": 0.6530143320560455, "step": 168 }, { "completion_length": 192.27381134033203, "epoch": 0.4730580825752274, "grad_norm": 0.86328125, "kl": 0.01263427734375, "learning_rate": 3.1647551716539004e-06, "loss": 0.0013, "reward": 0.6262907981872559, "reward_std": 0.11849029827862978, "rewards/check_gptzero_func": 0.6262907981872559, "step": 169 }, { "completion_length": 203.8333396911621, "epoch": 0.47585724282715186, "grad_norm": 0.76171875, "kl": 0.0087738037109375, "learning_rate": 3.1411372989482105e-06, "loss": 0.0009, "reward": 0.6544186323881149, "reward_std": 0.13314771838486195, "rewards/check_gptzero_func": 0.6544186323881149, "step": 170 }, { "completion_length": 172.25000381469727, "epoch": 0.47865640307907625, "grad_norm": 1.0390625, "kl": 0.01348876953125, "learning_rate": 3.1174580165027106e-06, "loss": 0.0014, "reward": 0.7334302663803101, "reward_std": 0.19380612671375275, "rewards/check_gptzero_func": 0.7334302663803101, "step": 171 }, { "completion_length": 185.2738151550293, "epoch": 0.4814555633310007, "grad_norm": 0.9453125, "kl": 0.0138092041015625, "learning_rate": 3.0937195923785124e-06, "loss": 0.0014, "reward": 0.6392181292176247, "reward_std": 0.20777087286114693, "rewards/check_gptzero_func": 0.6392181292176247, "step": 172 }, { "completion_length": 192.71429061889648, "epoch": 0.4842547235829251, "grad_norm": 0.9453125, "kl": 0.01061248779296875, "learning_rate": 3.069924300301463e-06, "loss": 0.0011, "reward": 0.6806470304727554, "reward_std": 0.21131999045610428, "rewards/check_gptzero_func": 0.6806470304727554, "step": 173 }, { "completion_length": 179.77380752563477, "epoch": 0.48705388383484954, "grad_norm": 0.83984375, "kl": 0.0126800537109375, "learning_rate": 3.0460744194443658e-06, "loss": 0.0013, "reward": 0.47618968039751053, "reward_std": 0.17172732576727867, "rewards/check_gptzero_func": 0.47618968039751053, "step": 174 }, { "completion_length": 181.53571701049805, "epoch": 0.489853044086774, "grad_norm": 0.94921875, "kl": 0.0154266357421875, "learning_rate": 3.0221722342086762e-06, "loss": 0.0015, "reward": 0.7085084468126297, "reward_std": 0.17813345789909363, "rewards/check_gptzero_func": 0.7085084468126297, "step": 175 }, { "completion_length": 196.54762268066406, "epoch": 0.4926522043386984, "grad_norm": 0.875, "kl": 0.00936126708984375, "learning_rate": 2.9982200340056916e-06, "loss": 0.0009, "reward": 0.6359190493822098, "reward_std": 0.16200686059892178, "rewards/check_gptzero_func": 0.6359190493822098, "step": 176 }, { "completion_length": 192.2857208251953, "epoch": 0.49545136459062283, "grad_norm": 0.83984375, "kl": 0.010528564453125, "learning_rate": 2.9742201130372693e-06, "loss": 0.0011, "reward": 0.6544022858142853, "reward_std": 0.20328444987535477, "rewards/check_gptzero_func": 0.6544022858142853, "step": 177 }, { "completion_length": 182.63095474243164, "epoch": 0.4982505248425472, "grad_norm": 0.86328125, "kl": 0.0142669677734375, "learning_rate": 2.9501747700760834e-06, "loss": 0.0014, "reward": 0.5758941918611526, "reward_std": 0.1651664450764656, "rewards/check_gptzero_func": 0.5758941918611526, "step": 178 }, { "completion_length": 192.67857360839844, "epoch": 0.5010496850944717, "grad_norm": 0.85546875, "kl": 0.01207733154296875, "learning_rate": 2.9260863082454377e-06, "loss": 0.0012, "reward": 0.7050619274377823, "reward_std": 0.18743818067014217, "rewards/check_gptzero_func": 0.7050619274377823, "step": 179 }, { "completion_length": 181.1547622680664, "epoch": 0.5038488453463961, "grad_norm": 0.89453125, "kl": 0.0139617919921875, "learning_rate": 2.901957034798671e-06, "loss": 0.0014, "reward": 0.6579191908240318, "reward_std": 0.23193923011422157, "rewards/check_gptzero_func": 0.6579191908240318, "step": 180 }, { "completion_length": 198.88095474243164, "epoch": 0.5066480055983205, "grad_norm": 0.87890625, "kl": 0.0117340087890625, "learning_rate": 2.8777892608981605e-06, "loss": 0.0012, "reward": 0.7810440808534622, "reward_std": 0.14343063719570637, "rewards/check_gptzero_func": 0.7810440808534622, "step": 181 }, { "completion_length": 183.89286041259766, "epoch": 0.509447165850245, "grad_norm": 0.83203125, "kl": 0.0113983154296875, "learning_rate": 2.853585301393954e-06, "loss": 0.0012, "reward": 0.5110857635736465, "reward_std": 0.17657889798283577, "rewards/check_gptzero_func": 0.5110857635736465, "step": 182 }, { "completion_length": 183.53571701049805, "epoch": 0.5122463261021694, "grad_norm": 1.046875, "kl": 0.0143585205078125, "learning_rate": 2.829347474602047e-06, "loss": 0.0014, "reward": 0.7601535469293594, "reward_std": 0.13078506011515856, "rewards/check_gptzero_func": 0.7601535469293594, "step": 183 }, { "completion_length": 191.05952835083008, "epoch": 0.5150454863540938, "grad_norm": 1.0390625, "kl": 0.01151275634765625, "learning_rate": 2.80507810208233e-06, "loss": 0.0012, "reward": 0.7207075506448746, "reward_std": 0.23219925537705421, "rewards/check_gptzero_func": 0.7207075506448746, "step": 184 }, { "completion_length": 174.4761962890625, "epoch": 0.5178446466060181, "grad_norm": 1.0, "kl": 0.0149383544921875, "learning_rate": 2.780779508416219e-06, "loss": 0.0015, "reward": 0.7076038122177124, "reward_std": 0.15643260441720486, "rewards/check_gptzero_func": 0.7076038122177124, "step": 185 }, { "completion_length": 186.65476608276367, "epoch": 0.5206438068579426, "grad_norm": 0.87890625, "kl": 0.0148773193359375, "learning_rate": 2.756454020984009e-06, "loss": 0.0015, "reward": 0.6423147022724152, "reward_std": 0.12505882722325623, "rewards/check_gptzero_func": 0.6423147022724152, "step": 186 }, { "completion_length": 197.96429061889648, "epoch": 0.523442967109867, "grad_norm": 0.84375, "kl": 0.010894775390625, "learning_rate": 2.7321039697419453e-06, "loss": 0.0011, "reward": 0.5366896614432335, "reward_std": 0.1639111079275608, "rewards/check_gptzero_func": 0.5366896614432335, "step": 187 }, { "completion_length": 180.73810195922852, "epoch": 0.5262421273617914, "grad_norm": 0.89453125, "kl": 0.0135650634765625, "learning_rate": 2.707731686999056e-06, "loss": 0.0014, "reward": 0.7638429999351501, "reward_std": 0.11215963400900364, "rewards/check_gptzero_func": 0.7638429999351501, "step": 188 }, { "completion_length": 177.96429061889648, "epoch": 0.5290412876137159, "grad_norm": 0.90625, "kl": 0.0150146484375, "learning_rate": 2.68333950719376e-06, "loss": 0.0015, "reward": 0.7288801521062851, "reward_std": 0.13601511158049107, "rewards/check_gptzero_func": 0.7288801521062851, "step": 189 }, { "completion_length": 197.54762268066406, "epoch": 0.5318404478656403, "grad_norm": 0.80859375, "kl": 0.0102691650390625, "learning_rate": 2.658929766670266e-06, "loss": 0.0011, "reward": 0.5935259684920311, "reward_std": 0.12766608223319054, "rewards/check_gptzero_func": 0.5935259684920311, "step": 190 }, { "completion_length": 189.6547622680664, "epoch": 0.5346396081175647, "grad_norm": 0.859375, "kl": 0.0122833251953125, "learning_rate": 2.63450480345479e-06, "loss": 0.0012, "reward": 0.7497572600841522, "reward_std": 0.13245987240225077, "rewards/check_gptzero_func": 0.7497572600841522, "step": 191 }, { "completion_length": 180.51190948486328, "epoch": 0.5374387683694891, "grad_norm": 0.91015625, "kl": 0.0144500732421875, "learning_rate": 2.6100669570316194e-06, "loss": 0.0014, "reward": 0.7178633213043213, "reward_std": 0.1744341142475605, "rewards/check_gptzero_func": 0.7178633213043213, "step": 192 }, { "completion_length": 195.26190567016602, "epoch": 0.5402379286214136, "grad_norm": 0.81640625, "kl": 0.012542724609375, "learning_rate": 2.585618568119027e-06, "loss": 0.0013, "reward": 0.6315608844161034, "reward_std": 0.14562865998595953, "rewards/check_gptzero_func": 0.6315608844161034, "step": 193 }, { "completion_length": 190.47619247436523, "epoch": 0.543037088873338, "grad_norm": 0.94140625, "kl": 0.0131378173828125, "learning_rate": 2.561161978445068e-06, "loss": 0.0013, "reward": 0.65364570915699, "reward_std": 0.14526648819446564, "rewards/check_gptzero_func": 0.65364570915699, "step": 194 }, { "completion_length": 160.94047927856445, "epoch": 0.5458362491252624, "grad_norm": 1.0234375, "kl": 0.020477294921875, "learning_rate": 2.536699530523292e-06, "loss": 0.0021, "reward": 0.7076306045055389, "reward_std": 0.15630067139863968, "rewards/check_gptzero_func": 0.7076306045055389, "step": 195 }, { "completion_length": 184.70238494873047, "epoch": 0.5486354093771868, "grad_norm": 0.94921875, "kl": 0.0129241943359375, "learning_rate": 2.5122335674283625e-06, "loss": 0.0013, "reward": 0.5801831930875778, "reward_std": 0.18576505780220032, "rewards/check_gptzero_func": 0.5801831930875778, "step": 196 }, { "completion_length": 186.60714721679688, "epoch": 0.5514345696291113, "grad_norm": 0.87890625, "kl": 0.0130615234375, "learning_rate": 2.4877664325716383e-06, "loss": 0.0013, "reward": 0.7721930146217346, "reward_std": 0.1962270326912403, "rewards/check_gptzero_func": 0.7721930146217346, "step": 197 }, { "completion_length": 182.42857360839844, "epoch": 0.5542337298810357, "grad_norm": 0.9765625, "kl": 0.01416015625, "learning_rate": 2.463300469476709e-06, "loss": 0.0014, "reward": 0.6073248982429504, "reward_std": 0.17869799211621284, "rewards/check_gptzero_func": 0.6073248982429504, "step": 198 }, { "completion_length": 178.32143020629883, "epoch": 0.5570328901329601, "grad_norm": 1.0546875, "kl": 0.01686859130859375, "learning_rate": 2.4388380215549332e-06, "loss": 0.0017, "reward": 0.670776292681694, "reward_std": 0.18726542592048645, "rewards/check_gptzero_func": 0.670776292681694, "step": 199 }, { "completion_length": 193.4047622680664, "epoch": 0.5598320503848845, "grad_norm": 0.8046875, "kl": 0.0124969482421875, "learning_rate": 2.414381431880974e-06, "loss": 0.0013, "reward": 0.5982818156480789, "reward_std": 0.16670218110084534, "rewards/check_gptzero_func": 0.5982818156480789, "step": 200 }, { "completion_length": 190.5833396911621, "epoch": 0.562631210636809, "grad_norm": 0.8046875, "kl": 0.0111846923828125, "learning_rate": 2.389933042968381e-06, "loss": 0.0011, "reward": 0.7207561880350113, "reward_std": 0.1551688564941287, "rewards/check_gptzero_func": 0.7207561880350113, "step": 201 }, { "completion_length": 183.8690528869629, "epoch": 0.5654303708887334, "grad_norm": 0.859375, "kl": 0.014251708984375, "learning_rate": 2.365495196545211e-06, "loss": 0.0014, "reward": 0.6653933525085449, "reward_std": 0.18065885081887245, "rewards/check_gptzero_func": 0.6653933525085449, "step": 202 }, { "completion_length": 191.30952835083008, "epoch": 0.5682295311406578, "grad_norm": 0.8828125, "kl": 0.0137786865234375, "learning_rate": 2.3410702333297358e-06, "loss": 0.0014, "reward": 0.7060705721378326, "reward_std": 0.14498403668403625, "rewards/check_gptzero_func": 0.7060705721378326, "step": 203 }, { "completion_length": 170.05952835083008, "epoch": 0.5710286913925823, "grad_norm": 0.88671875, "kl": 0.0150909423828125, "learning_rate": 2.3166604928062407e-06, "loss": 0.0015, "reward": 0.6380977034568787, "reward_std": 0.14762726612389088, "rewards/check_gptzero_func": 0.6380977034568787, "step": 204 }, { "completion_length": 187.05952835083008, "epoch": 0.5738278516445067, "grad_norm": 0.82421875, "kl": 0.0121002197265625, "learning_rate": 2.292268313000945e-06, "loss": 0.0012, "reward": 0.7264238968491554, "reward_std": 0.12679270654916763, "rewards/check_gptzero_func": 0.7264238968491554, "step": 205 }, { "completion_length": 183.28571701049805, "epoch": 0.5766270118964311, "grad_norm": 0.8828125, "kl": 0.0143280029296875, "learning_rate": 2.267896030258056e-06, "loss": 0.0014, "reward": 0.6818206459283829, "reward_std": 0.1636413224041462, "rewards/check_gptzero_func": 0.6818206459283829, "step": 206 }, { "completion_length": 177.84524154663086, "epoch": 0.5794261721483555, "grad_norm": 0.8984375, "kl": 0.0160980224609375, "learning_rate": 2.243545979015992e-06, "loss": 0.0016, "reward": 0.5764013379812241, "reward_std": 0.17973252199590206, "rewards/check_gptzero_func": 0.5764013379812241, "step": 207 }, { "completion_length": 189.97619247436523, "epoch": 0.58222533240028, "grad_norm": 0.7734375, "kl": 0.010650634765625, "learning_rate": 2.219220491583782e-06, "loss": 0.0011, "reward": 0.6200987994670868, "reward_std": 0.15654520690441132, "rewards/check_gptzero_func": 0.6200987994670868, "step": 208 }, { "completion_length": 174.96429061889648, "epoch": 0.5850244926522044, "grad_norm": 0.9609375, "kl": 0.0125885009765625, "learning_rate": 2.1949218979176718e-06, "loss": 0.0013, "reward": 0.7681840658187866, "reward_std": 0.17539203353226185, "rewards/check_gptzero_func": 0.7681840658187866, "step": 209 }, { "completion_length": 160.4047622680664, "epoch": 0.5878236529041287, "grad_norm": 1.0546875, "kl": 0.016204833984375, "learning_rate": 2.1706525253979533e-06, "loss": 0.0016, "reward": 0.6341628283262253, "reward_std": 0.17086376622319221, "rewards/check_gptzero_func": 0.6341628283262253, "step": 210 }, { "completion_length": 184.96429061889648, "epoch": 0.5906228131560531, "grad_norm": 1.1015625, "kl": 0.012847900390625, "learning_rate": 2.146414698606047e-06, "loss": 0.0013, "reward": 0.6099446341395378, "reward_std": 0.26053616404533386, "rewards/check_gptzero_func": 0.6099446341395378, "step": 211 }, { "completion_length": 173.38095474243164, "epoch": 0.5934219734079776, "grad_norm": 1.0234375, "kl": 0.0145111083984375, "learning_rate": 2.1222107391018403e-06, "loss": 0.0015, "reward": 0.6794377863407135, "reward_std": 0.1412256360054016, "rewards/check_gptzero_func": 0.6794377863407135, "step": 212 }, { "completion_length": 190.47619247436523, "epoch": 0.596221133659902, "grad_norm": 0.87890625, "kl": 0.013580322265625, "learning_rate": 2.09804296520133e-06, "loss": 0.0014, "reward": 0.5798576474189758, "reward_std": 0.1953704133629799, "rewards/check_gptzero_func": 0.5798576474189758, "step": 213 }, { "completion_length": 199.23809814453125, "epoch": 0.5990202939118264, "grad_norm": 0.890625, "kl": 0.010894775390625, "learning_rate": 2.0739136917545636e-06, "loss": 0.0011, "reward": 0.5883476734161377, "reward_std": 0.19741847924888134, "rewards/check_gptzero_func": 0.5883476734161377, "step": 214 }, { "completion_length": 194.63095474243164, "epoch": 0.6018194541637508, "grad_norm": 0.796875, "kl": 0.0112457275390625, "learning_rate": 2.0498252299239175e-06, "loss": 0.0011, "reward": 0.739928811788559, "reward_std": 0.17222343757748604, "rewards/check_gptzero_func": 0.739928811788559, "step": 215 }, { "completion_length": 181.78571701049805, "epoch": 0.6046186144156753, "grad_norm": 0.90234375, "kl": 0.0135040283203125, "learning_rate": 2.025779886962731e-06, "loss": 0.0014, "reward": 0.7212615758180618, "reward_std": 0.11260060407221317, "rewards/check_gptzero_func": 0.7212615758180618, "step": 216 }, { "completion_length": 202.21428680419922, "epoch": 0.6074177746675997, "grad_norm": 0.8671875, "kl": 0.0114288330078125, "learning_rate": 2.00177996599431e-06, "loss": 0.0012, "reward": 0.6235032379627228, "reward_std": 0.20338322408497334, "rewards/check_gptzero_func": 0.6235032379627228, "step": 217 }, { "completion_length": 164.75000381469727, "epoch": 0.6102169349195241, "grad_norm": 0.90625, "kl": 0.0148773193359375, "learning_rate": 1.9778277657913246e-06, "loss": 0.0015, "reward": 0.8035698980093002, "reward_std": 0.12413663975894451, "rewards/check_gptzero_func": 0.8035698980093002, "step": 218 }, { "completion_length": 180.75000381469727, "epoch": 0.6130160951714486, "grad_norm": 0.9296875, "kl": 0.01385498046875, "learning_rate": 1.9539255805556346e-06, "loss": 0.0014, "reward": 0.6889385357499123, "reward_std": 0.13095776550471783, "rewards/check_gptzero_func": 0.6889385357499123, "step": 219 }, { "completion_length": 193.34524536132812, "epoch": 0.615815255423373, "grad_norm": 0.9296875, "kl": 0.0128631591796875, "learning_rate": 1.9300756996985383e-06, "loss": 0.0013, "reward": 0.6453644558787346, "reward_std": 0.1899284292012453, "rewards/check_gptzero_func": 0.6453644558787346, "step": 220 }, { "completion_length": 187.0714340209961, "epoch": 0.6186144156752974, "grad_norm": 0.90625, "kl": 0.011749267578125, "learning_rate": 1.9062804076214889e-06, "loss": 0.0012, "reward": 0.8237078785896301, "reward_std": 0.10535579361021519, "rewards/check_gptzero_func": 0.8237078785896301, "step": 221 }, { "completion_length": 203.67856979370117, "epoch": 0.6214135759272218, "grad_norm": 0.890625, "kl": 0.00980377197265625, "learning_rate": 1.8825419834972902e-06, "loss": 0.001, "reward": 0.5983466356992722, "reward_std": 0.22100866585969925, "rewards/check_gptzero_func": 0.5983466356992722, "step": 222 }, { "completion_length": 173.16666793823242, "epoch": 0.6242127361791463, "grad_norm": 0.93359375, "kl": 0.0145721435546875, "learning_rate": 1.8588627010517912e-06, "loss": 0.0015, "reward": 0.6490365564823151, "reward_std": 0.215117909014225, "rewards/check_gptzero_func": 0.6490365564823151, "step": 223 }, { "completion_length": 188.9166717529297, "epoch": 0.6270118964310707, "grad_norm": 0.8515625, "kl": 0.01177978515625, "learning_rate": 1.835244828346101e-06, "loss": 0.0012, "reward": 0.6734350174665451, "reward_std": 0.17084914818406105, "rewards/check_gptzero_func": 0.6734350174665451, "step": 224 }, { "completion_length": 176.91666793823242, "epoch": 0.6298110566829951, "grad_norm": 1.078125, "kl": 0.0153656005859375, "learning_rate": 1.811690627559351e-06, "loss": 0.0015, "reward": 0.7332676947116852, "reward_std": 0.2090182527899742, "rewards/check_gptzero_func": 0.7332676947116852, "step": 225 }, { "completion_length": 201.5357208251953, "epoch": 0.6326102169349195, "grad_norm": 0.78125, "kl": 0.0106964111328125, "learning_rate": 1.7882023547720156e-06, "loss": 0.0011, "reward": 0.5684466883540154, "reward_std": 0.19617649912834167, "rewards/check_gptzero_func": 0.5684466883540154, "step": 226 }, { "completion_length": 193.35714721679688, "epoch": 0.635409377186844, "grad_norm": 0.80078125, "kl": 0.01116943359375, "learning_rate": 1.7647822597498204e-06, "loss": 0.0011, "reward": 0.6939697265625, "reward_std": 0.08986328635364771, "rewards/check_gptzero_func": 0.6939697265625, "step": 227 }, { "completion_length": 169.85714721679688, "epoch": 0.6382085374387684, "grad_norm": 1.0078125, "kl": 0.0150146484375, "learning_rate": 1.7414325857282528e-06, "loss": 0.0015, "reward": 0.6364180445671082, "reward_std": 0.211682990193367, "rewards/check_gptzero_func": 0.6364180445671082, "step": 228 }, { "completion_length": 206.7023811340332, "epoch": 0.6410076976906928, "grad_norm": 0.79296875, "kl": 0.0116424560546875, "learning_rate": 1.718155569197701e-06, "loss": 0.0012, "reward": 0.6597686931490898, "reward_std": 0.14480283856391907, "rewards/check_gptzero_func": 0.6597686931490898, "step": 229 }, { "completion_length": 196.5238151550293, "epoch": 0.6438068579426172, "grad_norm": 0.91796875, "kl": 0.01229095458984375, "learning_rate": 1.6949534396892358e-06, "loss": 0.0013, "reward": 0.7675946801900864, "reward_std": 0.08783328998833895, "rewards/check_gptzero_func": 0.7675946801900864, "step": 230 }, { "completion_length": 185.5238151550293, "epoch": 0.6466060181945417, "grad_norm": 0.88671875, "kl": 0.0150909423828125, "learning_rate": 1.6718284195610607e-06, "loss": 0.0015, "reward": 0.5960735529661179, "reward_std": 0.17222833260893822, "rewards/check_gptzero_func": 0.5960735529661179, "step": 231 }, { "completion_length": 202.67857360839844, "epoch": 0.6494051784464661, "grad_norm": 0.859375, "kl": 0.0126190185546875, "learning_rate": 1.6487827237856503e-06, "loss": 0.0013, "reward": 0.6651804447174072, "reward_std": 0.19576009269803762, "rewards/check_gptzero_func": 0.6651804447174072, "step": 232 }, { "completion_length": 193.60714721679688, "epoch": 0.6522043386983905, "grad_norm": 0.83984375, "kl": 0.00897216796875, "learning_rate": 1.6258185597375919e-06, "loss": 0.0009, "reward": 0.6716840863227844, "reward_std": 0.14614208973944187, "rewards/check_gptzero_func": 0.6716840863227844, "step": 233 }, { "completion_length": 178.55952835083008, "epoch": 0.655003498950315, "grad_norm": 0.9921875, "kl": 0.0128173828125, "learning_rate": 1.6029381269821607e-06, "loss": 0.0013, "reward": 0.8011642247438431, "reward_std": 0.17047418653964996, "rewards/check_gptzero_func": 0.8011642247438431, "step": 234 }, { "completion_length": 184.25000381469727, "epoch": 0.6578026592022393, "grad_norm": 0.9375, "kl": 0.0157012939453125, "learning_rate": 1.5801436170646386e-06, "loss": 0.0016, "reward": 0.7015040963888168, "reward_std": 0.12212707288563251, "rewards/check_gptzero_func": 0.7015040963888168, "step": 235 }, { "completion_length": 184.33333587646484, "epoch": 0.6606018194541637, "grad_norm": 0.89453125, "kl": 0.01324462890625, "learning_rate": 1.5574372133004012e-06, "loss": 0.0014, "reward": 0.8126765042543411, "reward_std": 0.1537869544699788, "rewards/check_gptzero_func": 0.8126765042543411, "step": 236 }, { "completion_length": 203.8095245361328, "epoch": 0.6634009797060881, "grad_norm": 0.87890625, "kl": 0.0106201171875, "learning_rate": 1.5348210905657962e-06, "loss": 0.0011, "reward": 0.7201628535985947, "reward_std": 0.164525730535388, "rewards/check_gptzero_func": 0.7201628535985947, "step": 237 }, { "completion_length": 182.16666793823242, "epoch": 0.6662001399580126, "grad_norm": 0.96875, "kl": 0.01495361328125, "learning_rate": 1.512297415089829e-06, "loss": 0.0015, "reward": 0.7377509474754333, "reward_std": 0.1474976148456335, "rewards/check_gptzero_func": 0.7377509474754333, "step": 238 }, { "completion_length": 171.9166717529297, "epoch": 0.668999300209937, "grad_norm": 0.91796875, "kl": 0.0146942138671875, "learning_rate": 1.4898683442466715e-06, "loss": 0.0015, "reward": 0.6876519098877907, "reward_std": 0.17211773619055748, "rewards/check_gptzero_func": 0.6876519098877907, "step": 239 }, { "completion_length": 200.85714721679688, "epoch": 0.6717984604618614, "grad_norm": 0.8671875, "kl": 0.010711669921875, "learning_rate": 1.4675360263490296e-06, "loss": 0.0011, "reward": 0.6152354925870895, "reward_std": 0.16585622262209654, "rewards/check_gptzero_func": 0.6152354925870895, "step": 240 }, { "completion_length": 188.75000381469727, "epoch": 0.6745976207137858, "grad_norm": 0.86328125, "kl": 0.0137176513671875, "learning_rate": 1.4453026004423664e-06, "loss": 0.0014, "reward": 0.6593173295259476, "reward_std": 0.1960100382566452, "rewards/check_gptzero_func": 0.6593173295259476, "step": 241 }, { "completion_length": 188.8214340209961, "epoch": 0.6773967809657103, "grad_norm": 0.80078125, "kl": 0.0121612548828125, "learning_rate": 1.4231701961000256e-06, "loss": 0.0012, "reward": 0.7077113464474678, "reward_std": 0.08986913226544857, "rewards/check_gptzero_func": 0.7077113464474678, "step": 242 }, { "completion_length": 192.89286041259766, "epoch": 0.6801959412176347, "grad_norm": 0.79296875, "kl": 0.01239013671875, "learning_rate": 1.4011409332192472e-06, "loss": 0.0012, "reward": 0.7247354537248611, "reward_std": 0.16528335958719254, "rewards/check_gptzero_func": 0.7247354537248611, "step": 243 }, { "completion_length": 197.1190528869629, "epoch": 0.6829951014695591, "grad_norm": 0.82421875, "kl": 0.01019287109375, "learning_rate": 1.379216921818126e-06, "loss": 0.001, "reward": 0.6609435975551605, "reward_std": 0.20204732194542885, "rewards/check_gptzero_func": 0.6609435975551605, "step": 244 }, { "completion_length": 184.35714721679688, "epoch": 0.6857942617214835, "grad_norm": 1.015625, "kl": 0.0119171142578125, "learning_rate": 1.3574002618335055e-06, "loss": 0.0012, "reward": 0.6553308963775635, "reward_std": 0.18806752562522888, "rewards/check_gptzero_func": 0.6553308963775635, "step": 245 }, { "completion_length": 189.16666793823242, "epoch": 0.688593421973408, "grad_norm": 0.859375, "kl": 0.01226043701171875, "learning_rate": 1.335693042919841e-06, "loss": 0.0012, "reward": 0.6279339641332626, "reward_std": 0.1844564937055111, "rewards/check_gptzero_func": 0.6279339641332626, "step": 246 }, { "completion_length": 192.04762268066406, "epoch": 0.6913925822253324, "grad_norm": 0.8359375, "kl": 0.01172637939453125, "learning_rate": 1.314097344249048e-06, "loss": 0.0012, "reward": 0.6242645084857941, "reward_std": 0.1471152976155281, "rewards/check_gptzero_func": 0.6242645084857941, "step": 247 }, { "completion_length": 191.33333587646484, "epoch": 0.6941917424772568, "grad_norm": 0.90234375, "kl": 0.0138397216796875, "learning_rate": 1.2926152343113525e-06, "loss": 0.0014, "reward": 0.5745993703603745, "reward_std": 0.15316335577517748, "rewards/check_gptzero_func": 0.5745993703603745, "step": 248 }, { "completion_length": 210.39286041259766, "epoch": 0.6969909027291813, "grad_norm": 0.8203125, "kl": 0.00994873046875, "learning_rate": 1.2712487707171645e-06, "loss": 0.001, "reward": 0.7345138937234879, "reward_std": 0.1424336303025484, "rewards/check_gptzero_func": 0.7345138937234879, "step": 249 }, { "completion_length": 191.95238494873047, "epoch": 0.6997900629811057, "grad_norm": 0.88671875, "kl": 0.0122833251953125, "learning_rate": 1.2500000000000007e-06, "loss": 0.0012, "reward": 0.755773201584816, "reward_std": 0.14831538125872612, "rewards/check_gptzero_func": 0.755773201584816, "step": 250 }, { "completion_length": 175.86904907226562, "epoch": 0.7025892232330301, "grad_norm": 1.09375, "kl": 0.01434326171875, "learning_rate": 1.2288709574204561e-06, "loss": 0.0014, "reward": 0.635523222386837, "reward_std": 0.21710951253771782, "rewards/check_gptzero_func": 0.635523222386837, "step": 251 }, { "completion_length": 191.80952835083008, "epoch": 0.7053883834849545, "grad_norm": 0.94140625, "kl": 0.013092041015625, "learning_rate": 1.2078636667712648e-06, "loss": 0.0013, "reward": 0.6860392540693283, "reward_std": 0.16151536628603935, "rewards/check_gptzero_func": 0.6860392540693283, "step": 252 }, { "completion_length": 211.09524536132812, "epoch": 0.708187543736879, "grad_norm": 0.80078125, "kl": 0.0114593505859375, "learning_rate": 1.1869801401834563e-06, "loss": 0.0012, "reward": 0.5822405442595482, "reward_std": 0.1631794534623623, "rewards/check_gptzero_func": 0.5822405442595482, "step": 253 }, { "completion_length": 198.07143020629883, "epoch": 0.7109867039888034, "grad_norm": 0.92578125, "kl": 0.0108795166015625, "learning_rate": 1.1662223779336272e-06, "loss": 0.0011, "reward": 0.6223282963037491, "reward_std": 0.23277926445007324, "rewards/check_gptzero_func": 0.6223282963037491, "step": 254 }, { "completion_length": 170.01190567016602, "epoch": 0.7137858642407278, "grad_norm": 0.796875, "kl": 0.0137481689453125, "learning_rate": 1.1455923682523476e-06, "loss": 0.0014, "reward": 0.7666629701852798, "reward_std": 0.12203127704560757, "rewards/check_gptzero_func": 0.7666629701852798, "step": 255 }, { "completion_length": 182.51190948486328, "epoch": 0.7165850244926522, "grad_norm": 0.9296875, "kl": 0.0144500732421875, "learning_rate": 1.1250920871337296e-06, "loss": 0.0014, "reward": 0.5818550065159798, "reward_std": 0.21111036837100983, "rewards/check_gptzero_func": 0.5818550065159798, "step": 256 }, { "completion_length": 178.8690528869629, "epoch": 0.7193841847445767, "grad_norm": 0.81640625, "kl": 0.013885498046875, "learning_rate": 1.104723498146156e-06, "loss": 0.0014, "reward": 0.5987424030900002, "reward_std": 0.21208756789565086, "rewards/check_gptzero_func": 0.5987424030900002, "step": 257 }, { "completion_length": 173.29762268066406, "epoch": 0.722183344996501, "grad_norm": 0.83984375, "kl": 0.0154876708984375, "learning_rate": 1.0844885522442076e-06, "loss": 0.0016, "reward": 0.7769442051649094, "reward_std": 0.09991182293742895, "rewards/check_gptzero_func": 0.7769442051649094, "step": 258 }, { "completion_length": 194.32143020629883, "epoch": 0.7249825052484254, "grad_norm": 0.8046875, "kl": 0.0103759765625, "learning_rate": 1.064389187581794e-06, "loss": 0.001, "reward": 0.6640415489673615, "reward_std": 0.09457994624972343, "rewards/check_gptzero_func": 0.6640415489673615, "step": 259 }, { "completion_length": 191.02381134033203, "epoch": 0.72778166550035, "grad_norm": 0.859375, "kl": 0.012786865234375, "learning_rate": 1.044427329326515e-06, "loss": 0.0013, "reward": 0.6643179804086685, "reward_std": 0.1974339596927166, "rewards/check_gptzero_func": 0.6643179804086685, "step": 260 }, { "completion_length": 182.0238151550293, "epoch": 0.7305808257522743, "grad_norm": 0.984375, "kl": 0.0133209228515625, "learning_rate": 1.024604889475259e-06, "loss": 0.0013, "reward": 0.7576928585767746, "reward_std": 0.17764172703027725, "rewards/check_gptzero_func": 0.7576928585767746, "step": 261 }, { "completion_length": 197.76190567016602, "epoch": 0.7333799860041987, "grad_norm": 0.79296875, "kl": 0.011962890625, "learning_rate": 1.0049237666710713e-06, "loss": 0.0012, "reward": 0.6300367414951324, "reward_std": 0.1478472277522087, "rewards/check_gptzero_func": 0.6300367414951324, "step": 262 }, { "completion_length": 203.90476608276367, "epoch": 0.7361791462561231, "grad_norm": 0.7734375, "kl": 0.00923919677734375, "learning_rate": 9.853858460212961e-07, "loss": 0.0009, "reward": 0.7148824632167816, "reward_std": 0.13644199073314667, "rewards/check_gptzero_func": 0.7148824632167816, "step": 263 }, { "completion_length": 161.45238494873047, "epoch": 0.7389783065080476, "grad_norm": 0.9453125, "kl": 0.0169677734375, "learning_rate": 9.659929989170156e-07, "loss": 0.0017, "reward": 0.6777283996343613, "reward_std": 0.1693093739449978, "rewards/check_gptzero_func": 0.6777283996343613, "step": 264 }, { "completion_length": 187.28571701049805, "epoch": 0.741777466759972, "grad_norm": 0.86328125, "kl": 0.01239776611328125, "learning_rate": 9.467470828538028e-07, "loss": 0.0012, "reward": 0.6955645084381104, "reward_std": 0.11928121093660593, "rewards/check_gptzero_func": 0.6955645084381104, "step": 265 }, { "completion_length": 199.65476608276367, "epoch": 0.7445766270118964, "grad_norm": 0.81640625, "kl": 0.0113372802734375, "learning_rate": 9.276499412538082e-07, "loss": 0.0011, "reward": 0.6984945237636566, "reward_std": 0.12425749842077494, "rewards/check_gptzero_func": 0.6984945237636566, "step": 266 }, { "completion_length": 191.65476989746094, "epoch": 0.7473757872638208, "grad_norm": 0.9375, "kl": 0.0126953125, "learning_rate": 9.087034032891884e-07, "loss": 0.0013, "reward": 0.5988369584083557, "reward_std": 0.22750091180205345, "rewards/check_gptzero_func": 0.5988369584083557, "step": 267 }, { "completion_length": 190.25000381469727, "epoch": 0.7501749475157453, "grad_norm": 0.90625, "kl": 0.0141143798828125, "learning_rate": 8.899092837069081e-07, "loss": 0.0014, "reward": 0.7432132065296173, "reward_std": 0.1290474236011505, "rewards/check_gptzero_func": 0.7432132065296173, "step": 268 }, { "completion_length": 177.40476989746094, "epoch": 0.7529741077676697, "grad_norm": 0.875, "kl": 0.013031005859375, "learning_rate": 8.71269382654916e-07, "loss": 0.0013, "reward": 0.6381399929523468, "reward_std": 0.13523080106824636, "rewards/check_gptzero_func": 0.6381399929523468, "step": 269 }, { "completion_length": 182.97619247436523, "epoch": 0.7557732680195941, "grad_norm": 0.984375, "kl": 0.01318359375, "learning_rate": 8.527854855097226e-07, "loss": 0.0013, "reward": 0.6455244570970535, "reward_std": 0.142228739336133, "rewards/check_gptzero_func": 0.6455244570970535, "step": 270 }, { "completion_length": 179.0, "epoch": 0.7585724282715185, "grad_norm": 0.8515625, "kl": 0.01285552978515625, "learning_rate": 8.344593627053926e-07, "loss": 0.0013, "reward": 0.6351892277598381, "reward_std": 0.16661302000284195, "rewards/check_gptzero_func": 0.6351892277598381, "step": 271 }, { "completion_length": 203.76190948486328, "epoch": 0.761371588523443, "grad_norm": 0.79296875, "kl": 0.01068878173828125, "learning_rate": 8.162927695639699e-07, "loss": 0.0011, "reward": 0.6384782642126083, "reward_std": 0.17978323996067047, "rewards/check_gptzero_func": 0.6384782642126083, "step": 272 }, { "completion_length": 189.80952835083008, "epoch": 0.7641707487753674, "grad_norm": 0.9921875, "kl": 0.0136566162109375, "learning_rate": 7.982874461273438e-07, "loss": 0.0014, "reward": 0.5513089373707771, "reward_std": 0.21978427842259407, "rewards/check_gptzero_func": 0.5513089373707771, "step": 273 }, { "completion_length": 182.42857360839844, "epoch": 0.7669699090272918, "grad_norm": 1.015625, "kl": 0.0137786865234375, "learning_rate": 7.804451169905882e-07, "loss": 0.0014, "reward": 0.6128961741924286, "reward_std": 0.1331999460235238, "rewards/check_gptzero_func": 0.6128961741924286, "step": 274 }, { "completion_length": 185.23809814453125, "epoch": 0.7697690692792163, "grad_norm": 0.94921875, "kl": 0.0132598876953125, "learning_rate": 7.627674911367747e-07, "loss": 0.0013, "reward": 0.686809316277504, "reward_std": 0.1671704165637493, "rewards/check_gptzero_func": 0.686809316277504, "step": 275 }, { "completion_length": 184.73810195922852, "epoch": 0.7725682295311407, "grad_norm": 0.96875, "kl": 0.0143890380859375, "learning_rate": 7.452562617732795e-07, "loss": 0.0014, "reward": 0.6087932512164116, "reward_std": 0.18696350045502186, "rewards/check_gptzero_func": 0.6087932512164116, "step": 276 }, { "completion_length": 186.3928565979004, "epoch": 0.7753673897830651, "grad_norm": 0.9140625, "kl": 0.01251220703125, "learning_rate": 7.279131061696062e-07, "loss": 0.0013, "reward": 0.6093617677688599, "reward_std": 0.16512912511825562, "rewards/check_gptzero_func": 0.6093617677688599, "step": 277 }, { "completion_length": 194.29762649536133, "epoch": 0.7781665500349895, "grad_norm": 0.84375, "kl": 0.012054443359375, "learning_rate": 7.107396854967322e-07, "loss": 0.0013, "reward": 0.6530048102140427, "reward_std": 0.1365496888756752, "rewards/check_gptzero_func": 0.6530048102140427, "step": 278 }, { "completion_length": 185.23809814453125, "epoch": 0.780965710286914, "grad_norm": 0.8984375, "kl": 0.0136260986328125, "learning_rate": 6.93737644667995e-07, "loss": 0.0014, "reward": 0.5730803310871124, "reward_std": 0.23035263270139694, "rewards/check_gptzero_func": 0.5730803310871124, "step": 279 }, { "completion_length": 196.7738151550293, "epoch": 0.7837648705388384, "grad_norm": 0.8984375, "kl": 0.01300048828125, "learning_rate": 6.769086121815424e-07, "loss": 0.0013, "reward": 0.7231508791446686, "reward_std": 0.1479925811290741, "rewards/check_gptzero_func": 0.7231508791446686, "step": 280 }, { "completion_length": 173.71428680419922, "epoch": 0.7865640307907628, "grad_norm": 0.890625, "kl": 0.0151519775390625, "learning_rate": 6.602541999643486e-07, "loss": 0.0015, "reward": 0.714839443564415, "reward_std": 0.18807288724929094, "rewards/check_gptzero_func": 0.714839443564415, "step": 281 }, { "completion_length": 181.16666793823242, "epoch": 0.7893631910426872, "grad_norm": 0.890625, "kl": 0.0139617919921875, "learning_rate": 6.4377600321782e-07, "loss": 0.0014, "reward": 0.6251032203435898, "reward_std": 0.18542934395372868, "rewards/check_gptzero_func": 0.6251032203435898, "step": 282 }, { "completion_length": 183.8571434020996, "epoch": 0.7921623512946117, "grad_norm": 0.765625, "kl": 0.0149688720703125, "learning_rate": 6.274756002650034e-07, "loss": 0.0015, "reward": 0.6130138486623764, "reward_std": 0.12219419237226248, "rewards/check_gptzero_func": 0.6130138486623764, "step": 283 }, { "completion_length": 191.50000381469727, "epoch": 0.794961511546536, "grad_norm": 0.86328125, "kl": 0.014312744140625, "learning_rate": 6.11354552399408e-07, "loss": 0.0014, "reward": 0.5668933913111687, "reward_std": 0.19107018411159515, "rewards/check_gptzero_func": 0.5668933913111687, "step": 284 }, { "completion_length": 200.03571701049805, "epoch": 0.7977606717984604, "grad_norm": 0.875, "kl": 0.012359619140625, "learning_rate": 5.954144037354645e-07, "loss": 0.0012, "reward": 0.6846682727336884, "reward_std": 0.1509340275079012, "rewards/check_gptzero_func": 0.6846682727336884, "step": 285 }, { "completion_length": 189.97619247436523, "epoch": 0.8005598320503848, "grad_norm": 0.93359375, "kl": 0.014251708984375, "learning_rate": 5.796566810606227e-07, "loss": 0.0014, "reward": 0.6802646964788437, "reward_std": 0.17098304629325867, "rewards/check_gptzero_func": 0.6802646964788437, "step": 286 }, { "completion_length": 178.20238876342773, "epoch": 0.8033589923023093, "grad_norm": 0.91796875, "kl": 0.0152130126953125, "learning_rate": 5.640828936891144e-07, "loss": 0.0015, "reward": 0.5891979560256004, "reward_std": 0.18455617874860764, "rewards/check_gptzero_func": 0.5891979560256004, "step": 287 }, { "completion_length": 169.11904907226562, "epoch": 0.8061581525542337, "grad_norm": 1.0546875, "kl": 0.0149993896484375, "learning_rate": 5.486945333173852e-07, "loss": 0.0015, "reward": 0.6522376388311386, "reward_std": 0.16162380203604698, "rewards/check_gptzero_func": 0.6522376388311386, "step": 288 }, { "completion_length": 190.00000381469727, "epoch": 0.8089573128061581, "grad_norm": 0.9140625, "kl": 0.010894775390625, "learning_rate": 5.334930738812188e-07, "loss": 0.0011, "reward": 0.6731359958648682, "reward_std": 0.14657550491392612, "rewards/check_gptzero_func": 0.6731359958648682, "step": 289 }, { "completion_length": 187.7976188659668, "epoch": 0.8117564730580826, "grad_norm": 1.109375, "kl": 0.013214111328125, "learning_rate": 5.184799714145558e-07, "loss": 0.0013, "reward": 0.6758114099502563, "reward_std": 0.1736396849155426, "rewards/check_gptzero_func": 0.6758114099502563, "step": 290 }, { "completion_length": 200.3690528869629, "epoch": 0.814555633310007, "grad_norm": 0.85546875, "kl": 0.011383056640625, "learning_rate": 5.036566639100351e-07, "loss": 0.0011, "reward": 0.7276384383440018, "reward_std": 0.12691646441817284, "rewards/check_gptzero_func": 0.7276384383440018, "step": 291 }, { "completion_length": 169.78571319580078, "epoch": 0.8173547935619314, "grad_norm": 1.0234375, "kl": 0.0178375244140625, "learning_rate": 4.890245711812577e-07, "loss": 0.0018, "reward": 0.746677041053772, "reward_std": 0.22585053741931915, "rewards/check_gptzero_func": 0.746677041053772, "step": 292 }, { "completion_length": 187.03571319580078, "epoch": 0.8201539538138558, "grad_norm": 0.88671875, "kl": 0.01416015625, "learning_rate": 4.74585094726793e-07, "loss": 0.0014, "reward": 0.7156971842050552, "reward_std": 0.18219392374157906, "rewards/check_gptzero_func": 0.7156971842050552, "step": 293 }, { "completion_length": 176.9523811340332, "epoch": 0.8229531140657803, "grad_norm": 0.96875, "kl": 0.0149993896484375, "learning_rate": 4.6033961759594045e-07, "loss": 0.0015, "reward": 0.6982993930578232, "reward_std": 0.18162141740322113, "rewards/check_gptzero_func": 0.6982993930578232, "step": 294 }, { "completion_length": 175.6666717529297, "epoch": 0.8257522743177047, "grad_norm": 0.92578125, "kl": 0.01531982421875, "learning_rate": 4.462895042562576e-07, "loss": 0.0015, "reward": 0.7019955068826675, "reward_std": 0.16880467906594276, "rewards/check_gptzero_func": 0.7019955068826675, "step": 295 }, { "completion_length": 173.41666793823242, "epoch": 0.8285514345696291, "grad_norm": 1.1484375, "kl": 0.0169219970703125, "learning_rate": 4.324361004628658e-07, "loss": 0.0017, "reward": 0.6873890459537506, "reward_std": 0.1467819530516863, "rewards/check_gptzero_func": 0.6873890459537506, "step": 296 }, { "completion_length": 188.85714721679688, "epoch": 0.8313505948215535, "grad_norm": 0.93359375, "kl": 0.0130767822265625, "learning_rate": 4.1878073312955486e-07, "loss": 0.0013, "reward": 0.6262213513255119, "reward_std": 0.15299665369093418, "rewards/check_gptzero_func": 0.6262213513255119, "step": 297 }, { "completion_length": 201.33333587646484, "epoch": 0.834149755073478, "grad_norm": 0.87890625, "kl": 0.01001739501953125, "learning_rate": 4.0532471020168386e-07, "loss": 0.001, "reward": 0.708094909787178, "reward_std": 0.15163133665919304, "rewards/check_gptzero_func": 0.708094909787178, "step": 298 }, { "completion_length": 192.10714721679688, "epoch": 0.8369489153254024, "grad_norm": 0.9921875, "kl": 0.0134735107421875, "learning_rate": 3.920693205309048e-07, "loss": 0.0013, "reward": 0.5948657244443893, "reward_std": 0.20524189993739128, "rewards/check_gptzero_func": 0.5948657244443893, "step": 299 }, { "completion_length": 196.40476608276367, "epoch": 0.8397480755773268, "grad_norm": 0.86328125, "kl": 0.015106201171875, "learning_rate": 3.7901583375171277e-07, "loss": 0.0015, "reward": 0.6475347355008125, "reward_std": 0.13522333092987537, "rewards/check_gptzero_func": 0.6475347355008125, "step": 300 } ], "logging_steps": 1, "max_steps": 357, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }