Qwen-zero / trainer_state.json
zaddyzaddy's picture
Upload folder using huggingface_hub
122e0e1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8397480755773268,
"eval_steps": 500,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 187.95238494873047,
"epoch": 0.0027991602519244225,
"grad_norm": 1.0234375,
"kl": 0.0,
"learning_rate": 1.3888888888888888e-07,
"loss": 0.0,
"reward": 0.46513424068689346,
"reward_std": 0.20957323536276817,
"rewards/check_gptzero_func": 0.46513424068689346,
"step": 1
},
{
"completion_length": 204.64286041259766,
"epoch": 0.005598320503848845,
"grad_norm": 0.859375,
"kl": 0.0,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0,
"reward": 0.3453420288860798,
"reward_std": 0.19838641211390495,
"rewards/check_gptzero_func": 0.3453420288860798,
"step": 2
},
{
"completion_length": 172.09524154663086,
"epoch": 0.008397480755773267,
"grad_norm": 1.0234375,
"kl": 8.45193862915039e-05,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0,
"reward": 0.4417309910058975,
"reward_std": 0.15838673152029514,
"rewards/check_gptzero_func": 0.4417309910058975,
"step": 3
},
{
"completion_length": 182.4761962890625,
"epoch": 0.01119664100769769,
"grad_norm": 1.015625,
"kl": 0.00014972686767578125,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0,
"reward": 0.381104938685894,
"reward_std": 0.18715333193540573,
"rewards/check_gptzero_func": 0.381104938685894,
"step": 4
},
{
"completion_length": 174.01190948486328,
"epoch": 0.013995801259622114,
"grad_norm": 1.078125,
"kl": 0.000102996826171875,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0,
"reward": 0.47953247278928757,
"reward_std": 0.1845148727297783,
"rewards/check_gptzero_func": 0.47953247278928757,
"step": 5
},
{
"completion_length": 201.01190948486328,
"epoch": 0.016794961511546535,
"grad_norm": 0.91015625,
"kl": 0.00011652708053588867,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0,
"reward": 0.42353298515081406,
"reward_std": 0.1985670831054449,
"rewards/check_gptzero_func": 0.42353298515081406,
"step": 6
},
{
"completion_length": 175.90476989746094,
"epoch": 0.01959412176347096,
"grad_norm": 1.015625,
"kl": 0.00013816356658935547,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0,
"reward": 0.5062773898243904,
"reward_std": 0.1975775510072708,
"rewards/check_gptzero_func": 0.5062773898243904,
"step": 7
},
{
"completion_length": 177.42857360839844,
"epoch": 0.02239328201539538,
"grad_norm": 0.9765625,
"kl": 0.00012969970703125,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0,
"reward": 0.3951154872775078,
"reward_std": 0.1862633414566517,
"rewards/check_gptzero_func": 0.3951154872775078,
"step": 8
},
{
"completion_length": 187.6785774230957,
"epoch": 0.025192442267319804,
"grad_norm": 1.1171875,
"kl": 0.00012481212615966797,
"learning_rate": 1.25e-06,
"loss": 0.0,
"reward": 0.5369215086102486,
"reward_std": 0.27992387115955353,
"rewards/check_gptzero_func": 0.5369215086102486,
"step": 9
},
{
"completion_length": 184.0595245361328,
"epoch": 0.02799160251924423,
"grad_norm": 0.890625,
"kl": 0.00013375282287597656,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0,
"reward": 0.4447266310453415,
"reward_std": 0.25660283863544464,
"rewards/check_gptzero_func": 0.4447266310453415,
"step": 10
},
{
"completion_length": 178.57143020629883,
"epoch": 0.03079076277116865,
"grad_norm": 1.109375,
"kl": 0.00011909008026123047,
"learning_rate": 1.527777777777778e-06,
"loss": 0.0,
"reward": 0.4782513678073883,
"reward_std": 0.19694043323397636,
"rewards/check_gptzero_func": 0.4782513678073883,
"step": 11
},
{
"completion_length": 166.57143020629883,
"epoch": 0.03358992302309307,
"grad_norm": 1.078125,
"kl": 0.00013256072998046875,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0,
"reward": 0.2991659436374903,
"reward_std": 0.2037220150232315,
"rewards/check_gptzero_func": 0.2991659436374903,
"step": 12
},
{
"completion_length": 179.64286422729492,
"epoch": 0.0363890832750175,
"grad_norm": 0.921875,
"kl": 0.00010061264038085938,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.0,
"reward": 0.35697006061673164,
"reward_std": 0.16125616803765297,
"rewards/check_gptzero_func": 0.35697006061673164,
"step": 13
},
{
"completion_length": 195.26191329956055,
"epoch": 0.03918824352694192,
"grad_norm": 1.0078125,
"kl": 0.00013875961303710938,
"learning_rate": 1.944444444444445e-06,
"loss": 0.0,
"reward": 0.5023506954312325,
"reward_std": 0.2251712903380394,
"rewards/check_gptzero_func": 0.5023506954312325,
"step": 14
},
{
"completion_length": 163.0952377319336,
"epoch": 0.04198740377886634,
"grad_norm": 1.15625,
"kl": 0.00012624263763427734,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0,
"reward": 0.5686202421784401,
"reward_std": 0.16065805964171886,
"rewards/check_gptzero_func": 0.5686202421784401,
"step": 15
},
{
"completion_length": 180.0714340209961,
"epoch": 0.04478656403079076,
"grad_norm": 1.03125,
"kl": 0.00016486644744873047,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0,
"reward": 0.4932379499077797,
"reward_std": 0.2570475209504366,
"rewards/check_gptzero_func": 0.4932379499077797,
"step": 16
},
{
"completion_length": 193.03571701049805,
"epoch": 0.04758572428271519,
"grad_norm": 1.0078125,
"kl": 0.00022602081298828125,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0,
"reward": 0.34498296678066254,
"reward_std": 0.23837541788816452,
"rewards/check_gptzero_func": 0.34498296678066254,
"step": 17
},
{
"completion_length": 179.34524536132812,
"epoch": 0.05038488453463961,
"grad_norm": 0.94921875,
"kl": 0.00022101402282714844,
"learning_rate": 2.5e-06,
"loss": 0.0,
"reward": 0.5981053188443184,
"reward_std": 0.16939585842192173,
"rewards/check_gptzero_func": 0.5981053188443184,
"step": 18
},
{
"completion_length": 182.52381134033203,
"epoch": 0.05318404478656403,
"grad_norm": 0.91796875,
"kl": 0.00026416778564453125,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.0,
"reward": 0.4666922390460968,
"reward_std": 0.19714651349931955,
"rewards/check_gptzero_func": 0.4666922390460968,
"step": 19
},
{
"completion_length": 172.7023811340332,
"epoch": 0.05598320503848846,
"grad_norm": 0.98828125,
"kl": 0.0003151893615722656,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0,
"reward": 0.5293586701154709,
"reward_std": 0.1787981353700161,
"rewards/check_gptzero_func": 0.5293586701154709,
"step": 20
},
{
"completion_length": 199.94047927856445,
"epoch": 0.05878236529041288,
"grad_norm": 1.0546875,
"kl": 0.00033283233642578125,
"learning_rate": 2.916666666666667e-06,
"loss": 0.0,
"reward": 0.4446847140789032,
"reward_std": 0.18958063051104546,
"rewards/check_gptzero_func": 0.4446847140789032,
"step": 21
},
{
"completion_length": 198.52381134033203,
"epoch": 0.0615815255423373,
"grad_norm": 0.8359375,
"kl": 0.00037479400634765625,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0,
"reward": 0.5041225850582123,
"reward_std": 0.1401257887482643,
"rewards/check_gptzero_func": 0.5041225850582123,
"step": 22
},
{
"completion_length": 177.72619247436523,
"epoch": 0.06438068579426172,
"grad_norm": 0.99609375,
"kl": 0.0007195472717285156,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0001,
"reward": 0.472538560628891,
"reward_std": 0.25678203627467155,
"rewards/check_gptzero_func": 0.472538560628891,
"step": 23
},
{
"completion_length": 201.9761962890625,
"epoch": 0.06717984604618614,
"grad_norm": 0.8984375,
"kl": 0.0009317398071289062,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0001,
"reward": 0.5138305872678757,
"reward_std": 0.19977859780192375,
"rewards/check_gptzero_func": 0.5138305872678757,
"step": 24
},
{
"completion_length": 211.83333587646484,
"epoch": 0.06997900629811056,
"grad_norm": 0.78125,
"kl": 0.0008678436279296875,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.0001,
"reward": 0.4136114977300167,
"reward_std": 0.1939336434006691,
"rewards/check_gptzero_func": 0.4136114977300167,
"step": 25
},
{
"completion_length": 173.82143020629883,
"epoch": 0.072778166550035,
"grad_norm": 0.9375,
"kl": 0.0014667510986328125,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0001,
"reward": 0.5154794007539749,
"reward_std": 0.22610028088092804,
"rewards/check_gptzero_func": 0.5154794007539749,
"step": 26
},
{
"completion_length": 190.48809814453125,
"epoch": 0.07557732680195942,
"grad_norm": 0.87890625,
"kl": 0.00168609619140625,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0002,
"reward": 0.5691990107297897,
"reward_std": 0.1696779690682888,
"rewards/check_gptzero_func": 0.5691990107297897,
"step": 27
},
{
"completion_length": 188.2738151550293,
"epoch": 0.07837648705388384,
"grad_norm": 0.9375,
"kl": 0.00185394287109375,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0002,
"reward": 0.5436510145664215,
"reward_std": 0.24924885854125023,
"rewards/check_gptzero_func": 0.5436510145664215,
"step": 28
},
{
"completion_length": 205.52381134033203,
"epoch": 0.08117564730580826,
"grad_norm": 0.765625,
"kl": 0.001617431640625,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0002,
"reward": 0.4470183253288269,
"reward_std": 0.20528827793896198,
"rewards/check_gptzero_func": 0.4470183253288269,
"step": 29
},
{
"completion_length": 171.84524154663086,
"epoch": 0.08397480755773268,
"grad_norm": 0.9609375,
"kl": 0.003101348876953125,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0003,
"reward": 0.46852748841047287,
"reward_std": 0.16202964074909687,
"rewards/check_gptzero_func": 0.46852748841047287,
"step": 30
},
{
"completion_length": 180.92857360839844,
"epoch": 0.0867739678096571,
"grad_norm": 1.34375,
"kl": 0.003253936767578125,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0003,
"reward": 0.43846940994262695,
"reward_std": 0.16219847835600376,
"rewards/check_gptzero_func": 0.43846940994262695,
"step": 31
},
{
"completion_length": 200.4285774230957,
"epoch": 0.08957312806158152,
"grad_norm": 0.8125,
"kl": 0.00244140625,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0003,
"reward": 0.5283855870366096,
"reward_std": 0.20884095132350922,
"rewards/check_gptzero_func": 0.5283855870366096,
"step": 32
},
{
"completion_length": 178.6190528869629,
"epoch": 0.09237228831350595,
"grad_norm": 0.83984375,
"kl": 0.00383758544921875,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0004,
"reward": 0.5132240653038025,
"reward_std": 0.15939988382160664,
"rewards/check_gptzero_func": 0.5132240653038025,
"step": 33
},
{
"completion_length": 195.8095245361328,
"epoch": 0.09517144856543037,
"grad_norm": 0.9609375,
"kl": 0.0034942626953125,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0003,
"reward": 0.4721037596464157,
"reward_std": 0.22807539626955986,
"rewards/check_gptzero_func": 0.4721037596464157,
"step": 34
},
{
"completion_length": 187.72619247436523,
"epoch": 0.0979706088173548,
"grad_norm": 0.78515625,
"kl": 0.004547119140625,
"learning_rate": 4.861111111111111e-06,
"loss": 0.0005,
"reward": 0.4753117114305496,
"reward_std": 0.21822157502174377,
"rewards/check_gptzero_func": 0.4753117114305496,
"step": 35
},
{
"completion_length": 189.3452377319336,
"epoch": 0.10076976906927922,
"grad_norm": 1.015625,
"kl": 0.005115509033203125,
"learning_rate": 5e-06,
"loss": 0.0005,
"reward": 0.5079967528581619,
"reward_std": 0.1728131715208292,
"rewards/check_gptzero_func": 0.5079967528581619,
"step": 36
},
{
"completion_length": 182.50000381469727,
"epoch": 0.10356892932120364,
"grad_norm": 0.859375,
"kl": 0.006103515625,
"learning_rate": 4.999880271862381e-06,
"loss": 0.0006,
"reward": 0.5491276457905769,
"reward_std": 0.29580989107489586,
"rewards/check_gptzero_func": 0.5491276457905769,
"step": 37
},
{
"completion_length": 201.61904907226562,
"epoch": 0.10636808957312806,
"grad_norm": 0.7890625,
"kl": 0.00431060791015625,
"learning_rate": 4.999521098917384e-06,
"loss": 0.0005,
"reward": 0.7083312571048737,
"reward_std": 0.17898299172520638,
"rewards/check_gptzero_func": 0.7083312571048737,
"step": 38
},
{
"completion_length": 183.6904754638672,
"epoch": 0.10916724982505248,
"grad_norm": 0.828125,
"kl": 0.00653076171875,
"learning_rate": 4.998922515567496e-06,
"loss": 0.0007,
"reward": 0.5663967505097389,
"reward_std": 0.21669673547148705,
"rewards/check_gptzero_func": 0.5663967505097389,
"step": 39
},
{
"completion_length": 176.16667556762695,
"epoch": 0.11196641007697691,
"grad_norm": 0.83984375,
"kl": 0.007232666015625,
"learning_rate": 4.998084579146533e-06,
"loss": 0.0007,
"reward": 0.592326283454895,
"reward_std": 0.1653369516134262,
"rewards/check_gptzero_func": 0.592326283454895,
"step": 40
},
{
"completion_length": 191.34524154663086,
"epoch": 0.11476557032890133,
"grad_norm": 0.78515625,
"kl": 0.006866455078125,
"learning_rate": 4.997007369914149e-06,
"loss": 0.0007,
"reward": 0.5438110902905464,
"reward_std": 0.150531854480505,
"rewards/check_gptzero_func": 0.5438110902905464,
"step": 41
},
{
"completion_length": 173.26190567016602,
"epoch": 0.11756473058082575,
"grad_norm": 0.90234375,
"kl": 0.00835418701171875,
"learning_rate": 4.9956909910481465e-06,
"loss": 0.0008,
"reward": 0.496019683778286,
"reward_std": 0.13957532681524754,
"rewards/check_gptzero_func": 0.496019683778286,
"step": 42
},
{
"completion_length": 197.21428680419922,
"epoch": 0.12036389083275018,
"grad_norm": 0.7578125,
"kl": 0.00658416748046875,
"learning_rate": 4.994135568634598e-06,
"loss": 0.0007,
"reward": 0.5706812366843224,
"reward_std": 0.22384651005268097,
"rewards/check_gptzero_func": 0.5706812366843224,
"step": 43
},
{
"completion_length": 195.50000381469727,
"epoch": 0.1231630510846746,
"grad_norm": 0.8046875,
"kl": 0.0059814453125,
"learning_rate": 4.992341251655768e-06,
"loss": 0.0006,
"reward": 0.49248379468917847,
"reward_std": 0.1679957453161478,
"rewards/check_gptzero_func": 0.49248379468917847,
"step": 44
},
{
"completion_length": 174.83333206176758,
"epoch": 0.12596221133659902,
"grad_norm": 0.83984375,
"kl": 0.00934600830078125,
"learning_rate": 4.99030821197584e-06,
"loss": 0.001,
"reward": 0.5782680213451385,
"reward_std": 0.18842186219990253,
"rewards/check_gptzero_func": 0.5782680213451385,
"step": 45
},
{
"completion_length": 180.71428680419922,
"epoch": 0.12876137158852344,
"grad_norm": 0.83203125,
"kl": 0.009490966796875,
"learning_rate": 4.988036644324457e-06,
"loss": 0.001,
"reward": 0.5191970095038414,
"reward_std": 0.2319345511496067,
"rewards/check_gptzero_func": 0.5191970095038414,
"step": 46
},
{
"completion_length": 185.46429061889648,
"epoch": 0.13156053184044786,
"grad_norm": 0.8125,
"kl": 0.00946044921875,
"learning_rate": 4.9855267662780715e-06,
"loss": 0.0009,
"reward": 0.6461608409881592,
"reward_std": 0.1471536885946989,
"rewards/check_gptzero_func": 0.6461608409881592,
"step": 47
},
{
"completion_length": 176.53571701049805,
"epoch": 0.13435969209237228,
"grad_norm": 0.82421875,
"kl": 0.01032257080078125,
"learning_rate": 4.982778818239101e-06,
"loss": 0.001,
"reward": 0.49297887086868286,
"reward_std": 0.1380300959572196,
"rewards/check_gptzero_func": 0.49297887086868286,
"step": 48
},
{
"completion_length": 189.15476608276367,
"epoch": 0.1371588523442967,
"grad_norm": 0.9765625,
"kl": 0.01055145263671875,
"learning_rate": 4.979793063412909e-06,
"loss": 0.0011,
"reward": 0.5546858608722687,
"reward_std": 0.20936554670333862,
"rewards/check_gptzero_func": 0.5546858608722687,
"step": 49
},
{
"completion_length": 175.32143020629883,
"epoch": 0.13995801259622112,
"grad_norm": 0.80859375,
"kl": 0.0104827880859375,
"learning_rate": 4.9765697877825844e-06,
"loss": 0.001,
"reward": 0.5866354256868362,
"reward_std": 0.1675815749913454,
"rewards/check_gptzero_func": 0.5866354256868362,
"step": 50
},
{
"completion_length": 178.88095092773438,
"epoch": 0.14275717284814557,
"grad_norm": 0.921875,
"kl": 0.01032257080078125,
"learning_rate": 4.97310930008156e-06,
"loss": 0.001,
"reward": 0.6669622659683228,
"reward_std": 0.21926475502550602,
"rewards/check_gptzero_func": 0.6669622659683228,
"step": 51
},
{
"completion_length": 180.7738151550293,
"epoch": 0.14555633310007,
"grad_norm": 0.83984375,
"kl": 0.0123748779296875,
"learning_rate": 4.969411931764033e-06,
"loss": 0.0012,
"reward": 0.567020371556282,
"reward_std": 0.17968417704105377,
"rewards/check_gptzero_func": 0.567020371556282,
"step": 52
},
{
"completion_length": 195.5357208251953,
"epoch": 0.1483554933519944,
"grad_norm": 0.796875,
"kl": 0.0104827880859375,
"learning_rate": 4.965478036973221e-06,
"loss": 0.001,
"reward": 0.6985915303230286,
"reward_std": 0.17377906665205956,
"rewards/check_gptzero_func": 0.6985915303230286,
"step": 53
},
{
"completion_length": 178.00000381469727,
"epoch": 0.15115465360391883,
"grad_norm": 1.0234375,
"kl": 0.01174163818359375,
"learning_rate": 4.9613079925074435e-06,
"loss": 0.0012,
"reward": 0.6754837036132812,
"reward_std": 0.2185358963906765,
"rewards/check_gptzero_func": 0.6754837036132812,
"step": 54
},
{
"completion_length": 172.64286041259766,
"epoch": 0.15395381385584325,
"grad_norm": 0.8203125,
"kl": 0.0126953125,
"learning_rate": 4.956902197784025e-06,
"loss": 0.0013,
"reward": 0.5507477447390556,
"reward_std": 0.12427662499248981,
"rewards/check_gptzero_func": 0.5507477447390556,
"step": 55
},
{
"completion_length": 178.40476608276367,
"epoch": 0.15675297410776767,
"grad_norm": 0.83984375,
"kl": 0.01100921630859375,
"learning_rate": 4.952261074801043e-06,
"loss": 0.0011,
"reward": 0.7200377136468887,
"reward_std": 0.1754942275583744,
"rewards/check_gptzero_func": 0.7200377136468887,
"step": 56
},
{
"completion_length": 177.02381134033203,
"epoch": 0.1595521343596921,
"grad_norm": 0.859375,
"kl": 0.01055908203125,
"learning_rate": 4.947385068096907e-06,
"loss": 0.0011,
"reward": 0.7536474019289017,
"reward_std": 0.12843344174325466,
"rewards/check_gptzero_func": 0.7536474019289017,
"step": 57
},
{
"completion_length": 197.3571434020996,
"epoch": 0.16235129461161651,
"grad_norm": 0.8515625,
"kl": 0.00899505615234375,
"learning_rate": 4.942274644707778e-06,
"loss": 0.0009,
"reward": 0.5334034785628319,
"reward_std": 0.22946524992585182,
"rewards/check_gptzero_func": 0.5334034785628319,
"step": 58
},
{
"completion_length": 182.14286041259766,
"epoch": 0.16515045486354094,
"grad_norm": 0.921875,
"kl": 0.01129150390625,
"learning_rate": 4.936930294122838e-06,
"loss": 0.0011,
"reward": 0.5683771669864655,
"reward_std": 0.1916387351229787,
"rewards/check_gptzero_func": 0.5683771669864655,
"step": 59
},
{
"completion_length": 173.2023811340332,
"epoch": 0.16794961511546536,
"grad_norm": 0.92578125,
"kl": 0.01171875,
"learning_rate": 4.931352528237398e-06,
"loss": 0.0012,
"reward": 0.6447094231843948,
"reward_std": 0.19410214200615883,
"rewards/check_gptzero_func": 0.6447094231843948,
"step": 60
},
{
"completion_length": 185.6428565979004,
"epoch": 0.17074877536738978,
"grad_norm": 0.8515625,
"kl": 0.00968170166015625,
"learning_rate": 4.925541881303876e-06,
"loss": 0.001,
"reward": 0.6383133828639984,
"reward_std": 0.18484976701438427,
"rewards/check_gptzero_func": 0.6383133828639984,
"step": 61
},
{
"completion_length": 189.1785774230957,
"epoch": 0.1735479356193142,
"grad_norm": 1.0078125,
"kl": 0.008331298828125,
"learning_rate": 4.919498909880621e-06,
"loss": 0.0008,
"reward": 0.6155931651592255,
"reward_std": 0.18777143955230713,
"rewards/check_gptzero_func": 0.6155931651592255,
"step": 62
},
{
"completion_length": 191.80952835083008,
"epoch": 0.17634709587123862,
"grad_norm": 0.8046875,
"kl": 0.00836181640625,
"learning_rate": 4.913224192778604e-06,
"loss": 0.0008,
"reward": 0.6793939918279648,
"reward_std": 0.09720544703304768,
"rewards/check_gptzero_func": 0.6793939918279648,
"step": 63
},
{
"completion_length": 171.97619247436523,
"epoch": 0.17914625612316304,
"grad_norm": 0.88671875,
"kl": 0.00908660888671875,
"learning_rate": 4.906718331005979e-06,
"loss": 0.0009,
"reward": 0.44309166073799133,
"reward_std": 0.2190867941826582,
"rewards/check_gptzero_func": 0.44309166073799133,
"step": 64
},
{
"completion_length": 175.59524154663086,
"epoch": 0.1819454163750875,
"grad_norm": 0.9140625,
"kl": 0.00946044921875,
"learning_rate": 4.899981947710518e-06,
"loss": 0.0009,
"reward": 0.5109639540314674,
"reward_std": 0.15947622060775757,
"rewards/check_gptzero_func": 0.5109639540314674,
"step": 65
},
{
"completion_length": 184.76190948486328,
"epoch": 0.1847445766270119,
"grad_norm": 0.90234375,
"kl": 0.009246826171875,
"learning_rate": 4.893015688119921e-06,
"loss": 0.0009,
"reward": 0.5143184289336205,
"reward_std": 0.2525811605155468,
"rewards/check_gptzero_func": 0.5143184289336205,
"step": 66
},
{
"completion_length": 174.76191329956055,
"epoch": 0.18754373687893633,
"grad_norm": 0.890625,
"kl": 0.0095367431640625,
"learning_rate": 4.885820219480018e-06,
"loss": 0.001,
"reward": 0.6203874498605728,
"reward_std": 0.20078162848949432,
"rewards/check_gptzero_func": 0.6203874498605728,
"step": 67
},
{
"completion_length": 184.2738151550293,
"epoch": 0.19034289713086075,
"grad_norm": 0.98046875,
"kl": 0.008697509765625,
"learning_rate": 4.8783962309908564e-06,
"loss": 0.0009,
"reward": 0.5816539227962494,
"reward_std": 0.23236144706606865,
"rewards/check_gptzero_func": 0.5816539227962494,
"step": 68
},
{
"completion_length": 184.0,
"epoch": 0.19314205738278517,
"grad_norm": 0.8984375,
"kl": 0.00894927978515625,
"learning_rate": 4.870744433740688e-06,
"loss": 0.0009,
"reward": 0.6825973987579346,
"reward_std": 0.17790008522570133,
"rewards/check_gptzero_func": 0.6825973987579346,
"step": 69
},
{
"completion_length": 204.1785774230957,
"epoch": 0.1959412176347096,
"grad_norm": 0.8203125,
"kl": 0.00753021240234375,
"learning_rate": 4.8628655606378625e-06,
"loss": 0.0008,
"reward": 0.6512226462364197,
"reward_std": 0.22282536327838898,
"rewards/check_gptzero_func": 0.6512226462364197,
"step": 70
},
{
"completion_length": 188.47619247436523,
"epoch": 0.198740377886634,
"grad_norm": 0.96875,
"kl": 0.00823974609375,
"learning_rate": 4.854760366340619e-06,
"loss": 0.0008,
"reward": 0.6291620433330536,
"reward_std": 0.18235865235328674,
"rewards/check_gptzero_func": 0.6291620433330536,
"step": 71
},
{
"completion_length": 191.6428565979004,
"epoch": 0.20153953813855843,
"grad_norm": 1.0078125,
"kl": 0.00788116455078125,
"learning_rate": 4.846429627184816e-06,
"loss": 0.0008,
"reward": 0.5630831271409988,
"reward_std": 0.20192383974790573,
"rewards/check_gptzero_func": 0.5630831271409988,
"step": 72
},
{
"completion_length": 186.39286422729492,
"epoch": 0.20433869839048285,
"grad_norm": 1.0078125,
"kl": 0.0083770751953125,
"learning_rate": 4.837874141109557e-06,
"loss": 0.0008,
"reward": 0.566518671810627,
"reward_std": 0.2747541069984436,
"rewards/check_gptzero_func": 0.566518671810627,
"step": 73
},
{
"completion_length": 197.75000381469727,
"epoch": 0.20713785864240727,
"grad_norm": 0.9140625,
"kl": 0.0078887939453125,
"learning_rate": 4.829094727580775e-06,
"loss": 0.0008,
"reward": 0.6935234069824219,
"reward_std": 0.12828794866800308,
"rewards/check_gptzero_func": 0.6935234069824219,
"step": 74
},
{
"completion_length": 182.94047927856445,
"epoch": 0.2099370188943317,
"grad_norm": 1.0859375,
"kl": 0.0101470947265625,
"learning_rate": 4.820092227512736e-06,
"loss": 0.001,
"reward": 0.5632592514157295,
"reward_std": 0.20659778825938702,
"rewards/check_gptzero_func": 0.5632592514157295,
"step": 75
},
{
"completion_length": 189.26190948486328,
"epoch": 0.21273617914625612,
"grad_norm": 0.97265625,
"kl": 0.0099639892578125,
"learning_rate": 4.810867503187492e-06,
"loss": 0.001,
"reward": 0.6693995073437691,
"reward_std": 0.15574552537873387,
"rewards/check_gptzero_func": 0.6693995073437691,
"step": 76
},
{
"completion_length": 202.20238494873047,
"epoch": 0.21553533939818054,
"grad_norm": 1.046875,
"kl": 0.0087127685546875,
"learning_rate": 4.8014214381722945e-06,
"loss": 0.0009,
"reward": 0.6062769070267677,
"reward_std": 0.269734937697649,
"rewards/check_gptzero_func": 0.6062769070267677,
"step": 77
},
{
"completion_length": 201.22619247436523,
"epoch": 0.21833449965010496,
"grad_norm": 0.953125,
"kl": 0.00868988037109375,
"learning_rate": 4.791754937234962e-06,
"loss": 0.0009,
"reward": 0.5902510657906532,
"reward_std": 0.20445209927856922,
"rewards/check_gptzero_func": 0.5902510657906532,
"step": 78
},
{
"completion_length": 191.41666793823242,
"epoch": 0.22113365990202938,
"grad_norm": 0.859375,
"kl": 0.0099029541015625,
"learning_rate": 4.781868926257216e-06,
"loss": 0.001,
"reward": 0.7119332551956177,
"reward_std": 0.1272009308449924,
"rewards/check_gptzero_func": 0.7119332551956177,
"step": 79
},
{
"completion_length": 201.95238494873047,
"epoch": 0.22393282015395383,
"grad_norm": 0.9609375,
"kl": 0.01006317138671875,
"learning_rate": 4.771764352146005e-06,
"loss": 0.001,
"reward": 0.6730313450098038,
"reward_std": 0.23449595272541046,
"rewards/check_gptzero_func": 0.6730313450098038,
"step": 80
},
{
"completion_length": 181.53571701049805,
"epoch": 0.22673198040587825,
"grad_norm": 1.15625,
"kl": 0.0106964111328125,
"learning_rate": 4.761442182742799e-06,
"loss": 0.0011,
"reward": 0.5613239407539368,
"reward_std": 0.21234364807605743,
"rewards/check_gptzero_func": 0.5613239407539368,
"step": 81
},
{
"completion_length": 184.67857360839844,
"epoch": 0.22953114065780267,
"grad_norm": 0.9765625,
"kl": 0.0101776123046875,
"learning_rate": 4.750903406730895e-06,
"loss": 0.001,
"reward": 0.5844283923506737,
"reward_std": 0.21481262892484665,
"rewards/check_gptzero_func": 0.5844283923506737,
"step": 82
},
{
"completion_length": 169.38095474243164,
"epoch": 0.2323303009097271,
"grad_norm": 1.1484375,
"kl": 0.0128021240234375,
"learning_rate": 4.740149033540711e-06,
"loss": 0.0013,
"reward": 0.6525488644838333,
"reward_std": 0.19818515330553055,
"rewards/check_gptzero_func": 0.6525488644838333,
"step": 83
},
{
"completion_length": 178.3809585571289,
"epoch": 0.2351294611616515,
"grad_norm": 1.0859375,
"kl": 0.010101318359375,
"learning_rate": 4.729180093253106e-06,
"loss": 0.001,
"reward": 0.6054461151361465,
"reward_std": 0.22233787178993225,
"rewards/check_gptzero_func": 0.6054461151361465,
"step": 84
},
{
"completion_length": 185.47619247436523,
"epoch": 0.23792862141357593,
"grad_norm": 0.93359375,
"kl": 0.0096588134765625,
"learning_rate": 4.717997636500715e-06,
"loss": 0.001,
"reward": 0.5751017481088638,
"reward_std": 0.19414596632122993,
"rewards/check_gptzero_func": 0.5751017481088638,
"step": 85
},
{
"completion_length": 176.61904907226562,
"epoch": 0.24072778166550035,
"grad_norm": 0.90234375,
"kl": 0.0117034912109375,
"learning_rate": 4.706602734367314e-06,
"loss": 0.0012,
"reward": 0.6652742102742195,
"reward_std": 0.1461728010326624,
"rewards/check_gptzero_func": 0.6652742102742195,
"step": 86
},
{
"completion_length": 204.6785774230957,
"epoch": 0.24352694191742477,
"grad_norm": 0.8203125,
"kl": 0.00868988037109375,
"learning_rate": 4.694996478285232e-06,
"loss": 0.0009,
"reward": 0.5329968556761742,
"reward_std": 0.23187025263905525,
"rewards/check_gptzero_func": 0.5329968556761742,
"step": 87
},
{
"completion_length": 191.7738151550293,
"epoch": 0.2463261021693492,
"grad_norm": 0.9140625,
"kl": 0.0092010498046875,
"learning_rate": 4.683179979930808e-06,
"loss": 0.0009,
"reward": 0.660212829709053,
"reward_std": 0.17499383352696896,
"rewards/check_gptzero_func": 0.660212829709053,
"step": 88
},
{
"completion_length": 188.29762268066406,
"epoch": 0.2491252624212736,
"grad_norm": 0.85546875,
"kl": 0.01043701171875,
"learning_rate": 4.6711543711179155e-06,
"loss": 0.001,
"reward": 0.6105453222990036,
"reward_std": 0.1719030626118183,
"rewards/check_gptzero_func": 0.6105453222990036,
"step": 89
},
{
"completion_length": 185.86904907226562,
"epoch": 0.25192442267319803,
"grad_norm": 1.0234375,
"kl": 0.01029205322265625,
"learning_rate": 4.658920803689553e-06,
"loss": 0.001,
"reward": 0.6307502388954163,
"reward_std": 0.26598427444696426,
"rewards/check_gptzero_func": 0.6307502388954163,
"step": 90
},
{
"completion_length": 180.8571434020996,
"epoch": 0.2547235829251225,
"grad_norm": 0.9765625,
"kl": 0.0113525390625,
"learning_rate": 4.646480449407516e-06,
"loss": 0.0011,
"reward": 0.661887601017952,
"reward_std": 0.17039467580616474,
"rewards/check_gptzero_func": 0.661887601017952,
"step": 91
},
{
"completion_length": 175.97619247436523,
"epoch": 0.2575227431770469,
"grad_norm": 1.09375,
"kl": 0.0103912353515625,
"learning_rate": 4.633834499840164e-06,
"loss": 0.001,
"reward": 0.6470509469509125,
"reward_std": 0.11666383501142263,
"rewards/check_gptzero_func": 0.6470509469509125,
"step": 92
},
{
"completion_length": 196.95238494873047,
"epoch": 0.2603219034289713,
"grad_norm": 0.87890625,
"kl": 0.00783538818359375,
"learning_rate": 4.620984166248288e-06,
"loss": 0.0008,
"reward": 0.645782083272934,
"reward_std": 0.16378989815711975,
"rewards/check_gptzero_func": 0.645782083272934,
"step": 93
},
{
"completion_length": 184.09524536132812,
"epoch": 0.2631210636808957,
"grad_norm": 0.92578125,
"kl": 0.00963592529296875,
"learning_rate": 4.607930679469096e-06,
"loss": 0.001,
"reward": 0.6375631093978882,
"reward_std": 0.12679122015833855,
"rewards/check_gptzero_func": 0.6375631093978882,
"step": 94
},
{
"completion_length": 187.78571701049805,
"epoch": 0.26592022393282017,
"grad_norm": 0.84375,
"kl": 0.009124755859375,
"learning_rate": 4.594675289798317e-06,
"loss": 0.0009,
"reward": 0.6679813116788864,
"reward_std": 0.19044114090502262,
"rewards/check_gptzero_func": 0.6679813116788864,
"step": 95
},
{
"completion_length": 174.94048309326172,
"epoch": 0.26871938418474456,
"grad_norm": 0.9609375,
"kl": 0.01029205322265625,
"learning_rate": 4.5812192668704454e-06,
"loss": 0.001,
"reward": 0.5234083607792854,
"reward_std": 0.2531973347067833,
"rewards/check_gptzero_func": 0.5234083607792854,
"step": 96
},
{
"completion_length": 183.19048309326172,
"epoch": 0.271518544436669,
"grad_norm": 0.90234375,
"kl": 0.00909423828125,
"learning_rate": 4.5675638995371355e-06,
"loss": 0.0009,
"reward": 0.5966005846858025,
"reward_std": 0.22473083063960075,
"rewards/check_gptzero_func": 0.5966005846858025,
"step": 97
},
{
"completion_length": 183.14286041259766,
"epoch": 0.2743177046885934,
"grad_norm": 0.85546875,
"kl": 0.0090484619140625,
"learning_rate": 4.553710495743744e-06,
"loss": 0.0009,
"reward": 0.5813074707984924,
"reward_std": 0.1640096753835678,
"rewards/check_gptzero_func": 0.5813074707984924,
"step": 98
},
{
"completion_length": 198.46429061889648,
"epoch": 0.27711686494051785,
"grad_norm": 0.88671875,
"kl": 0.0083465576171875,
"learning_rate": 4.53966038240406e-06,
"loss": 0.0008,
"reward": 0.48446883261203766,
"reward_std": 0.14858301915228367,
"rewards/check_gptzero_func": 0.48446883261203766,
"step": 99
},
{
"completion_length": 158.04762268066406,
"epoch": 0.27991602519244224,
"grad_norm": 1.0625,
"kl": 0.0104522705078125,
"learning_rate": 4.525414905273208e-06,
"loss": 0.001,
"reward": 0.6134350448846817,
"reward_std": 0.15003260038793087,
"rewards/check_gptzero_func": 0.6134350448846817,
"step": 100
},
{
"completion_length": 178.0595245361328,
"epoch": 0.2827151854443667,
"grad_norm": 0.90234375,
"kl": 0.0092010498046875,
"learning_rate": 4.510975428818743e-06,
"loss": 0.0009,
"reward": 0.6400493085384369,
"reward_std": 0.12321909703314304,
"rewards/check_gptzero_func": 0.6400493085384369,
"step": 101
},
{
"completion_length": 202.76190567016602,
"epoch": 0.28551434569629114,
"grad_norm": 0.9140625,
"kl": 0.007781982421875,
"learning_rate": 4.496343336089965e-06,
"loss": 0.0008,
"reward": 0.5859105363488197,
"reward_std": 0.12806045822799206,
"rewards/check_gptzero_func": 0.5859105363488197,
"step": 102
},
{
"completion_length": 171.26190948486328,
"epoch": 0.28831350594821553,
"grad_norm": 1.1015625,
"kl": 0.0096588134765625,
"learning_rate": 4.481520028585445e-06,
"loss": 0.001,
"reward": 0.5852581560611725,
"reward_std": 0.22868289425969124,
"rewards/check_gptzero_func": 0.5852581560611725,
"step": 103
},
{
"completion_length": 180.51190948486328,
"epoch": 0.29111266620014,
"grad_norm": 1.0,
"kl": 0.010009765625,
"learning_rate": 4.466506926118782e-06,
"loss": 0.001,
"reward": 0.6214292347431183,
"reward_std": 0.20645314827561378,
"rewards/check_gptzero_func": 0.6214292347431183,
"step": 104
},
{
"completion_length": 195.03571701049805,
"epoch": 0.2939118264520644,
"grad_norm": 0.8359375,
"kl": 0.00890350341796875,
"learning_rate": 4.451305466682615e-06,
"loss": 0.0009,
"reward": 0.5131512135267258,
"reward_std": 0.22282657399773598,
"rewards/check_gptzero_func": 0.5131512135267258,
"step": 105
},
{
"completion_length": 185.69047927856445,
"epoch": 0.2967109867039888,
"grad_norm": 0.8515625,
"kl": 0.01049041748046875,
"learning_rate": 4.435917106310887e-06,
"loss": 0.0011,
"reward": 0.6954147666692734,
"reward_std": 0.11661373171955347,
"rewards/check_gptzero_func": 0.6954147666692734,
"step": 106
},
{
"completion_length": 184.50000381469727,
"epoch": 0.2995101469559132,
"grad_norm": 0.80078125,
"kl": 0.0090484619140625,
"learning_rate": 4.420343318939378e-06,
"loss": 0.0009,
"reward": 0.7220865786075592,
"reward_std": 0.11773823061957955,
"rewards/check_gptzero_func": 0.7220865786075592,
"step": 107
},
{
"completion_length": 193.82143020629883,
"epoch": 0.30230930720783766,
"grad_norm": 0.88671875,
"kl": 0.0087432861328125,
"learning_rate": 4.404585596264537e-06,
"loss": 0.0009,
"reward": 0.6370752304792404,
"reward_std": 0.22320828214287758,
"rewards/check_gptzero_func": 0.6370752304792404,
"step": 108
},
{
"completion_length": 184.55952835083008,
"epoch": 0.30510846745976206,
"grad_norm": 0.8671875,
"kl": 0.009765625,
"learning_rate": 4.388645447600593e-06,
"loss": 0.001,
"reward": 0.7004173994064331,
"reward_std": 0.14368313550949097,
"rewards/check_gptzero_func": 0.7004173994064331,
"step": 109
},
{
"completion_length": 186.66666793823242,
"epoch": 0.3079076277116865,
"grad_norm": 0.875,
"kl": 0.01068115234375,
"learning_rate": 4.372524399734998e-06,
"loss": 0.0011,
"reward": 0.6180136650800705,
"reward_std": 0.16962039656937122,
"rewards/check_gptzero_func": 0.6180136650800705,
"step": 110
},
{
"completion_length": 182.40476608276367,
"epoch": 0.3107067879636109,
"grad_norm": 0.875,
"kl": 0.00982666015625,
"learning_rate": 4.356223996782181e-06,
"loss": 0.001,
"reward": 0.6765602007508278,
"reward_std": 0.15866447985172272,
"rewards/check_gptzero_func": 0.6765602007508278,
"step": 111
},
{
"completion_length": 186.48810195922852,
"epoch": 0.31350594821553535,
"grad_norm": 0.92578125,
"kl": 0.0094451904296875,
"learning_rate": 4.339745800035652e-06,
"loss": 0.0009,
"reward": 0.6582833528518677,
"reward_std": 0.15360314585268497,
"rewards/check_gptzero_func": 0.6582833528518677,
"step": 112
},
{
"completion_length": 184.5238151550293,
"epoch": 0.31630510846745974,
"grad_norm": 0.92578125,
"kl": 0.01065826416015625,
"learning_rate": 4.323091387818459e-06,
"loss": 0.0011,
"reward": 0.5341470539569855,
"reward_std": 0.23594587668776512,
"rewards/check_gptzero_func": 0.5341470539569855,
"step": 113
},
{
"completion_length": 194.97619247436523,
"epoch": 0.3191042687193842,
"grad_norm": 0.83203125,
"kl": 0.00930023193359375,
"learning_rate": 4.306262355332006e-06,
"loss": 0.0009,
"reward": 0.6334607377648354,
"reward_std": 0.17327153496444225,
"rewards/check_gptzero_func": 0.6334607377648354,
"step": 114
},
{
"completion_length": 188.2857208251953,
"epoch": 0.3219034289713086,
"grad_norm": 0.875,
"kl": 0.01202392578125,
"learning_rate": 4.2892603145032684e-06,
"loss": 0.0012,
"reward": 0.6626207306981087,
"reward_std": 0.18220025673508644,
"rewards/check_gptzero_func": 0.6626207306981087,
"step": 115
},
{
"completion_length": 182.67857360839844,
"epoch": 0.32470258922323303,
"grad_norm": 0.88671875,
"kl": 0.01158905029296875,
"learning_rate": 4.272086893830394e-06,
"loss": 0.0012,
"reward": 0.6602620035409927,
"reward_std": 0.15469545125961304,
"rewards/check_gptzero_func": 0.6602620035409927,
"step": 116
},
{
"completion_length": 185.90476608276367,
"epoch": 0.3275017494751575,
"grad_norm": 0.83984375,
"kl": 0.011932373046875,
"learning_rate": 4.254743738226721e-06,
"loss": 0.0012,
"reward": 0.6419829577207565,
"reward_std": 0.18072698265314102,
"rewards/check_gptzero_func": 0.6419829577207565,
"step": 117
},
{
"completion_length": 189.59524154663086,
"epoch": 0.33030090972708187,
"grad_norm": 0.87109375,
"kl": 0.01031494140625,
"learning_rate": 4.237232508863226e-06,
"loss": 0.001,
"reward": 0.8117964118719101,
"reward_std": 0.11239873245358467,
"rewards/check_gptzero_func": 0.8117964118719101,
"step": 118
},
{
"completion_length": 179.75,
"epoch": 0.3331000699790063,
"grad_norm": 0.921875,
"kl": 0.01169586181640625,
"learning_rate": 4.219554883009412e-06,
"loss": 0.0012,
"reward": 0.6252808570861816,
"reward_std": 0.16097365505993366,
"rewards/check_gptzero_func": 0.6252808570861816,
"step": 119
},
{
"completion_length": 191.64286041259766,
"epoch": 0.3358992302309307,
"grad_norm": 0.9921875,
"kl": 0.013031005859375,
"learning_rate": 4.2017125538726574e-06,
"loss": 0.0013,
"reward": 0.656028687953949,
"reward_std": 0.21221196837723255,
"rewards/check_gptzero_func": 0.656028687953949,
"step": 120
},
{
"completion_length": 189.71429061889648,
"epoch": 0.33869839048285516,
"grad_norm": 0.890625,
"kl": 0.010650634765625,
"learning_rate": 4.183707230436032e-06,
"loss": 0.0011,
"reward": 0.6637793928384781,
"reward_std": 0.21517397835850716,
"rewards/check_gptzero_func": 0.6637793928384781,
"step": 121
},
{
"completion_length": 201.35714721679688,
"epoch": 0.34149755073477955,
"grad_norm": 0.8203125,
"kl": 0.00860595703125,
"learning_rate": 4.165540637294608e-06,
"loss": 0.0009,
"reward": 0.6951557993888855,
"reward_std": 0.20356887206435204,
"rewards/check_gptzero_func": 0.6951557993888855,
"step": 122
},
{
"completion_length": 174.3690528869629,
"epoch": 0.344296710986704,
"grad_norm": 1.015625,
"kl": 0.0137176513671875,
"learning_rate": 4.147214514490278e-06,
"loss": 0.0014,
"reward": 0.6053376868367195,
"reward_std": 0.14120884239673615,
"rewards/check_gptzero_func": 0.6053376868367195,
"step": 123
},
{
"completion_length": 191.35715103149414,
"epoch": 0.3470958712386284,
"grad_norm": 0.87890625,
"kl": 0.0111083984375,
"learning_rate": 4.128730617345085e-06,
"loss": 0.0011,
"reward": 0.6748835146427155,
"reward_std": 0.15946420282125473,
"rewards/check_gptzero_func": 0.6748835146427155,
"step": 124
},
{
"completion_length": 186.58333587646484,
"epoch": 0.34989503149055284,
"grad_norm": 0.8671875,
"kl": 0.01071929931640625,
"learning_rate": 4.110090716293093e-06,
"loss": 0.0011,
"reward": 0.5565099567174911,
"reward_std": 0.14936872385442257,
"rewards/check_gptzero_func": 0.5565099567174911,
"step": 125
},
{
"completion_length": 177.53571319580078,
"epoch": 0.35269419174247724,
"grad_norm": 0.88671875,
"kl": 0.0113983154296875,
"learning_rate": 4.091296596710812e-06,
"loss": 0.0011,
"reward": 0.6619952172040939,
"reward_std": 0.1810350650921464,
"rewards/check_gptzero_func": 0.6619952172040939,
"step": 126
},
{
"completion_length": 173.91666793823242,
"epoch": 0.3554933519944017,
"grad_norm": 0.90234375,
"kl": 0.0118255615234375,
"learning_rate": 4.072350058746193e-06,
"loss": 0.0012,
"reward": 0.6603459864854813,
"reward_std": 0.22001322731375694,
"rewards/check_gptzero_func": 0.6603459864854813,
"step": 127
},
{
"completion_length": 173.76190567016602,
"epoch": 0.3582925122463261,
"grad_norm": 0.9921875,
"kl": 0.0127410888671875,
"learning_rate": 4.053252917146198e-06,
"loss": 0.0013,
"reward": 0.7352914214134216,
"reward_std": 0.1779029555618763,
"rewards/check_gptzero_func": 0.7352914214134216,
"step": 128
},
{
"completion_length": 171.26190567016602,
"epoch": 0.3610916724982505,
"grad_norm": 0.84765625,
"kl": 0.012054443359375,
"learning_rate": 4.034007001082985e-06,
"loss": 0.0012,
"reward": 0.6395312622189522,
"reward_std": 0.15299177914857864,
"rewards/check_gptzero_func": 0.6395312622189522,
"step": 129
},
{
"completion_length": 179.78571701049805,
"epoch": 0.363890832750175,
"grad_norm": 0.92578125,
"kl": 0.01220703125,
"learning_rate": 4.014614153978704e-06,
"loss": 0.0012,
"reward": 0.6132207363843918,
"reward_std": 0.21152934804558754,
"rewards/check_gptzero_func": 0.6132207363843918,
"step": 130
},
{
"completion_length": 195.92857360839844,
"epoch": 0.36668999300209937,
"grad_norm": 0.78125,
"kl": 0.01036834716796875,
"learning_rate": 3.99507623332893e-06,
"loss": 0.001,
"reward": 0.7027375251054764,
"reward_std": 0.15015212446451187,
"rewards/check_gptzero_func": 0.7027375251054764,
"step": 131
},
{
"completion_length": 180.5357208251953,
"epoch": 0.3694891532540238,
"grad_norm": 0.828125,
"kl": 0.0123291015625,
"learning_rate": 3.975395110524742e-06,
"loss": 0.0012,
"reward": 0.6938442587852478,
"reward_std": 0.13277364149689674,
"rewards/check_gptzero_func": 0.6938442587852478,
"step": 132
},
{
"completion_length": 189.3333396911621,
"epoch": 0.3722883135059482,
"grad_norm": 0.890625,
"kl": 0.0106048583984375,
"learning_rate": 3.955572670673486e-06,
"loss": 0.0011,
"reward": 0.6352255120873451,
"reward_std": 0.16060136631131172,
"rewards/check_gptzero_func": 0.6352255120873451,
"step": 133
},
{
"completion_length": 180.3690528869629,
"epoch": 0.37508747375787266,
"grad_norm": 0.93359375,
"kl": 0.0124359130859375,
"learning_rate": 3.935610812418207e-06,
"loss": 0.0012,
"reward": 0.6548151075839996,
"reward_std": 0.20537016168236732,
"rewards/check_gptzero_func": 0.6548151075839996,
"step": 134
},
{
"completion_length": 167.08333587646484,
"epoch": 0.37788663400979705,
"grad_norm": 1.15625,
"kl": 0.01214599609375,
"learning_rate": 3.915511447755793e-06,
"loss": 0.0012,
"reward": 0.6178692057728767,
"reward_std": 0.17171020805835724,
"rewards/check_gptzero_func": 0.6178692057728767,
"step": 135
},
{
"completion_length": 185.71429061889648,
"epoch": 0.3806857942617215,
"grad_norm": 0.85546875,
"kl": 0.0107269287109375,
"learning_rate": 3.895276501853846e-06,
"loss": 0.0011,
"reward": 0.5686581507325172,
"reward_std": 0.17122036777436733,
"rewards/check_gptzero_func": 0.5686581507325172,
"step": 136
},
{
"completion_length": 195.21429061889648,
"epoch": 0.3834849545136459,
"grad_norm": 0.875,
"kl": 0.0097503662109375,
"learning_rate": 3.8749079128662715e-06,
"loss": 0.001,
"reward": 0.6965835765004158,
"reward_std": 0.11745740473270416,
"rewards/check_gptzero_func": 0.6965835765004158,
"step": 137
},
{
"completion_length": 192.04762649536133,
"epoch": 0.38628411476557034,
"grad_norm": 0.84375,
"kl": 0.01324462890625,
"learning_rate": 3.854407631747653e-06,
"loss": 0.0013,
"reward": 0.6779208928346634,
"reward_std": 0.17420672625303268,
"rewards/check_gptzero_func": 0.6779208928346634,
"step": 138
},
{
"completion_length": 186.09524154663086,
"epoch": 0.38908327501749473,
"grad_norm": 0.8671875,
"kl": 0.0122528076171875,
"learning_rate": 3.833777622066374e-06,
"loss": 0.0012,
"reward": 0.6140344738960266,
"reward_std": 0.1967415027320385,
"rewards/check_gptzero_func": 0.6140344738960266,
"step": 139
},
{
"completion_length": 188.20238494873047,
"epoch": 0.3918824352694192,
"grad_norm": 0.83203125,
"kl": 0.0146331787109375,
"learning_rate": 3.8130198598165447e-06,
"loss": 0.0015,
"reward": 0.7608011960983276,
"reward_std": 0.1561479065567255,
"rewards/check_gptzero_func": 0.7608011960983276,
"step": 140
},
{
"completion_length": 197.88095092773438,
"epoch": 0.3946815955213436,
"grad_norm": 0.8515625,
"kl": 0.0110931396484375,
"learning_rate": 3.7921363332287354e-06,
"loss": 0.0011,
"reward": 0.6567741185426712,
"reward_std": 0.186597790569067,
"rewards/check_gptzero_func": 0.6567741185426712,
"step": 141
},
{
"completion_length": 191.01190567016602,
"epoch": 0.397480755773268,
"grad_norm": 0.87109375,
"kl": 0.01143646240234375,
"learning_rate": 3.7711290425795453e-06,
"loss": 0.0011,
"reward": 0.769344687461853,
"reward_std": 0.14905713684856892,
"rewards/check_gptzero_func": 0.769344687461853,
"step": 142
},
{
"completion_length": 195.35714721679688,
"epoch": 0.4002799160251924,
"grad_norm": 0.91015625,
"kl": 0.01220703125,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0012,
"reward": 0.5832869336009026,
"reward_std": 0.17275189980864525,
"rewards/check_gptzero_func": 0.5832869336009026,
"step": 143
},
{
"completion_length": 187.6428565979004,
"epoch": 0.40307907627711687,
"grad_norm": 0.87890625,
"kl": 0.0144500732421875,
"learning_rate": 3.7287512292828364e-06,
"loss": 0.0014,
"reward": 0.6654903590679169,
"reward_std": 0.12800591439008713,
"rewards/check_gptzero_func": 0.6654903590679169,
"step": 144
},
{
"completion_length": 182.00000381469727,
"epoch": 0.4058782365290413,
"grad_norm": 0.87890625,
"kl": 0.014068603515625,
"learning_rate": 3.707384765688649e-06,
"loss": 0.0014,
"reward": 0.6202088594436646,
"reward_std": 0.1452749650925398,
"rewards/check_gptzero_func": 0.6202088594436646,
"step": 145
},
{
"completion_length": 183.88095474243164,
"epoch": 0.4086773967809657,
"grad_norm": 1.015625,
"kl": 0.013336181640625,
"learning_rate": 3.6859026557509525e-06,
"loss": 0.0013,
"reward": 0.6348527073860168,
"reward_std": 0.1373548824340105,
"rewards/check_gptzero_func": 0.6348527073860168,
"step": 146
},
{
"completion_length": 194.55952835083008,
"epoch": 0.41147655703289016,
"grad_norm": 1.1328125,
"kl": 0.0120391845703125,
"learning_rate": 3.6643069570801593e-06,
"loss": 0.0012,
"reward": 0.6138034015893936,
"reward_std": 0.2177984118461609,
"rewards/check_gptzero_func": 0.6138034015893936,
"step": 147
},
{
"completion_length": 203.25,
"epoch": 0.41427571728481455,
"grad_norm": 0.75,
"kl": 0.0098419189453125,
"learning_rate": 3.6425997381664955e-06,
"loss": 0.001,
"reward": 0.6724039763212204,
"reward_std": 0.14088603854179382,
"rewards/check_gptzero_func": 0.6724039763212204,
"step": 148
},
{
"completion_length": 180.29762268066406,
"epoch": 0.417074877536739,
"grad_norm": 1.078125,
"kl": 0.0143280029296875,
"learning_rate": 3.6207830781818753e-06,
"loss": 0.0014,
"reward": 0.6434099301695824,
"reward_std": 0.1766284443438053,
"rewards/check_gptzero_func": 0.6434099301695824,
"step": 149
},
{
"completion_length": 196.88095474243164,
"epoch": 0.4198740377886634,
"grad_norm": 0.796875,
"kl": 0.0113067626953125,
"learning_rate": 3.5988590667807542e-06,
"loss": 0.0011,
"reward": 0.6718230247497559,
"reward_std": 0.13120126724243164,
"rewards/check_gptzero_func": 0.6718230247497559,
"step": 150
},
{
"completion_length": 205.48809814453125,
"epoch": 0.42267319804058784,
"grad_norm": 0.8359375,
"kl": 0.0093536376953125,
"learning_rate": 3.576829803899976e-06,
"loss": 0.0009,
"reward": 0.6097076088190079,
"reward_std": 0.14795276708900928,
"rewards/check_gptzero_func": 0.6097076088190079,
"step": 151
},
{
"completion_length": 171.79762649536133,
"epoch": 0.42547235829251223,
"grad_norm": 1.1484375,
"kl": 0.014373779296875,
"learning_rate": 3.554697399557634e-06,
"loss": 0.0014,
"reward": 0.6360819041728973,
"reward_std": 0.16829469613730907,
"rewards/check_gptzero_func": 0.6360819041728973,
"step": 152
},
{
"completion_length": 185.02381134033203,
"epoch": 0.4282715185444367,
"grad_norm": 0.90234375,
"kl": 0.01239013671875,
"learning_rate": 3.532463973650971e-06,
"loss": 0.0012,
"reward": 0.4822230823338032,
"reward_std": 0.1834505433216691,
"rewards/check_gptzero_func": 0.4822230823338032,
"step": 153
},
{
"completion_length": 194.2738151550293,
"epoch": 0.4310706787963611,
"grad_norm": 0.84375,
"kl": 0.0112152099609375,
"learning_rate": 3.5101316557533293e-06,
"loss": 0.0011,
"reward": 0.6571109592914581,
"reward_std": 0.17676730267703533,
"rewards/check_gptzero_func": 0.6571109592914581,
"step": 154
},
{
"completion_length": 185.50000381469727,
"epoch": 0.4338698390482855,
"grad_norm": 0.921875,
"kl": 0.0124969482421875,
"learning_rate": 3.487702584910172e-06,
"loss": 0.0013,
"reward": 0.6433713883161545,
"reward_std": 0.18067739717662334,
"rewards/check_gptzero_func": 0.6433713883161545,
"step": 155
},
{
"completion_length": 205.00000381469727,
"epoch": 0.4366689993002099,
"grad_norm": 0.859375,
"kl": 0.00884246826171875,
"learning_rate": 3.4651789094342043e-06,
"loss": 0.0009,
"reward": 0.7346427142620087,
"reward_std": 0.15348245482891798,
"rewards/check_gptzero_func": 0.7346427142620087,
"step": 156
},
{
"completion_length": 174.3690528869629,
"epoch": 0.43946815955213436,
"grad_norm": 1.0859375,
"kl": 0.01556396484375,
"learning_rate": 3.4425627866996003e-06,
"loss": 0.0016,
"reward": 0.6470814943313599,
"reward_std": 0.15813233144581318,
"rewards/check_gptzero_func": 0.6470814943313599,
"step": 157
},
{
"completion_length": 179.40476608276367,
"epoch": 0.44226731980405876,
"grad_norm": 0.89453125,
"kl": 0.012420654296875,
"learning_rate": 3.4198563829353624e-06,
"loss": 0.0012,
"reward": 0.6753295511007309,
"reward_std": 0.19164511188864708,
"rewards/check_gptzero_func": 0.6753295511007309,
"step": 158
},
{
"completion_length": 196.00000381469727,
"epoch": 0.4450664800559832,
"grad_norm": 0.83984375,
"kl": 0.0102386474609375,
"learning_rate": 3.39706187301784e-06,
"loss": 0.001,
"reward": 0.6890220493078232,
"reward_std": 0.11566946748644114,
"rewards/check_gptzero_func": 0.6890220493078232,
"step": 159
},
{
"completion_length": 176.21428680419922,
"epoch": 0.44786564030790765,
"grad_norm": 0.99609375,
"kl": 0.0136871337890625,
"learning_rate": 3.3741814402624094e-06,
"loss": 0.0014,
"reward": 0.5286017879843712,
"reward_std": 0.22522129118442535,
"rewards/check_gptzero_func": 0.5286017879843712,
"step": 160
},
{
"completion_length": 194.3571434020996,
"epoch": 0.45066480055983205,
"grad_norm": 0.81640625,
"kl": 0.0122833251953125,
"learning_rate": 3.351217276214351e-06,
"loss": 0.0012,
"reward": 0.4715605303645134,
"reward_std": 0.18703988194465637,
"rewards/check_gptzero_func": 0.4715605303645134,
"step": 161
},
{
"completion_length": 178.90476608276367,
"epoch": 0.4534639608117565,
"grad_norm": 1.0078125,
"kl": 0.012786865234375,
"learning_rate": 3.32817158043894e-06,
"loss": 0.0013,
"reward": 0.5694275945425034,
"reward_std": 0.24036183580756187,
"rewards/check_gptzero_func": 0.5694275945425034,
"step": 162
},
{
"completion_length": 183.09524536132812,
"epoch": 0.4562631210636809,
"grad_norm": 0.98046875,
"kl": 0.0117034912109375,
"learning_rate": 3.305046560310766e-06,
"loss": 0.0012,
"reward": 0.7210999131202698,
"reward_std": 0.1915903128683567,
"rewards/check_gptzero_func": 0.7210999131202698,
"step": 163
},
{
"completion_length": 207.96428680419922,
"epoch": 0.45906228131560534,
"grad_norm": 0.84375,
"kl": 0.00853729248046875,
"learning_rate": 3.2818444308023e-06,
"loss": 0.0009,
"reward": 0.5639151483774185,
"reward_std": 0.18663722090423107,
"rewards/check_gptzero_func": 0.5639151483774185,
"step": 164
},
{
"completion_length": 195.96429061889648,
"epoch": 0.46186144156752973,
"grad_norm": 1.078125,
"kl": 0.0107574462890625,
"learning_rate": 3.2585674142717483e-06,
"loss": 0.0011,
"reward": 0.6154336631298065,
"reward_std": 0.19948378019034863,
"rewards/check_gptzero_func": 0.6154336631298065,
"step": 165
},
{
"completion_length": 196.94047927856445,
"epoch": 0.4646606018194542,
"grad_norm": 0.86328125,
"kl": 0.012969970703125,
"learning_rate": 3.2352177402501813e-06,
"loss": 0.0013,
"reward": 0.5297554209828377,
"reward_std": 0.16210689023137093,
"rewards/check_gptzero_func": 0.5297554209828377,
"step": 166
},
{
"completion_length": 181.3333396911621,
"epoch": 0.46745976207137857,
"grad_norm": 0.9609375,
"kl": 0.010589599609375,
"learning_rate": 3.2117976452279854e-06,
"loss": 0.0011,
"reward": 0.6036887094378471,
"reward_std": 0.21627848595380783,
"rewards/check_gptzero_func": 0.6036887094378471,
"step": 167
},
{
"completion_length": 198.38095474243164,
"epoch": 0.470258922323303,
"grad_norm": 0.875,
"kl": 0.0103607177734375,
"learning_rate": 3.18830937244065e-06,
"loss": 0.001,
"reward": 0.6530143320560455,
"reward_std": 0.18831264041364193,
"rewards/check_gptzero_func": 0.6530143320560455,
"step": 168
},
{
"completion_length": 192.27381134033203,
"epoch": 0.4730580825752274,
"grad_norm": 0.86328125,
"kl": 0.01263427734375,
"learning_rate": 3.1647551716539004e-06,
"loss": 0.0013,
"reward": 0.6262907981872559,
"reward_std": 0.11849029827862978,
"rewards/check_gptzero_func": 0.6262907981872559,
"step": 169
},
{
"completion_length": 203.8333396911621,
"epoch": 0.47585724282715186,
"grad_norm": 0.76171875,
"kl": 0.0087738037109375,
"learning_rate": 3.1411372989482105e-06,
"loss": 0.0009,
"reward": 0.6544186323881149,
"reward_std": 0.13314771838486195,
"rewards/check_gptzero_func": 0.6544186323881149,
"step": 170
},
{
"completion_length": 172.25000381469727,
"epoch": 0.47865640307907625,
"grad_norm": 1.0390625,
"kl": 0.01348876953125,
"learning_rate": 3.1174580165027106e-06,
"loss": 0.0014,
"reward": 0.7334302663803101,
"reward_std": 0.19380612671375275,
"rewards/check_gptzero_func": 0.7334302663803101,
"step": 171
},
{
"completion_length": 185.2738151550293,
"epoch": 0.4814555633310007,
"grad_norm": 0.9453125,
"kl": 0.0138092041015625,
"learning_rate": 3.0937195923785124e-06,
"loss": 0.0014,
"reward": 0.6392181292176247,
"reward_std": 0.20777087286114693,
"rewards/check_gptzero_func": 0.6392181292176247,
"step": 172
},
{
"completion_length": 192.71429061889648,
"epoch": 0.4842547235829251,
"grad_norm": 0.9453125,
"kl": 0.01061248779296875,
"learning_rate": 3.069924300301463e-06,
"loss": 0.0011,
"reward": 0.6806470304727554,
"reward_std": 0.21131999045610428,
"rewards/check_gptzero_func": 0.6806470304727554,
"step": 173
},
{
"completion_length": 179.77380752563477,
"epoch": 0.48705388383484954,
"grad_norm": 0.83984375,
"kl": 0.0126800537109375,
"learning_rate": 3.0460744194443658e-06,
"loss": 0.0013,
"reward": 0.47618968039751053,
"reward_std": 0.17172732576727867,
"rewards/check_gptzero_func": 0.47618968039751053,
"step": 174
},
{
"completion_length": 181.53571701049805,
"epoch": 0.489853044086774,
"grad_norm": 0.94921875,
"kl": 0.0154266357421875,
"learning_rate": 3.0221722342086762e-06,
"loss": 0.0015,
"reward": 0.7085084468126297,
"reward_std": 0.17813345789909363,
"rewards/check_gptzero_func": 0.7085084468126297,
"step": 175
},
{
"completion_length": 196.54762268066406,
"epoch": 0.4926522043386984,
"grad_norm": 0.875,
"kl": 0.00936126708984375,
"learning_rate": 2.9982200340056916e-06,
"loss": 0.0009,
"reward": 0.6359190493822098,
"reward_std": 0.16200686059892178,
"rewards/check_gptzero_func": 0.6359190493822098,
"step": 176
},
{
"completion_length": 192.2857208251953,
"epoch": 0.49545136459062283,
"grad_norm": 0.83984375,
"kl": 0.010528564453125,
"learning_rate": 2.9742201130372693e-06,
"loss": 0.0011,
"reward": 0.6544022858142853,
"reward_std": 0.20328444987535477,
"rewards/check_gptzero_func": 0.6544022858142853,
"step": 177
},
{
"completion_length": 182.63095474243164,
"epoch": 0.4982505248425472,
"grad_norm": 0.86328125,
"kl": 0.0142669677734375,
"learning_rate": 2.9501747700760834e-06,
"loss": 0.0014,
"reward": 0.5758941918611526,
"reward_std": 0.1651664450764656,
"rewards/check_gptzero_func": 0.5758941918611526,
"step": 178
},
{
"completion_length": 192.67857360839844,
"epoch": 0.5010496850944717,
"grad_norm": 0.85546875,
"kl": 0.01207733154296875,
"learning_rate": 2.9260863082454377e-06,
"loss": 0.0012,
"reward": 0.7050619274377823,
"reward_std": 0.18743818067014217,
"rewards/check_gptzero_func": 0.7050619274377823,
"step": 179
},
{
"completion_length": 181.1547622680664,
"epoch": 0.5038488453463961,
"grad_norm": 0.89453125,
"kl": 0.0139617919921875,
"learning_rate": 2.901957034798671e-06,
"loss": 0.0014,
"reward": 0.6579191908240318,
"reward_std": 0.23193923011422157,
"rewards/check_gptzero_func": 0.6579191908240318,
"step": 180
},
{
"completion_length": 198.88095474243164,
"epoch": 0.5066480055983205,
"grad_norm": 0.87890625,
"kl": 0.0117340087890625,
"learning_rate": 2.8777892608981605e-06,
"loss": 0.0012,
"reward": 0.7810440808534622,
"reward_std": 0.14343063719570637,
"rewards/check_gptzero_func": 0.7810440808534622,
"step": 181
},
{
"completion_length": 183.89286041259766,
"epoch": 0.509447165850245,
"grad_norm": 0.83203125,
"kl": 0.0113983154296875,
"learning_rate": 2.853585301393954e-06,
"loss": 0.0012,
"reward": 0.5110857635736465,
"reward_std": 0.17657889798283577,
"rewards/check_gptzero_func": 0.5110857635736465,
"step": 182
},
{
"completion_length": 183.53571701049805,
"epoch": 0.5122463261021694,
"grad_norm": 1.046875,
"kl": 0.0143585205078125,
"learning_rate": 2.829347474602047e-06,
"loss": 0.0014,
"reward": 0.7601535469293594,
"reward_std": 0.13078506011515856,
"rewards/check_gptzero_func": 0.7601535469293594,
"step": 183
},
{
"completion_length": 191.05952835083008,
"epoch": 0.5150454863540938,
"grad_norm": 1.0390625,
"kl": 0.01151275634765625,
"learning_rate": 2.80507810208233e-06,
"loss": 0.0012,
"reward": 0.7207075506448746,
"reward_std": 0.23219925537705421,
"rewards/check_gptzero_func": 0.7207075506448746,
"step": 184
},
{
"completion_length": 174.4761962890625,
"epoch": 0.5178446466060181,
"grad_norm": 1.0,
"kl": 0.0149383544921875,
"learning_rate": 2.780779508416219e-06,
"loss": 0.0015,
"reward": 0.7076038122177124,
"reward_std": 0.15643260441720486,
"rewards/check_gptzero_func": 0.7076038122177124,
"step": 185
},
{
"completion_length": 186.65476608276367,
"epoch": 0.5206438068579426,
"grad_norm": 0.87890625,
"kl": 0.0148773193359375,
"learning_rate": 2.756454020984009e-06,
"loss": 0.0015,
"reward": 0.6423147022724152,
"reward_std": 0.12505882722325623,
"rewards/check_gptzero_func": 0.6423147022724152,
"step": 186
},
{
"completion_length": 197.96429061889648,
"epoch": 0.523442967109867,
"grad_norm": 0.84375,
"kl": 0.010894775390625,
"learning_rate": 2.7321039697419453e-06,
"loss": 0.0011,
"reward": 0.5366896614432335,
"reward_std": 0.1639111079275608,
"rewards/check_gptzero_func": 0.5366896614432335,
"step": 187
},
{
"completion_length": 180.73810195922852,
"epoch": 0.5262421273617914,
"grad_norm": 0.89453125,
"kl": 0.0135650634765625,
"learning_rate": 2.707731686999056e-06,
"loss": 0.0014,
"reward": 0.7638429999351501,
"reward_std": 0.11215963400900364,
"rewards/check_gptzero_func": 0.7638429999351501,
"step": 188
},
{
"completion_length": 177.96429061889648,
"epoch": 0.5290412876137159,
"grad_norm": 0.90625,
"kl": 0.0150146484375,
"learning_rate": 2.68333950719376e-06,
"loss": 0.0015,
"reward": 0.7288801521062851,
"reward_std": 0.13601511158049107,
"rewards/check_gptzero_func": 0.7288801521062851,
"step": 189
},
{
"completion_length": 197.54762268066406,
"epoch": 0.5318404478656403,
"grad_norm": 0.80859375,
"kl": 0.0102691650390625,
"learning_rate": 2.658929766670266e-06,
"loss": 0.0011,
"reward": 0.5935259684920311,
"reward_std": 0.12766608223319054,
"rewards/check_gptzero_func": 0.5935259684920311,
"step": 190
},
{
"completion_length": 189.6547622680664,
"epoch": 0.5346396081175647,
"grad_norm": 0.859375,
"kl": 0.0122833251953125,
"learning_rate": 2.63450480345479e-06,
"loss": 0.0012,
"reward": 0.7497572600841522,
"reward_std": 0.13245987240225077,
"rewards/check_gptzero_func": 0.7497572600841522,
"step": 191
},
{
"completion_length": 180.51190948486328,
"epoch": 0.5374387683694891,
"grad_norm": 0.91015625,
"kl": 0.0144500732421875,
"learning_rate": 2.6100669570316194e-06,
"loss": 0.0014,
"reward": 0.7178633213043213,
"reward_std": 0.1744341142475605,
"rewards/check_gptzero_func": 0.7178633213043213,
"step": 192
},
{
"completion_length": 195.26190567016602,
"epoch": 0.5402379286214136,
"grad_norm": 0.81640625,
"kl": 0.012542724609375,
"learning_rate": 2.585618568119027e-06,
"loss": 0.0013,
"reward": 0.6315608844161034,
"reward_std": 0.14562865998595953,
"rewards/check_gptzero_func": 0.6315608844161034,
"step": 193
},
{
"completion_length": 190.47619247436523,
"epoch": 0.543037088873338,
"grad_norm": 0.94140625,
"kl": 0.0131378173828125,
"learning_rate": 2.561161978445068e-06,
"loss": 0.0013,
"reward": 0.65364570915699,
"reward_std": 0.14526648819446564,
"rewards/check_gptzero_func": 0.65364570915699,
"step": 194
},
{
"completion_length": 160.94047927856445,
"epoch": 0.5458362491252624,
"grad_norm": 1.0234375,
"kl": 0.020477294921875,
"learning_rate": 2.536699530523292e-06,
"loss": 0.0021,
"reward": 0.7076306045055389,
"reward_std": 0.15630067139863968,
"rewards/check_gptzero_func": 0.7076306045055389,
"step": 195
},
{
"completion_length": 184.70238494873047,
"epoch": 0.5486354093771868,
"grad_norm": 0.94921875,
"kl": 0.0129241943359375,
"learning_rate": 2.5122335674283625e-06,
"loss": 0.0013,
"reward": 0.5801831930875778,
"reward_std": 0.18576505780220032,
"rewards/check_gptzero_func": 0.5801831930875778,
"step": 196
},
{
"completion_length": 186.60714721679688,
"epoch": 0.5514345696291113,
"grad_norm": 0.87890625,
"kl": 0.0130615234375,
"learning_rate": 2.4877664325716383e-06,
"loss": 0.0013,
"reward": 0.7721930146217346,
"reward_std": 0.1962270326912403,
"rewards/check_gptzero_func": 0.7721930146217346,
"step": 197
},
{
"completion_length": 182.42857360839844,
"epoch": 0.5542337298810357,
"grad_norm": 0.9765625,
"kl": 0.01416015625,
"learning_rate": 2.463300469476709e-06,
"loss": 0.0014,
"reward": 0.6073248982429504,
"reward_std": 0.17869799211621284,
"rewards/check_gptzero_func": 0.6073248982429504,
"step": 198
},
{
"completion_length": 178.32143020629883,
"epoch": 0.5570328901329601,
"grad_norm": 1.0546875,
"kl": 0.01686859130859375,
"learning_rate": 2.4388380215549332e-06,
"loss": 0.0017,
"reward": 0.670776292681694,
"reward_std": 0.18726542592048645,
"rewards/check_gptzero_func": 0.670776292681694,
"step": 199
},
{
"completion_length": 193.4047622680664,
"epoch": 0.5598320503848845,
"grad_norm": 0.8046875,
"kl": 0.0124969482421875,
"learning_rate": 2.414381431880974e-06,
"loss": 0.0013,
"reward": 0.5982818156480789,
"reward_std": 0.16670218110084534,
"rewards/check_gptzero_func": 0.5982818156480789,
"step": 200
},
{
"completion_length": 190.5833396911621,
"epoch": 0.562631210636809,
"grad_norm": 0.8046875,
"kl": 0.0111846923828125,
"learning_rate": 2.389933042968381e-06,
"loss": 0.0011,
"reward": 0.7207561880350113,
"reward_std": 0.1551688564941287,
"rewards/check_gptzero_func": 0.7207561880350113,
"step": 201
},
{
"completion_length": 183.8690528869629,
"epoch": 0.5654303708887334,
"grad_norm": 0.859375,
"kl": 0.014251708984375,
"learning_rate": 2.365495196545211e-06,
"loss": 0.0014,
"reward": 0.6653933525085449,
"reward_std": 0.18065885081887245,
"rewards/check_gptzero_func": 0.6653933525085449,
"step": 202
},
{
"completion_length": 191.30952835083008,
"epoch": 0.5682295311406578,
"grad_norm": 0.8828125,
"kl": 0.0137786865234375,
"learning_rate": 2.3410702333297358e-06,
"loss": 0.0014,
"reward": 0.7060705721378326,
"reward_std": 0.14498403668403625,
"rewards/check_gptzero_func": 0.7060705721378326,
"step": 203
},
{
"completion_length": 170.05952835083008,
"epoch": 0.5710286913925823,
"grad_norm": 0.88671875,
"kl": 0.0150909423828125,
"learning_rate": 2.3166604928062407e-06,
"loss": 0.0015,
"reward": 0.6380977034568787,
"reward_std": 0.14762726612389088,
"rewards/check_gptzero_func": 0.6380977034568787,
"step": 204
},
{
"completion_length": 187.05952835083008,
"epoch": 0.5738278516445067,
"grad_norm": 0.82421875,
"kl": 0.0121002197265625,
"learning_rate": 2.292268313000945e-06,
"loss": 0.0012,
"reward": 0.7264238968491554,
"reward_std": 0.12679270654916763,
"rewards/check_gptzero_func": 0.7264238968491554,
"step": 205
},
{
"completion_length": 183.28571701049805,
"epoch": 0.5766270118964311,
"grad_norm": 0.8828125,
"kl": 0.0143280029296875,
"learning_rate": 2.267896030258056e-06,
"loss": 0.0014,
"reward": 0.6818206459283829,
"reward_std": 0.1636413224041462,
"rewards/check_gptzero_func": 0.6818206459283829,
"step": 206
},
{
"completion_length": 177.84524154663086,
"epoch": 0.5794261721483555,
"grad_norm": 0.8984375,
"kl": 0.0160980224609375,
"learning_rate": 2.243545979015992e-06,
"loss": 0.0016,
"reward": 0.5764013379812241,
"reward_std": 0.17973252199590206,
"rewards/check_gptzero_func": 0.5764013379812241,
"step": 207
},
{
"completion_length": 189.97619247436523,
"epoch": 0.58222533240028,
"grad_norm": 0.7734375,
"kl": 0.010650634765625,
"learning_rate": 2.219220491583782e-06,
"loss": 0.0011,
"reward": 0.6200987994670868,
"reward_std": 0.15654520690441132,
"rewards/check_gptzero_func": 0.6200987994670868,
"step": 208
},
{
"completion_length": 174.96429061889648,
"epoch": 0.5850244926522044,
"grad_norm": 0.9609375,
"kl": 0.0125885009765625,
"learning_rate": 2.1949218979176718e-06,
"loss": 0.0013,
"reward": 0.7681840658187866,
"reward_std": 0.17539203353226185,
"rewards/check_gptzero_func": 0.7681840658187866,
"step": 209
},
{
"completion_length": 160.4047622680664,
"epoch": 0.5878236529041287,
"grad_norm": 1.0546875,
"kl": 0.016204833984375,
"learning_rate": 2.1706525253979533e-06,
"loss": 0.0016,
"reward": 0.6341628283262253,
"reward_std": 0.17086376622319221,
"rewards/check_gptzero_func": 0.6341628283262253,
"step": 210
},
{
"completion_length": 184.96429061889648,
"epoch": 0.5906228131560531,
"grad_norm": 1.1015625,
"kl": 0.012847900390625,
"learning_rate": 2.146414698606047e-06,
"loss": 0.0013,
"reward": 0.6099446341395378,
"reward_std": 0.26053616404533386,
"rewards/check_gptzero_func": 0.6099446341395378,
"step": 211
},
{
"completion_length": 173.38095474243164,
"epoch": 0.5934219734079776,
"grad_norm": 1.0234375,
"kl": 0.0145111083984375,
"learning_rate": 2.1222107391018403e-06,
"loss": 0.0015,
"reward": 0.6794377863407135,
"reward_std": 0.1412256360054016,
"rewards/check_gptzero_func": 0.6794377863407135,
"step": 212
},
{
"completion_length": 190.47619247436523,
"epoch": 0.596221133659902,
"grad_norm": 0.87890625,
"kl": 0.013580322265625,
"learning_rate": 2.09804296520133e-06,
"loss": 0.0014,
"reward": 0.5798576474189758,
"reward_std": 0.1953704133629799,
"rewards/check_gptzero_func": 0.5798576474189758,
"step": 213
},
{
"completion_length": 199.23809814453125,
"epoch": 0.5990202939118264,
"grad_norm": 0.890625,
"kl": 0.010894775390625,
"learning_rate": 2.0739136917545636e-06,
"loss": 0.0011,
"reward": 0.5883476734161377,
"reward_std": 0.19741847924888134,
"rewards/check_gptzero_func": 0.5883476734161377,
"step": 214
},
{
"completion_length": 194.63095474243164,
"epoch": 0.6018194541637508,
"grad_norm": 0.796875,
"kl": 0.0112457275390625,
"learning_rate": 2.0498252299239175e-06,
"loss": 0.0011,
"reward": 0.739928811788559,
"reward_std": 0.17222343757748604,
"rewards/check_gptzero_func": 0.739928811788559,
"step": 215
},
{
"completion_length": 181.78571701049805,
"epoch": 0.6046186144156753,
"grad_norm": 0.90234375,
"kl": 0.0135040283203125,
"learning_rate": 2.025779886962731e-06,
"loss": 0.0014,
"reward": 0.7212615758180618,
"reward_std": 0.11260060407221317,
"rewards/check_gptzero_func": 0.7212615758180618,
"step": 216
},
{
"completion_length": 202.21428680419922,
"epoch": 0.6074177746675997,
"grad_norm": 0.8671875,
"kl": 0.0114288330078125,
"learning_rate": 2.00177996599431e-06,
"loss": 0.0012,
"reward": 0.6235032379627228,
"reward_std": 0.20338322408497334,
"rewards/check_gptzero_func": 0.6235032379627228,
"step": 217
},
{
"completion_length": 164.75000381469727,
"epoch": 0.6102169349195241,
"grad_norm": 0.90625,
"kl": 0.0148773193359375,
"learning_rate": 1.9778277657913246e-06,
"loss": 0.0015,
"reward": 0.8035698980093002,
"reward_std": 0.12413663975894451,
"rewards/check_gptzero_func": 0.8035698980093002,
"step": 218
},
{
"completion_length": 180.75000381469727,
"epoch": 0.6130160951714486,
"grad_norm": 0.9296875,
"kl": 0.01385498046875,
"learning_rate": 1.9539255805556346e-06,
"loss": 0.0014,
"reward": 0.6889385357499123,
"reward_std": 0.13095776550471783,
"rewards/check_gptzero_func": 0.6889385357499123,
"step": 219
},
{
"completion_length": 193.34524536132812,
"epoch": 0.615815255423373,
"grad_norm": 0.9296875,
"kl": 0.0128631591796875,
"learning_rate": 1.9300756996985383e-06,
"loss": 0.0013,
"reward": 0.6453644558787346,
"reward_std": 0.1899284292012453,
"rewards/check_gptzero_func": 0.6453644558787346,
"step": 220
},
{
"completion_length": 187.0714340209961,
"epoch": 0.6186144156752974,
"grad_norm": 0.90625,
"kl": 0.011749267578125,
"learning_rate": 1.9062804076214889e-06,
"loss": 0.0012,
"reward": 0.8237078785896301,
"reward_std": 0.10535579361021519,
"rewards/check_gptzero_func": 0.8237078785896301,
"step": 221
},
{
"completion_length": 203.67856979370117,
"epoch": 0.6214135759272218,
"grad_norm": 0.890625,
"kl": 0.00980377197265625,
"learning_rate": 1.8825419834972902e-06,
"loss": 0.001,
"reward": 0.5983466356992722,
"reward_std": 0.22100866585969925,
"rewards/check_gptzero_func": 0.5983466356992722,
"step": 222
},
{
"completion_length": 173.16666793823242,
"epoch": 0.6242127361791463,
"grad_norm": 0.93359375,
"kl": 0.0145721435546875,
"learning_rate": 1.8588627010517912e-06,
"loss": 0.0015,
"reward": 0.6490365564823151,
"reward_std": 0.215117909014225,
"rewards/check_gptzero_func": 0.6490365564823151,
"step": 223
},
{
"completion_length": 188.9166717529297,
"epoch": 0.6270118964310707,
"grad_norm": 0.8515625,
"kl": 0.01177978515625,
"learning_rate": 1.835244828346101e-06,
"loss": 0.0012,
"reward": 0.6734350174665451,
"reward_std": 0.17084914818406105,
"rewards/check_gptzero_func": 0.6734350174665451,
"step": 224
},
{
"completion_length": 176.91666793823242,
"epoch": 0.6298110566829951,
"grad_norm": 1.078125,
"kl": 0.0153656005859375,
"learning_rate": 1.811690627559351e-06,
"loss": 0.0015,
"reward": 0.7332676947116852,
"reward_std": 0.2090182527899742,
"rewards/check_gptzero_func": 0.7332676947116852,
"step": 225
},
{
"completion_length": 201.5357208251953,
"epoch": 0.6326102169349195,
"grad_norm": 0.78125,
"kl": 0.0106964111328125,
"learning_rate": 1.7882023547720156e-06,
"loss": 0.0011,
"reward": 0.5684466883540154,
"reward_std": 0.19617649912834167,
"rewards/check_gptzero_func": 0.5684466883540154,
"step": 226
},
{
"completion_length": 193.35714721679688,
"epoch": 0.635409377186844,
"grad_norm": 0.80078125,
"kl": 0.01116943359375,
"learning_rate": 1.7647822597498204e-06,
"loss": 0.0011,
"reward": 0.6939697265625,
"reward_std": 0.08986328635364771,
"rewards/check_gptzero_func": 0.6939697265625,
"step": 227
},
{
"completion_length": 169.85714721679688,
"epoch": 0.6382085374387684,
"grad_norm": 1.0078125,
"kl": 0.0150146484375,
"learning_rate": 1.7414325857282528e-06,
"loss": 0.0015,
"reward": 0.6364180445671082,
"reward_std": 0.211682990193367,
"rewards/check_gptzero_func": 0.6364180445671082,
"step": 228
},
{
"completion_length": 206.7023811340332,
"epoch": 0.6410076976906928,
"grad_norm": 0.79296875,
"kl": 0.0116424560546875,
"learning_rate": 1.718155569197701e-06,
"loss": 0.0012,
"reward": 0.6597686931490898,
"reward_std": 0.14480283856391907,
"rewards/check_gptzero_func": 0.6597686931490898,
"step": 229
},
{
"completion_length": 196.5238151550293,
"epoch": 0.6438068579426172,
"grad_norm": 0.91796875,
"kl": 0.01229095458984375,
"learning_rate": 1.6949534396892358e-06,
"loss": 0.0013,
"reward": 0.7675946801900864,
"reward_std": 0.08783328998833895,
"rewards/check_gptzero_func": 0.7675946801900864,
"step": 230
},
{
"completion_length": 185.5238151550293,
"epoch": 0.6466060181945417,
"grad_norm": 0.88671875,
"kl": 0.0150909423828125,
"learning_rate": 1.6718284195610607e-06,
"loss": 0.0015,
"reward": 0.5960735529661179,
"reward_std": 0.17222833260893822,
"rewards/check_gptzero_func": 0.5960735529661179,
"step": 231
},
{
"completion_length": 202.67857360839844,
"epoch": 0.6494051784464661,
"grad_norm": 0.859375,
"kl": 0.0126190185546875,
"learning_rate": 1.6487827237856503e-06,
"loss": 0.0013,
"reward": 0.6651804447174072,
"reward_std": 0.19576009269803762,
"rewards/check_gptzero_func": 0.6651804447174072,
"step": 232
},
{
"completion_length": 193.60714721679688,
"epoch": 0.6522043386983905,
"grad_norm": 0.83984375,
"kl": 0.00897216796875,
"learning_rate": 1.6258185597375919e-06,
"loss": 0.0009,
"reward": 0.6716840863227844,
"reward_std": 0.14614208973944187,
"rewards/check_gptzero_func": 0.6716840863227844,
"step": 233
},
{
"completion_length": 178.55952835083008,
"epoch": 0.655003498950315,
"grad_norm": 0.9921875,
"kl": 0.0128173828125,
"learning_rate": 1.6029381269821607e-06,
"loss": 0.0013,
"reward": 0.8011642247438431,
"reward_std": 0.17047418653964996,
"rewards/check_gptzero_func": 0.8011642247438431,
"step": 234
},
{
"completion_length": 184.25000381469727,
"epoch": 0.6578026592022393,
"grad_norm": 0.9375,
"kl": 0.0157012939453125,
"learning_rate": 1.5801436170646386e-06,
"loss": 0.0016,
"reward": 0.7015040963888168,
"reward_std": 0.12212707288563251,
"rewards/check_gptzero_func": 0.7015040963888168,
"step": 235
},
{
"completion_length": 184.33333587646484,
"epoch": 0.6606018194541637,
"grad_norm": 0.89453125,
"kl": 0.01324462890625,
"learning_rate": 1.5574372133004012e-06,
"loss": 0.0014,
"reward": 0.8126765042543411,
"reward_std": 0.1537869544699788,
"rewards/check_gptzero_func": 0.8126765042543411,
"step": 236
},
{
"completion_length": 203.8095245361328,
"epoch": 0.6634009797060881,
"grad_norm": 0.87890625,
"kl": 0.0106201171875,
"learning_rate": 1.5348210905657962e-06,
"loss": 0.0011,
"reward": 0.7201628535985947,
"reward_std": 0.164525730535388,
"rewards/check_gptzero_func": 0.7201628535985947,
"step": 237
},
{
"completion_length": 182.16666793823242,
"epoch": 0.6662001399580126,
"grad_norm": 0.96875,
"kl": 0.01495361328125,
"learning_rate": 1.512297415089829e-06,
"loss": 0.0015,
"reward": 0.7377509474754333,
"reward_std": 0.1474976148456335,
"rewards/check_gptzero_func": 0.7377509474754333,
"step": 238
},
{
"completion_length": 171.9166717529297,
"epoch": 0.668999300209937,
"grad_norm": 0.91796875,
"kl": 0.0146942138671875,
"learning_rate": 1.4898683442466715e-06,
"loss": 0.0015,
"reward": 0.6876519098877907,
"reward_std": 0.17211773619055748,
"rewards/check_gptzero_func": 0.6876519098877907,
"step": 239
},
{
"completion_length": 200.85714721679688,
"epoch": 0.6717984604618614,
"grad_norm": 0.8671875,
"kl": 0.010711669921875,
"learning_rate": 1.4675360263490296e-06,
"loss": 0.0011,
"reward": 0.6152354925870895,
"reward_std": 0.16585622262209654,
"rewards/check_gptzero_func": 0.6152354925870895,
"step": 240
},
{
"completion_length": 188.75000381469727,
"epoch": 0.6745976207137858,
"grad_norm": 0.86328125,
"kl": 0.0137176513671875,
"learning_rate": 1.4453026004423664e-06,
"loss": 0.0014,
"reward": 0.6593173295259476,
"reward_std": 0.1960100382566452,
"rewards/check_gptzero_func": 0.6593173295259476,
"step": 241
},
{
"completion_length": 188.8214340209961,
"epoch": 0.6773967809657103,
"grad_norm": 0.80078125,
"kl": 0.0121612548828125,
"learning_rate": 1.4231701961000256e-06,
"loss": 0.0012,
"reward": 0.7077113464474678,
"reward_std": 0.08986913226544857,
"rewards/check_gptzero_func": 0.7077113464474678,
"step": 242
},
{
"completion_length": 192.89286041259766,
"epoch": 0.6801959412176347,
"grad_norm": 0.79296875,
"kl": 0.01239013671875,
"learning_rate": 1.4011409332192472e-06,
"loss": 0.0012,
"reward": 0.7247354537248611,
"reward_std": 0.16528335958719254,
"rewards/check_gptzero_func": 0.7247354537248611,
"step": 243
},
{
"completion_length": 197.1190528869629,
"epoch": 0.6829951014695591,
"grad_norm": 0.82421875,
"kl": 0.01019287109375,
"learning_rate": 1.379216921818126e-06,
"loss": 0.001,
"reward": 0.6609435975551605,
"reward_std": 0.20204732194542885,
"rewards/check_gptzero_func": 0.6609435975551605,
"step": 244
},
{
"completion_length": 184.35714721679688,
"epoch": 0.6857942617214835,
"grad_norm": 1.015625,
"kl": 0.0119171142578125,
"learning_rate": 1.3574002618335055e-06,
"loss": 0.0012,
"reward": 0.6553308963775635,
"reward_std": 0.18806752562522888,
"rewards/check_gptzero_func": 0.6553308963775635,
"step": 245
},
{
"completion_length": 189.16666793823242,
"epoch": 0.688593421973408,
"grad_norm": 0.859375,
"kl": 0.01226043701171875,
"learning_rate": 1.335693042919841e-06,
"loss": 0.0012,
"reward": 0.6279339641332626,
"reward_std": 0.1844564937055111,
"rewards/check_gptzero_func": 0.6279339641332626,
"step": 246
},
{
"completion_length": 192.04762268066406,
"epoch": 0.6913925822253324,
"grad_norm": 0.8359375,
"kl": 0.01172637939453125,
"learning_rate": 1.314097344249048e-06,
"loss": 0.0012,
"reward": 0.6242645084857941,
"reward_std": 0.1471152976155281,
"rewards/check_gptzero_func": 0.6242645084857941,
"step": 247
},
{
"completion_length": 191.33333587646484,
"epoch": 0.6941917424772568,
"grad_norm": 0.90234375,
"kl": 0.0138397216796875,
"learning_rate": 1.2926152343113525e-06,
"loss": 0.0014,
"reward": 0.5745993703603745,
"reward_std": 0.15316335577517748,
"rewards/check_gptzero_func": 0.5745993703603745,
"step": 248
},
{
"completion_length": 210.39286041259766,
"epoch": 0.6969909027291813,
"grad_norm": 0.8203125,
"kl": 0.00994873046875,
"learning_rate": 1.2712487707171645e-06,
"loss": 0.001,
"reward": 0.7345138937234879,
"reward_std": 0.1424336303025484,
"rewards/check_gptzero_func": 0.7345138937234879,
"step": 249
},
{
"completion_length": 191.95238494873047,
"epoch": 0.6997900629811057,
"grad_norm": 0.88671875,
"kl": 0.0122833251953125,
"learning_rate": 1.2500000000000007e-06,
"loss": 0.0012,
"reward": 0.755773201584816,
"reward_std": 0.14831538125872612,
"rewards/check_gptzero_func": 0.755773201584816,
"step": 250
},
{
"completion_length": 175.86904907226562,
"epoch": 0.7025892232330301,
"grad_norm": 1.09375,
"kl": 0.01434326171875,
"learning_rate": 1.2288709574204561e-06,
"loss": 0.0014,
"reward": 0.635523222386837,
"reward_std": 0.21710951253771782,
"rewards/check_gptzero_func": 0.635523222386837,
"step": 251
},
{
"completion_length": 191.80952835083008,
"epoch": 0.7053883834849545,
"grad_norm": 0.94140625,
"kl": 0.013092041015625,
"learning_rate": 1.2078636667712648e-06,
"loss": 0.0013,
"reward": 0.6860392540693283,
"reward_std": 0.16151536628603935,
"rewards/check_gptzero_func": 0.6860392540693283,
"step": 252
},
{
"completion_length": 211.09524536132812,
"epoch": 0.708187543736879,
"grad_norm": 0.80078125,
"kl": 0.0114593505859375,
"learning_rate": 1.1869801401834563e-06,
"loss": 0.0012,
"reward": 0.5822405442595482,
"reward_std": 0.1631794534623623,
"rewards/check_gptzero_func": 0.5822405442595482,
"step": 253
},
{
"completion_length": 198.07143020629883,
"epoch": 0.7109867039888034,
"grad_norm": 0.92578125,
"kl": 0.0108795166015625,
"learning_rate": 1.1662223779336272e-06,
"loss": 0.0011,
"reward": 0.6223282963037491,
"reward_std": 0.23277926445007324,
"rewards/check_gptzero_func": 0.6223282963037491,
"step": 254
},
{
"completion_length": 170.01190567016602,
"epoch": 0.7137858642407278,
"grad_norm": 0.796875,
"kl": 0.0137481689453125,
"learning_rate": 1.1455923682523476e-06,
"loss": 0.0014,
"reward": 0.7666629701852798,
"reward_std": 0.12203127704560757,
"rewards/check_gptzero_func": 0.7666629701852798,
"step": 255
},
{
"completion_length": 182.51190948486328,
"epoch": 0.7165850244926522,
"grad_norm": 0.9296875,
"kl": 0.0144500732421875,
"learning_rate": 1.1250920871337296e-06,
"loss": 0.0014,
"reward": 0.5818550065159798,
"reward_std": 0.21111036837100983,
"rewards/check_gptzero_func": 0.5818550065159798,
"step": 256
},
{
"completion_length": 178.8690528869629,
"epoch": 0.7193841847445767,
"grad_norm": 0.81640625,
"kl": 0.013885498046875,
"learning_rate": 1.104723498146156e-06,
"loss": 0.0014,
"reward": 0.5987424030900002,
"reward_std": 0.21208756789565086,
"rewards/check_gptzero_func": 0.5987424030900002,
"step": 257
},
{
"completion_length": 173.29762268066406,
"epoch": 0.722183344996501,
"grad_norm": 0.83984375,
"kl": 0.0154876708984375,
"learning_rate": 1.0844885522442076e-06,
"loss": 0.0016,
"reward": 0.7769442051649094,
"reward_std": 0.09991182293742895,
"rewards/check_gptzero_func": 0.7769442051649094,
"step": 258
},
{
"completion_length": 194.32143020629883,
"epoch": 0.7249825052484254,
"grad_norm": 0.8046875,
"kl": 0.0103759765625,
"learning_rate": 1.064389187581794e-06,
"loss": 0.001,
"reward": 0.6640415489673615,
"reward_std": 0.09457994624972343,
"rewards/check_gptzero_func": 0.6640415489673615,
"step": 259
},
{
"completion_length": 191.02381134033203,
"epoch": 0.72778166550035,
"grad_norm": 0.859375,
"kl": 0.012786865234375,
"learning_rate": 1.044427329326515e-06,
"loss": 0.0013,
"reward": 0.6643179804086685,
"reward_std": 0.1974339596927166,
"rewards/check_gptzero_func": 0.6643179804086685,
"step": 260
},
{
"completion_length": 182.0238151550293,
"epoch": 0.7305808257522743,
"grad_norm": 0.984375,
"kl": 0.0133209228515625,
"learning_rate": 1.024604889475259e-06,
"loss": 0.0013,
"reward": 0.7576928585767746,
"reward_std": 0.17764172703027725,
"rewards/check_gptzero_func": 0.7576928585767746,
"step": 261
},
{
"completion_length": 197.76190567016602,
"epoch": 0.7333799860041987,
"grad_norm": 0.79296875,
"kl": 0.011962890625,
"learning_rate": 1.0049237666710713e-06,
"loss": 0.0012,
"reward": 0.6300367414951324,
"reward_std": 0.1478472277522087,
"rewards/check_gptzero_func": 0.6300367414951324,
"step": 262
},
{
"completion_length": 203.90476608276367,
"epoch": 0.7361791462561231,
"grad_norm": 0.7734375,
"kl": 0.00923919677734375,
"learning_rate": 9.853858460212961e-07,
"loss": 0.0009,
"reward": 0.7148824632167816,
"reward_std": 0.13644199073314667,
"rewards/check_gptzero_func": 0.7148824632167816,
"step": 263
},
{
"completion_length": 161.45238494873047,
"epoch": 0.7389783065080476,
"grad_norm": 0.9453125,
"kl": 0.0169677734375,
"learning_rate": 9.659929989170156e-07,
"loss": 0.0017,
"reward": 0.6777283996343613,
"reward_std": 0.1693093739449978,
"rewards/check_gptzero_func": 0.6777283996343613,
"step": 264
},
{
"completion_length": 187.28571701049805,
"epoch": 0.741777466759972,
"grad_norm": 0.86328125,
"kl": 0.01239776611328125,
"learning_rate": 9.467470828538028e-07,
"loss": 0.0012,
"reward": 0.6955645084381104,
"reward_std": 0.11928121093660593,
"rewards/check_gptzero_func": 0.6955645084381104,
"step": 265
},
{
"completion_length": 199.65476608276367,
"epoch": 0.7445766270118964,
"grad_norm": 0.81640625,
"kl": 0.0113372802734375,
"learning_rate": 9.276499412538082e-07,
"loss": 0.0011,
"reward": 0.6984945237636566,
"reward_std": 0.12425749842077494,
"rewards/check_gptzero_func": 0.6984945237636566,
"step": 266
},
{
"completion_length": 191.65476989746094,
"epoch": 0.7473757872638208,
"grad_norm": 0.9375,
"kl": 0.0126953125,
"learning_rate": 9.087034032891884e-07,
"loss": 0.0013,
"reward": 0.5988369584083557,
"reward_std": 0.22750091180205345,
"rewards/check_gptzero_func": 0.5988369584083557,
"step": 267
},
{
"completion_length": 190.25000381469727,
"epoch": 0.7501749475157453,
"grad_norm": 0.90625,
"kl": 0.0141143798828125,
"learning_rate": 8.899092837069081e-07,
"loss": 0.0014,
"reward": 0.7432132065296173,
"reward_std": 0.1290474236011505,
"rewards/check_gptzero_func": 0.7432132065296173,
"step": 268
},
{
"completion_length": 177.40476989746094,
"epoch": 0.7529741077676697,
"grad_norm": 0.875,
"kl": 0.013031005859375,
"learning_rate": 8.71269382654916e-07,
"loss": 0.0013,
"reward": 0.6381399929523468,
"reward_std": 0.13523080106824636,
"rewards/check_gptzero_func": 0.6381399929523468,
"step": 269
},
{
"completion_length": 182.97619247436523,
"epoch": 0.7557732680195941,
"grad_norm": 0.984375,
"kl": 0.01318359375,
"learning_rate": 8.527854855097226e-07,
"loss": 0.0013,
"reward": 0.6455244570970535,
"reward_std": 0.142228739336133,
"rewards/check_gptzero_func": 0.6455244570970535,
"step": 270
},
{
"completion_length": 179.0,
"epoch": 0.7585724282715185,
"grad_norm": 0.8515625,
"kl": 0.01285552978515625,
"learning_rate": 8.344593627053926e-07,
"loss": 0.0013,
"reward": 0.6351892277598381,
"reward_std": 0.16661302000284195,
"rewards/check_gptzero_func": 0.6351892277598381,
"step": 271
},
{
"completion_length": 203.76190948486328,
"epoch": 0.761371588523443,
"grad_norm": 0.79296875,
"kl": 0.01068878173828125,
"learning_rate": 8.162927695639699e-07,
"loss": 0.0011,
"reward": 0.6384782642126083,
"reward_std": 0.17978323996067047,
"rewards/check_gptzero_func": 0.6384782642126083,
"step": 272
},
{
"completion_length": 189.80952835083008,
"epoch": 0.7641707487753674,
"grad_norm": 0.9921875,
"kl": 0.0136566162109375,
"learning_rate": 7.982874461273438e-07,
"loss": 0.0014,
"reward": 0.5513089373707771,
"reward_std": 0.21978427842259407,
"rewards/check_gptzero_func": 0.5513089373707771,
"step": 273
},
{
"completion_length": 182.42857360839844,
"epoch": 0.7669699090272918,
"grad_norm": 1.015625,
"kl": 0.0137786865234375,
"learning_rate": 7.804451169905882e-07,
"loss": 0.0014,
"reward": 0.6128961741924286,
"reward_std": 0.1331999460235238,
"rewards/check_gptzero_func": 0.6128961741924286,
"step": 274
},
{
"completion_length": 185.23809814453125,
"epoch": 0.7697690692792163,
"grad_norm": 0.94921875,
"kl": 0.0132598876953125,
"learning_rate": 7.627674911367747e-07,
"loss": 0.0013,
"reward": 0.686809316277504,
"reward_std": 0.1671704165637493,
"rewards/check_gptzero_func": 0.686809316277504,
"step": 275
},
{
"completion_length": 184.73810195922852,
"epoch": 0.7725682295311407,
"grad_norm": 0.96875,
"kl": 0.0143890380859375,
"learning_rate": 7.452562617732795e-07,
"loss": 0.0014,
"reward": 0.6087932512164116,
"reward_std": 0.18696350045502186,
"rewards/check_gptzero_func": 0.6087932512164116,
"step": 276
},
{
"completion_length": 186.3928565979004,
"epoch": 0.7753673897830651,
"grad_norm": 0.9140625,
"kl": 0.01251220703125,
"learning_rate": 7.279131061696062e-07,
"loss": 0.0013,
"reward": 0.6093617677688599,
"reward_std": 0.16512912511825562,
"rewards/check_gptzero_func": 0.6093617677688599,
"step": 277
},
{
"completion_length": 194.29762649536133,
"epoch": 0.7781665500349895,
"grad_norm": 0.84375,
"kl": 0.012054443359375,
"learning_rate": 7.107396854967322e-07,
"loss": 0.0013,
"reward": 0.6530048102140427,
"reward_std": 0.1365496888756752,
"rewards/check_gptzero_func": 0.6530048102140427,
"step": 278
},
{
"completion_length": 185.23809814453125,
"epoch": 0.780965710286914,
"grad_norm": 0.8984375,
"kl": 0.0136260986328125,
"learning_rate": 6.93737644667995e-07,
"loss": 0.0014,
"reward": 0.5730803310871124,
"reward_std": 0.23035263270139694,
"rewards/check_gptzero_func": 0.5730803310871124,
"step": 279
},
{
"completion_length": 196.7738151550293,
"epoch": 0.7837648705388384,
"grad_norm": 0.8984375,
"kl": 0.01300048828125,
"learning_rate": 6.769086121815424e-07,
"loss": 0.0013,
"reward": 0.7231508791446686,
"reward_std": 0.1479925811290741,
"rewards/check_gptzero_func": 0.7231508791446686,
"step": 280
},
{
"completion_length": 173.71428680419922,
"epoch": 0.7865640307907628,
"grad_norm": 0.890625,
"kl": 0.0151519775390625,
"learning_rate": 6.602541999643486e-07,
"loss": 0.0015,
"reward": 0.714839443564415,
"reward_std": 0.18807288724929094,
"rewards/check_gptzero_func": 0.714839443564415,
"step": 281
},
{
"completion_length": 181.16666793823242,
"epoch": 0.7893631910426872,
"grad_norm": 0.890625,
"kl": 0.0139617919921875,
"learning_rate": 6.4377600321782e-07,
"loss": 0.0014,
"reward": 0.6251032203435898,
"reward_std": 0.18542934395372868,
"rewards/check_gptzero_func": 0.6251032203435898,
"step": 282
},
{
"completion_length": 183.8571434020996,
"epoch": 0.7921623512946117,
"grad_norm": 0.765625,
"kl": 0.0149688720703125,
"learning_rate": 6.274756002650034e-07,
"loss": 0.0015,
"reward": 0.6130138486623764,
"reward_std": 0.12219419237226248,
"rewards/check_gptzero_func": 0.6130138486623764,
"step": 283
},
{
"completion_length": 191.50000381469727,
"epoch": 0.794961511546536,
"grad_norm": 0.86328125,
"kl": 0.014312744140625,
"learning_rate": 6.11354552399408e-07,
"loss": 0.0014,
"reward": 0.5668933913111687,
"reward_std": 0.19107018411159515,
"rewards/check_gptzero_func": 0.5668933913111687,
"step": 284
},
{
"completion_length": 200.03571701049805,
"epoch": 0.7977606717984604,
"grad_norm": 0.875,
"kl": 0.012359619140625,
"learning_rate": 5.954144037354645e-07,
"loss": 0.0012,
"reward": 0.6846682727336884,
"reward_std": 0.1509340275079012,
"rewards/check_gptzero_func": 0.6846682727336884,
"step": 285
},
{
"completion_length": 189.97619247436523,
"epoch": 0.8005598320503848,
"grad_norm": 0.93359375,
"kl": 0.014251708984375,
"learning_rate": 5.796566810606227e-07,
"loss": 0.0014,
"reward": 0.6802646964788437,
"reward_std": 0.17098304629325867,
"rewards/check_gptzero_func": 0.6802646964788437,
"step": 286
},
{
"completion_length": 178.20238876342773,
"epoch": 0.8033589923023093,
"grad_norm": 0.91796875,
"kl": 0.0152130126953125,
"learning_rate": 5.640828936891144e-07,
"loss": 0.0015,
"reward": 0.5891979560256004,
"reward_std": 0.18455617874860764,
"rewards/check_gptzero_func": 0.5891979560256004,
"step": 287
},
{
"completion_length": 169.11904907226562,
"epoch": 0.8061581525542337,
"grad_norm": 1.0546875,
"kl": 0.0149993896484375,
"learning_rate": 5.486945333173852e-07,
"loss": 0.0015,
"reward": 0.6522376388311386,
"reward_std": 0.16162380203604698,
"rewards/check_gptzero_func": 0.6522376388311386,
"step": 288
},
{
"completion_length": 190.00000381469727,
"epoch": 0.8089573128061581,
"grad_norm": 0.9140625,
"kl": 0.010894775390625,
"learning_rate": 5.334930738812188e-07,
"loss": 0.0011,
"reward": 0.6731359958648682,
"reward_std": 0.14657550491392612,
"rewards/check_gptzero_func": 0.6731359958648682,
"step": 289
},
{
"completion_length": 187.7976188659668,
"epoch": 0.8117564730580826,
"grad_norm": 1.109375,
"kl": 0.013214111328125,
"learning_rate": 5.184799714145558e-07,
"loss": 0.0013,
"reward": 0.6758114099502563,
"reward_std": 0.1736396849155426,
"rewards/check_gptzero_func": 0.6758114099502563,
"step": 290
},
{
"completion_length": 200.3690528869629,
"epoch": 0.814555633310007,
"grad_norm": 0.85546875,
"kl": 0.011383056640625,
"learning_rate": 5.036566639100351e-07,
"loss": 0.0011,
"reward": 0.7276384383440018,
"reward_std": 0.12691646441817284,
"rewards/check_gptzero_func": 0.7276384383440018,
"step": 291
},
{
"completion_length": 169.78571319580078,
"epoch": 0.8173547935619314,
"grad_norm": 1.0234375,
"kl": 0.0178375244140625,
"learning_rate": 4.890245711812577e-07,
"loss": 0.0018,
"reward": 0.746677041053772,
"reward_std": 0.22585053741931915,
"rewards/check_gptzero_func": 0.746677041053772,
"step": 292
},
{
"completion_length": 187.03571319580078,
"epoch": 0.8201539538138558,
"grad_norm": 0.88671875,
"kl": 0.01416015625,
"learning_rate": 4.74585094726793e-07,
"loss": 0.0014,
"reward": 0.7156971842050552,
"reward_std": 0.18219392374157906,
"rewards/check_gptzero_func": 0.7156971842050552,
"step": 293
},
{
"completion_length": 176.9523811340332,
"epoch": 0.8229531140657803,
"grad_norm": 0.96875,
"kl": 0.0149993896484375,
"learning_rate": 4.6033961759594045e-07,
"loss": 0.0015,
"reward": 0.6982993930578232,
"reward_std": 0.18162141740322113,
"rewards/check_gptzero_func": 0.6982993930578232,
"step": 294
},
{
"completion_length": 175.6666717529297,
"epoch": 0.8257522743177047,
"grad_norm": 0.92578125,
"kl": 0.01531982421875,
"learning_rate": 4.462895042562576e-07,
"loss": 0.0015,
"reward": 0.7019955068826675,
"reward_std": 0.16880467906594276,
"rewards/check_gptzero_func": 0.7019955068826675,
"step": 295
},
{
"completion_length": 173.41666793823242,
"epoch": 0.8285514345696291,
"grad_norm": 1.1484375,
"kl": 0.0169219970703125,
"learning_rate": 4.324361004628658e-07,
"loss": 0.0017,
"reward": 0.6873890459537506,
"reward_std": 0.1467819530516863,
"rewards/check_gptzero_func": 0.6873890459537506,
"step": 296
},
{
"completion_length": 188.85714721679688,
"epoch": 0.8313505948215535,
"grad_norm": 0.93359375,
"kl": 0.0130767822265625,
"learning_rate": 4.1878073312955486e-07,
"loss": 0.0013,
"reward": 0.6262213513255119,
"reward_std": 0.15299665369093418,
"rewards/check_gptzero_func": 0.6262213513255119,
"step": 297
},
{
"completion_length": 201.33333587646484,
"epoch": 0.834149755073478,
"grad_norm": 0.87890625,
"kl": 0.01001739501953125,
"learning_rate": 4.0532471020168386e-07,
"loss": 0.001,
"reward": 0.708094909787178,
"reward_std": 0.15163133665919304,
"rewards/check_gptzero_func": 0.708094909787178,
"step": 298
},
{
"completion_length": 192.10714721679688,
"epoch": 0.8369489153254024,
"grad_norm": 0.9921875,
"kl": 0.0134735107421875,
"learning_rate": 3.920693205309048e-07,
"loss": 0.0013,
"reward": 0.5948657244443893,
"reward_std": 0.20524189993739128,
"rewards/check_gptzero_func": 0.5948657244443893,
"step": 299
},
{
"completion_length": 196.40476608276367,
"epoch": 0.8397480755773268,
"grad_norm": 0.86328125,
"kl": 0.015106201171875,
"learning_rate": 3.7901583375171277e-07,
"loss": 0.0015,
"reward": 0.6475347355008125,
"reward_std": 0.13522333092987537,
"rewards/check_gptzero_func": 0.6475347355008125,
"step": 300
}
],
"logging_steps": 1,
"max_steps": 357,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}