{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8391608391608392,
  "eval_steps": 500,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 94.14286041259766,
      "epoch": 0.0013986013986013986,
      "grad_norm": 0.09098726750251449,
      "kl": 0.0003147125244140625,
      "learning_rate": 3.496503496503497e-07,
      "loss": -0.0001,
      "num_tokens": 19338.0,
      "reward": 1.3853150606155396,
      "reward_std": 0.4827312231063843,
      "rewards/check_gptzero_func": 0.3214285671710968,
      "rewards/check_perplexity_diff_func": 0.6428571343421936,
      "rewards/check_winston_local_func": 0.42102929949760437,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "epoch": 0.002797202797202797,
      "grad_norm": 0.09100260462019598,
      "kl": 0.0003147125244140625,
      "learning_rate": 6.993006993006994e-07,
      "loss": -0.0001,
      "step": 2
    },
    {
      "clip_ratio": 0.0031990089919418097,
      "completion_length": 97.73214721679688,
      "epoch": 0.004195804195804196,
      "grad_norm": 0.16978190242842756,
      "kl": 0.0003662109375,
      "learning_rate": 1.0489510489510491e-06,
      "loss": -0.0017,
      "num_tokens": 39487.0,
      "reward": 0.9710169434547424,
      "reward_std": 0.6376833319664001,
      "rewards/check_gptzero_func": 0.125,
      "rewards/check_perplexity_diff_func": 0.5,
      "rewards/check_winston_local_func": 0.34601688385009766,
      "step": 3
    },
    {
      "clip_ratio": 0.0018373647471889853,
      "epoch": 0.005594405594405594,
      "grad_norm": 0.17663660241053314,
      "kl": 0.000423431396484375,
      "learning_rate": 1.3986013986013987e-06,
      "loss": -0.0013,
      "step": 4
    },
    {
      "clip_ratio": 0.0016871786210685968,
      "completion_length": 72.83928680419922,
      "epoch": 0.006993006993006993,
      "grad_norm": 0.09466520543424979,
      "kl": 0.00034332275390625,
      "learning_rate": 1.7482517482517485e-06,
      "loss": 0.0008,
      "num_tokens": 56082.0,
      "reward": 1.0736479759216309,
      "reward_std": 0.6862176656723022,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.3928571343421936,
      "rewards/check_winston_local_func": 0.4486479163169861,
      "step": 5
    },
    {
      "clip_ratio": 0.0010098000057041645,
      "epoch": 0.008391608391608392,
      "grad_norm": 0.09539550614656178,
      "kl": 0.0003376007080078125,
      "learning_rate": 2.0979020979020983e-06,
      "loss": 0.001,
      "step": 6
    },
    {
      "clip_ratio": 0.00184189947322011,
      "completion_length": 95.92857360839844,
      "epoch": 0.009790209790209791,
      "grad_norm": 0.10573631695411624,
      "kl": 0.0003757476806640625,
      "learning_rate": 2.4475524475524477e-06,
      "loss": 0.001,
      "num_tokens": 75906.0,
      "reward": 1.135968804359436,
      "reward_std": 0.48974505066871643,
      "rewards/check_gptzero_func": 0.2142857164144516,
      "rewards/check_perplexity_diff_func": 0.5,
      "rewards/check_winston_local_func": 0.42168304324150085,
      "step": 7
    },
    {
      "clip_ratio": 0.0018530809320509434,
      "epoch": 0.011188811188811189,
      "grad_norm": 0.10614532740192534,
      "kl": 0.0004119873046875,
      "learning_rate": 2.7972027972027974e-06,
      "loss": 0.0011,
      "step": 8
    },
    {
      "clip_ratio": 0.0017129909247159958,
      "completion_length": 106.76786041259766,
      "epoch": 0.012587412587412588,
      "grad_norm": 0.07916450383171113,
      "kl": 0.000377655029296875,
      "learning_rate": 3.1468531468531472e-06,
      "loss": -0.0012,
      "num_tokens": 96989.0,
      "reward": 0.9590713977813721,
      "reward_std": 0.5206417441368103,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.3928571343421936,
      "rewards/check_winston_local_func": 0.3340713679790497,
      "step": 9
    },
    {
      "clip_ratio": 0.0034840807784348726,
      "epoch": 0.013986013986013986,
      "grad_norm": 0.08061029037170311,
      "kl": 0.00037384033203125,
      "learning_rate": 3.496503496503497e-06,
      "loss": -0.0008,
      "step": 10
    },
    {
      "clip_ratio": 0.0017933619674295187,
      "completion_length": 102.85714721679688,
      "epoch": 0.015384615384615385,
      "grad_norm": 0.07930375667041344,
      "kl": 0.00030517578125,
      "learning_rate": 3.846153846153847e-06,
      "loss": -0.0038,
      "num_tokens": 117271.0,
      "reward": 1.1903148889541626,
      "reward_std": 0.4722224473953247,
      "rewards/check_gptzero_func": 0.2857142984867096,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.368886262178421,
      "step": 11
    },
    {
      "clip_ratio": 0.0015239757485687733,
      "epoch": 0.016783216783216783,
      "grad_norm": 0.07898151410343454,
      "kl": 0.0003871917724609375,
      "learning_rate": 4.195804195804197e-06,
      "loss": -0.0041,
      "step": 12
    },
    {
      "clip_ratio": 0.0029670600779354572,
      "completion_length": 104.37500762939453,
      "epoch": 0.01818181818181818,
      "grad_norm": 0.09247854491588844,
      "kl": 0.0003986358642578125,
      "learning_rate": 4.5454545454545455e-06,
      "loss": -0.0049,
      "num_tokens": 138174.0,
      "reward": 0.9787324070930481,
      "reward_std": 0.63657546043396,
      "rewards/check_gptzero_func": 0.1964285671710968,
      "rewards/check_perplexity_diff_func": 0.5714285969734192,
      "rewards/check_winston_local_func": 0.21087531745433807,
      "step": 13
    },
    {
      "clip_ratio": 0.003217793768271804,
      "epoch": 0.019580419580419582,
      "grad_norm": 0.09293749933034297,
      "kl": 0.0003204345703125,
      "learning_rate": 4.895104895104895e-06,
      "loss": -0.0045,
      "step": 14
    },
    {
      "clip_ratio": 0.002677519340068102,
      "completion_length": 67.41072082519531,
      "epoch": 0.02097902097902098,
      "grad_norm": 0.14020673218962584,
      "kl": 0.000499725341796875,
      "learning_rate": 5.244755244755245e-06,
      "loss": -0.008,
      "num_tokens": 154385.0,
      "reward": 1.0573337078094482,
      "reward_std": 0.5388314127922058,
      "rewards/check_gptzero_func": 0.2857142984867096,
      "rewards/check_perplexity_diff_func": 0.3571428656578064,
      "rewards/check_winston_local_func": 0.41447654366493225,
      "step": 15
    },
    {
      "clip_ratio": 0.0021408216562122107,
      "epoch": 0.022377622377622378,
      "grad_norm": 0.16110555979669544,
      "kl": 0.000530242919921875,
      "learning_rate": 5.594405594405595e-06,
      "loss": -0.0081,
      "step": 16
    },
    {
      "clip_ratio": 0.002852825215086341,
      "completion_length": 89.83928680419922,
      "epoch": 0.023776223776223775,
      "grad_norm": 0.1066932843002634,
      "kl": 0.000415802001953125,
      "learning_rate": 5.944055944055945e-06,
      "loss": 0.0166,
      "num_tokens": 173158.0,
      "reward": 1.2226747274398804,
      "reward_std": 0.45578521490097046,
      "rewards/check_gptzero_func": 0.3392857015132904,
      "rewards/check_perplexity_diff_func": 0.5,
      "rewards/check_winston_local_func": 0.3833889663219452,
      "step": 17
    },
    {
      "clip_ratio": 0.002018069615587592,
      "epoch": 0.025174825174825177,
      "grad_norm": 0.10578254537451186,
      "kl": 0.0003833770751953125,
      "learning_rate": 6.2937062937062944e-06,
      "loss": 0.0164,
      "step": 18
    },
    {
      "clip_ratio": 0.0032663617748767138,
      "completion_length": 105.83928680419922,
      "epoch": 0.026573426573426574,
      "grad_norm": 0.11554707163597037,
      "kl": 0.0003719329833984375,
      "learning_rate": 6.643356643356643e-06,
      "loss": -0.0005,
      "num_tokens": 194649.0,
      "reward": 0.8957912921905518,
      "reward_std": 0.47702187299728394,
      "rewards/check_gptzero_func": 0.1785714328289032,
      "rewards/check_perplexity_diff_func": 0.3571428656578064,
      "rewards/check_winston_local_func": 0.3600769340991974,
      "step": 19
    },
    {
      "clip_ratio": 0.0024422537535429,
      "epoch": 0.027972027972027972,
      "grad_norm": 0.11417666172406592,
      "kl": 0.0003814697265625,
      "learning_rate": 6.993006993006994e-06,
      "loss": -0.0008,
      "step": 20
    },
    {
      "clip_ratio": 0.0013639701064676046,
      "completion_length": 107.39286041259766,
      "epoch": 0.02937062937062937,
      "grad_norm": 0.09634196153318929,
      "kl": 0.000396728515625,
      "learning_rate": 7.342657342657343e-06,
      "loss": -0.0011,
      "num_tokens": 215987.0,
      "reward": 0.9197577238082886,
      "reward_std": 0.39100831747055054,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.3571428656578064,
      "rewards/check_winston_local_func": 0.33047202229499817,
      "step": 21
    },
    {
      "clip_ratio": 0.003595889313146472,
      "epoch": 0.03076923076923077,
      "grad_norm": 0.09779857975187953,
      "kl": 0.00052642822265625,
      "learning_rate": 7.692307692307694e-06,
      "loss": -0.0013,
      "step": 22
    },
    {
      "clip_ratio": 0.0024316231720149517,
      "completion_length": 89.08928680419922,
      "epoch": 0.032167832167832165,
      "grad_norm": 0.13189477135999747,
      "kl": 0.0003414154052734375,
      "learning_rate": 8.041958041958042e-06,
      "loss": -0.0009,
      "num_tokens": 234678.0,
      "reward": 0.9593304395675659,
      "reward_std": 0.5972030758857727,
      "rewards/check_gptzero_func": 0.1964285671710968,
      "rewards/check_perplexity_diff_func": 0.4285714328289032,
      "rewards/check_winston_local_func": 0.33433040976524353,
      "step": 23
    },
    {
      "clip_ratio": 0.000728283659555018,
      "epoch": 0.033566433566433566,
      "grad_norm": 0.12953291972672934,
      "kl": 0.000335693359375,
      "learning_rate": 8.391608391608393e-06,
      "loss": -0.0011,
      "step": 24
    },
    {
      "clip_ratio": 0.0021546650677919388,
      "completion_length": 90.98214721679688,
      "epoch": 0.03496503496503497,
      "grad_norm": 0.07682474826469807,
      "kl": 0.0004558563232421875,
      "learning_rate": 8.741258741258741e-06,
      "loss": 0.0077,
      "num_tokens": 253833.0,
      "reward": 0.8255766034126282,
      "reward_std": 0.3988816440105438,
      "rewards/check_gptzero_func": 0.2678571343421936,
      "rewards/check_perplexity_diff_func": 0.2857142984867096,
      "rewards/check_winston_local_func": 0.272005170583725,
      "step": 25
    },
    {
      "clip_ratio": 0.0019250252516940236,
      "epoch": 0.03636363636363636,
      "grad_norm": 0.07873511079065725,
      "kl": 0.0003490447998046875,
      "learning_rate": 9.090909090909091e-06,
      "loss": 0.0075,
      "step": 26
    },
    {
      "clip_ratio": 0.001886485842987895,
      "completion_length": 74.4464340209961,
      "epoch": 0.03776223776223776,
      "grad_norm": 0.06765277577538054,
      "kl": 0.000354766845703125,
      "learning_rate": 9.44055944055944e-06,
      "loss": -0.0003,
      "num_tokens": 270908.0,
      "reward": 1.3086735010147095,
      "reward_std": 0.3881511986255646,
      "rewards/check_gptzero_func": 0.4107142984867096,
      "rewards/check_perplexity_diff_func": 0.4642857015132904,
      "rewards/check_winston_local_func": 0.4336733818054199,
      "step": 27
    },
    {
      "clip_ratio": 0.0020898371003568172,
      "epoch": 0.039160839160839164,
      "grad_norm": 0.06722581226675682,
      "kl": 0.000453948974609375,
      "learning_rate": 9.79020979020979e-06,
      "loss": -0.0006,
      "step": 28
    },
    {
      "clip_ratio": 0.002855924190953374,
      "completion_length": 103.64286041259766,
      "epoch": 0.04055944055944056,
      "grad_norm": 0.16301390061045865,
      "kl": 0.0003948211669921875,
      "learning_rate": 1.013986013986014e-05,
      "loss": 0.0063,
      "num_tokens": 292060.0,
      "reward": 1.22488272190094,
      "reward_std": 0.8093323111534119,
      "rewards/check_gptzero_func": 0.2678571343421936,
      "rewards/check_perplexity_diff_func": 0.6071428656578064,
      "rewards/check_winston_local_func": 0.34988275170326233,
      "step": 29
    },
    {
      "clip_ratio": 0.0016208746237680316,
      "epoch": 0.04195804195804196,
      "grad_norm": 0.17335125351231856,
      "kl": 0.000507354736328125,
      "learning_rate": 1.048951048951049e-05,
      "loss": 0.0059,
      "step": 30
    },
    {
      "clip_ratio": 0.0011572305811569095,
      "completion_length": 82.91072082519531,
      "epoch": 0.043356643356643354,
      "grad_norm": 0.11830097508590764,
      "kl": 0.0006256103515625,
      "learning_rate": 1.0839160839160838e-05,
      "loss": 0.002,
      "num_tokens": 310163.0,
      "reward": 0.9069231748580933,
      "reward_std": 0.4621748924255371,
      "rewards/check_gptzero_func": 0.25,
      "rewards/check_perplexity_diff_func": 0.3571428656578064,
      "rewards/check_winston_local_func": 0.29978030920028687,
      "step": 31
    },
    {
      "clip_ratio": 0.00402362085878849,
      "epoch": 0.044755244755244755,
      "grad_norm": 0.11861334422671065,
      "kl": 0.000762939453125,
      "learning_rate": 1.118881118881119e-05,
      "loss": 0.0019,
      "step": 32
    },
    {
      "clip_ratio": 0.0017474278574809432,
      "completion_length": 90.9464340209961,
      "epoch": 0.046153846153846156,
      "grad_norm": 0.1025496804751534,
      "kl": 0.000701904296875,
      "learning_rate": 1.153846153846154e-05,
      "loss": -0.0014,
      "num_tokens": 328922.0,
      "reward": 1.1198338270187378,
      "reward_std": 0.5045111775398254,
      "rewards/check_gptzero_func": 0.0892857164144516,
      "rewards/check_perplexity_diff_func": 0.7857142686843872,
      "rewards/check_winston_local_func": 0.24483375251293182,
      "step": 33
    },
    {
      "clip_ratio": 0.0012307984288781881,
      "epoch": 0.04755244755244755,
      "grad_norm": 0.10274008519956918,
      "kl": 0.00087738037109375,
      "learning_rate": 1.188811188811189e-05,
      "loss": -0.0015,
      "step": 34
    },
    {
      "clip_ratio": 0.0029886537231504917,
      "completion_length": 93.00000762939453,
      "epoch": 0.04895104895104895,
      "grad_norm": 0.1311802256012859,
      "kl": 0.001190185546875,
      "learning_rate": 1.2237762237762239e-05,
      "loss": -0.0141,
      "num_tokens": 348852.0,
      "reward": 0.9456299543380737,
      "reward_std": 0.6374980807304382,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.4285714328289032,
      "rewards/check_winston_local_func": 0.28491565585136414,
      "step": 35
    },
    {
      "clip_ratio": 0.0028984802775084972,
      "epoch": 0.05034965034965035,
      "grad_norm": 0.13267305607921692,
      "kl": 0.00145721435546875,
      "learning_rate": 1.2587412587412589e-05,
      "loss": -0.0147,
      "step": 36
    },
    {
      "clip_ratio": 0.0018746532732620835,
      "completion_length": 89.85714721679688,
      "epoch": 0.05174825174825175,
      "grad_norm": 0.1497182815907709,
      "kl": 0.002044677734375,
      "learning_rate": 1.2937062937062939e-05,
      "loss": -0.0253,
      "num_tokens": 367482.0,
      "reward": 1.2605940103530884,
      "reward_std": 0.66156405210495,
      "rewards/check_gptzero_func": 0.1785714328289032,
      "rewards/check_perplexity_diff_func": 0.75,
      "rewards/check_winston_local_func": 0.3320225179195404,
      "step": 37
    },
    {
      "clip_ratio": 0.0020914783235639334,
      "epoch": 0.05314685314685315,
      "grad_norm": 0.15027543310666272,
      "kl": 0.0026092529296875,
      "learning_rate": 1.3286713286713287e-05,
      "loss": -0.0257,
      "step": 38
    },
    {
      "clip_ratio": 0.0017028081929311156,
      "completion_length": 101.58928680419922,
      "epoch": 0.05454545454545454,
      "grad_norm": 0.1306748742984494,
      "kl": 0.0027008056640625,
      "learning_rate": 1.3636363636363637e-05,
      "loss": -0.0091,
      "num_tokens": 388365.0,
      "reward": 1.1187876462936401,
      "reward_std": 0.4044211804866791,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.3509305417537689,
      "step": 39
    },
    {
      "clip_ratio": 0.003386714030057192,
      "epoch": 0.055944055944055944,
      "grad_norm": 0.12880289464580882,
      "kl": 0.003265380859375,
      "learning_rate": 1.3986013986013988e-05,
      "loss": -0.0086,
      "step": 40
    },
    {
      "clip_ratio": 0.0024523374158889055,
      "completion_length": 100.41072082519531,
      "epoch": 0.057342657342657345,
      "grad_norm": 0.12385816200639352,
      "kl": 0.004547119140625,
      "learning_rate": 1.4335664335664336e-05,
      "loss": -0.0161,
      "num_tokens": 409074.0,
      "reward": 0.804709792137146,
      "reward_std": 0.5129754543304443,
      "rewards/check_gptzero_func": 0.1428571492433548,
      "rewards/check_perplexity_diff_func": 0.3928571343421936,
      "rewards/check_winston_local_func": 0.2689954936504364,
      "step": 41
    },
    {
      "clip_ratio": 0.004267544951289892,
      "epoch": 0.05874125874125874,
      "grad_norm": 0.12301160152684054,
      "kl": 0.005218505859375,
      "learning_rate": 1.4685314685314686e-05,
      "loss": -0.0161,
      "step": 42
    },
    {
      "clip_ratio": 0.0019341636216267943,
      "completion_length": 90.46428680419922,
      "epoch": 0.06013986013986014,
      "grad_norm": 0.1390908713431379,
      "kl": 0.0048828125,
      "learning_rate": 1.5034965034965034e-05,
      "loss": 0.0068,
      "num_tokens": 428262.0,
      "reward": 1.087609887123108,
      "reward_std": 0.6448312997817993,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.3928571343421936,
      "rewards/check_winston_local_func": 0.46260982751846313,
      "step": 43
    },
    {
      "clip_ratio": 0.0016832423862069845,
      "epoch": 0.06153846153846154,
      "grad_norm": 0.13873080950458352,
      "kl": 0.00567626953125,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 0.0065,
      "step": 44
    },
    {
      "clip_ratio": 0.0019954824820160866,
      "completion_length": 96.92857360839844,
      "epoch": 0.06293706293706294,
      "grad_norm": 0.11630124487143968,
      "kl": 0.00897216796875,
      "learning_rate": 1.5734265734265734e-05,
      "loss": 0.008,
      "num_tokens": 448480.0,
      "reward": 1.0604994297027588,
      "reward_std": 0.5378546118736267,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.4642857015132904,
      "rewards/check_winston_local_func": 0.36407074332237244,
      "step": 45
    },
    {
      "clip_ratio": 0.002663462422788143,
      "epoch": 0.06433566433566433,
      "grad_norm": 0.1149796219701467,
      "kl": 0.01007080078125,
      "learning_rate": 1.6083916083916083e-05,
      "loss": 0.0081,
      "step": 46
    },
    {
      "clip_ratio": 0.0018039485439658165,
      "completion_length": 79.51786041259766,
      "epoch": 0.06573426573426573,
      "grad_norm": 0.12721969778213826,
      "kl": 0.01123046875,
      "learning_rate": 1.6433566433566433e-05,
      "loss": 0.0112,
      "num_tokens": 465671.0,
      "reward": 0.9921315312385559,
      "reward_std": 0.38579052686691284,
      "rewards/check_gptzero_func": 0.1607142835855484,
      "rewards/check_perplexity_diff_func": 0.3928571343421936,
      "rewards/check_winston_local_func": 0.43856000900268555,
      "step": 47
    },
    {
      "clip_ratio": 0.0027954555116593838,
      "epoch": 0.06713286713286713,
      "grad_norm": 0.12930272448754576,
      "kl": 0.01300048828125,
      "learning_rate": 1.6783216783216786e-05,
      "loss": 0.0107,
      "step": 48
    },
    {
      "clip_ratio": 0.0028269642498344183,
      "completion_length": 116.5714340209961,
      "epoch": 0.06853146853146853,
      "grad_norm": 0.12430052891449103,
      "kl": 0.01226806640625,
      "learning_rate": 1.7132867132867133e-05,
      "loss": 0.025,
      "num_tokens": 488367.0,
      "reward": 0.8830849528312683,
      "reward_std": 0.5276607871055603,
      "rewards/check_gptzero_func": 0.25,
      "rewards/check_perplexity_diff_func": 0.3214285671710968,
      "rewards/check_winston_local_func": 0.31165632605552673,
      "step": 49
    },
    {
      "clip_ratio": 0.0023628019262105227,
      "epoch": 0.06993006993006994,
      "grad_norm": 0.12293590613717971,
      "kl": 0.0142822265625,
      "learning_rate": 1.7482517482517483e-05,
      "loss": 0.0249,
      "step": 50
    },
    {
      "clip_ratio": 0.0033425339497625828,
      "completion_length": 83.39286041259766,
      "epoch": 0.07132867132867132,
      "grad_norm": 0.11106847343390151,
      "kl": 0.01177978515625,
      "learning_rate": 1.7832167832167836e-05,
      "loss": -0.0041,
      "num_tokens": 506551.0,
      "reward": 1.029296875,
      "reward_std": 0.4901208281517029,
      "rewards/check_gptzero_func": 0.2857142984867096,
      "rewards/check_perplexity_diff_func": 0.3214285671710968,
      "rewards/check_winston_local_func": 0.42215394973754883,
      "step": 51
    },
    {
      "clip_ratio": 0.0022109400015324354,
      "epoch": 0.07272727272727272,
      "grad_norm": 0.11017795132415774,
      "kl": 0.013671875,
      "learning_rate": 1.8181818181818182e-05,
      "loss": -0.0043,
      "step": 52
    },
    {
      "clip_ratio": 0.0019256824161857367,
      "completion_length": 86.5714340209961,
      "epoch": 0.07412587412587412,
      "grad_norm": 0.11010712395827207,
      "kl": 0.0208740234375,
      "learning_rate": 1.8531468531468532e-05,
      "loss": -0.0032,
      "num_tokens": 524863.0,
      "reward": 0.962996244430542,
      "reward_std": 0.6607940196990967,
      "rewards/check_gptzero_func": 0.1071428582072258,
      "rewards/check_perplexity_diff_func": 0.5,
      "rewards/check_winston_local_func": 0.355853408575058,
      "step": 53
    },
    {
      "clip_ratio": 0.0025832760147750378,
      "epoch": 0.07552447552447553,
      "grad_norm": 0.10695378216267973,
      "kl": 0.0250244140625,
      "learning_rate": 1.888111888111888e-05,
      "loss": -0.0034,
      "step": 54
    },
    {
      "clip_ratio": 0.0017639577854424715,
      "completion_length": 96.08928680419922,
      "epoch": 0.07692307692307693,
      "grad_norm": 0.17472941592839772,
      "kl": 0.0322265625,
      "learning_rate": 1.923076923076923e-05,
      "loss": -0.0141,
      "num_tokens": 544736.0,
      "reward": 1.3478963375091553,
      "reward_std": 0.8002303242683411,
      "rewards/check_gptzero_func": 0.2857142984867096,
      "rewards/check_perplexity_diff_func": 0.7142857313156128,
      "rewards/check_winston_local_func": 0.34789615869522095,
      "step": 55
    },
    {
      "clip_ratio": 0.0034089265391230583,
      "epoch": 0.07832167832167833,
      "grad_norm": 0.14666884899599142,
      "kl": 0.041748046875,
      "learning_rate": 1.958041958041958e-05,
      "loss": -0.0155,
      "step": 56
    },
    {
      "clip_ratio": 0.0026615143287926912,
      "completion_length": 96.91072082519531,
      "epoch": 0.07972027972027972,
      "grad_norm": 0.1400337476406121,
      "kl": 0.035400390625,
      "learning_rate": 1.993006993006993e-05,
      "loss": -0.027,
      "num_tokens": 564803.0,
      "reward": 1.0594887733459473,
      "reward_std": 0.6912091970443726,
      "rewards/check_gptzero_func": 0.2857142984867096,
      "rewards/check_perplexity_diff_func": 0.4285714328289032,
      "rewards/check_winston_local_func": 0.34520307183265686,
      "step": 57
    },
    {
      "clip_ratio": 0.003153084311634302,
      "epoch": 0.08111888111888112,
      "grad_norm": 0.138816359725178,
      "kl": 0.042236328125,
      "learning_rate": 2.027972027972028e-05,
      "loss": -0.0274,
      "step": 58
    },
    {
      "clip_ratio": 0.0012626759707927704,
      "completion_length": 76.35714721679688,
      "epoch": 0.08251748251748252,
      "grad_norm": 0.15940476459351663,
      "kl": 0.0556640625,
      "learning_rate": 2.062937062937063e-05,
      "loss": -0.0057,
      "num_tokens": 582265.0,
      "reward": 0.992591381072998,
      "reward_std": 0.5938137769699097,
      "rewards/check_gptzero_func": 0.125,
      "rewards/check_perplexity_diff_func": 0.4642857015132904,
      "rewards/check_winston_local_func": 0.4033055901527405,
      "step": 59
    },
    {
      "clip_ratio": 0.004643784370273352,
      "epoch": 0.08391608391608392,
      "grad_norm": 0.15305834024157844,
      "kl": 0.07421875,
      "learning_rate": 2.097902097902098e-05,
      "loss": -0.0064,
      "step": 60
    },
    {
      "clip_ratio": 0.0032869603019207716,
      "completion_length": 103.33928680419922,
      "epoch": 0.08531468531468532,
      "grad_norm": 0.12260760902895611,
      "kl": 0.0810546875,
      "learning_rate": 2.132867132867133e-05,
      "loss": 0.0066,
      "num_tokens": 603454.0,
      "reward": 1.191124439239502,
      "reward_std": 0.5557684898376465,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.4232672154903412,
      "step": 61
    },
    {
      "clip_ratio": 0.008058370091021061,
      "epoch": 0.08671328671328671,
      "grad_norm": 0.10224499878092017,
      "kl": 0.1162109375,
      "learning_rate": 2.1678321678321677e-05,
      "loss": 0.0058,
      "step": 62
    },
    {
      "clip_ratio": 0.002202474046498537,
      "completion_length": 91.4464340209961,
      "epoch": 0.08811188811188811,
      "grad_norm": 0.09776882919444682,
      "kl": 0.2138671875,
      "learning_rate": 2.202797202797203e-05,
      "loss": 0.0021,
      "num_tokens": 622705.0,
      "reward": 1.1320958137512207,
      "reward_std": 0.4060821831226349,
      "rewards/check_gptzero_func": 0.3928571343421936,
      "rewards/check_perplexity_diff_func": 0.3214285671710968,
      "rewards/check_winston_local_func": 0.41781002283096313,
      "step": 63
    },
    {
      "clip_ratio": 0.0025903189089149237,
      "epoch": 0.08951048951048951,
      "grad_norm": 0.0963149975780195,
      "kl": 0.2109375,
      "learning_rate": 2.237762237762238e-05,
      "loss": 0.0014,
      "step": 64
    },
    {
      "clip_ratio": 0.0016666523879393935,
      "completion_length": 100.00000762939453,
      "epoch": 0.09090909090909091,
      "grad_norm": 0.12490001498293682,
      "kl": 0.1220703125,
      "learning_rate": 2.272727272727273e-05,
      "loss": 0.0081,
      "num_tokens": 643191.0,
      "reward": 1.274580478668213,
      "reward_std": 0.5277792811393738,
      "rewards/check_gptzero_func": 0.2321428507566452,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.5067232847213745,
      "step": 65
    },
    {
      "clip_ratio": 0.0032784033101052046,
      "epoch": 0.09230769230769231,
      "grad_norm": 0.12114304348028067,
      "kl": 0.134765625,
      "learning_rate": 2.307692307692308e-05,
      "loss": 0.0074,
      "step": 66
    },
    {
      "clip_ratio": 0.0017520035617053509,
      "completion_length": 95.16072082519531,
      "epoch": 0.0937062937062937,
      "grad_norm": 0.14292853522837679,
      "kl": 0.216796875,
      "learning_rate": 2.342657342657343e-05,
      "loss": -0.0018,
      "num_tokens": 662736.0,
      "reward": 1.1007850170135498,
      "reward_std": 0.45985397696495056,
      "rewards/check_gptzero_func": 0.25,
      "rewards/check_perplexity_diff_func": 0.4642857015132904,
      "rewards/check_winston_local_func": 0.38649922609329224,
      "step": 67
    },
    {
      "clip_ratio": 0.00579434959217906,
      "epoch": 0.0951048951048951,
      "grad_norm": 0.14340586759573087,
      "kl": 0.2431640625,
      "learning_rate": 2.377622377622378e-05,
      "loss": -0.0029,
      "step": 68
    },
    {
      "clip_ratio": 0.0022546499967575073,
      "completion_length": 86.17857360839844,
      "epoch": 0.0965034965034965,
      "grad_norm": 0.17287042335415018,
      "kl": 0.26171875,
      "learning_rate": 2.4125874125874125e-05,
      "loss": 0.0059,
      "num_tokens": 681030.0,
      "reward": 1.4391711950302124,
      "reward_std": 0.6516181230545044,
      "rewards/check_gptzero_func": 0.5178571343421936,
      "rewards/check_perplexity_diff_func": 0.4642857015132904,
      "rewards/check_winston_local_func": 0.45702821016311646,
      "step": 69
    },
    {
      "clip_ratio": 0.004240955226123333,
      "epoch": 0.0979020979020979,
      "grad_norm": 0.16371494613893736,
      "kl": 0.28125,
      "learning_rate": 2.4475524475524478e-05,
      "loss": 0.0048,
      "step": 70
    },
    {
      "clip_ratio": 0.003548440057784319,
      "completion_length": 102.42857360839844,
      "epoch": 0.0993006993006993,
      "grad_norm": 0.1615418523026911,
      "kl": 0.1708984375,
      "learning_rate": 2.4825174825174828e-05,
      "loss": 0.0015,
      "num_tokens": 701874.0,
      "reward": 1.4929367303848267,
      "reward_std": 0.6083499193191528,
      "rewards/check_gptzero_func": 0.3214285671710968,
      "rewards/check_perplexity_diff_func": 0.7142857313156128,
      "rewards/check_winston_local_func": 0.4572224020957947,
      "step": 71
    },
    {
      "clip_ratio": 0.004331245087087154,
      "epoch": 0.1006993006993007,
      "grad_norm": 0.12876017762018707,
      "kl": 0.216796875,
      "learning_rate": 2.5174825174825178e-05,
      "loss": 0.0,
      "step": 72
    },
    {
      "clip_ratio": 0.003951852675527334,
      "completion_length": 73.46428680419922,
      "epoch": 0.1020979020979021,
      "grad_norm": 0.20791748542634808,
      "kl": 0.365234375,
      "learning_rate": 2.5524475524475528e-05,
      "loss": -0.0037,
      "num_tokens": 718488.0,
      "reward": 1.7953797578811646,
      "reward_std": 0.5393027067184448,
      "rewards/check_gptzero_func": 0.4464285671710968,
      "rewards/check_perplexity_diff_func": 0.8571428656578064,
      "rewards/check_winston_local_func": 0.49180838465690613,
      "step": 73
    },
    {
      "clip_ratio": 0.008312534540891647,
      "epoch": 0.1034965034965035,
      "grad_norm": 0.19232465302245624,
      "kl": 0.5390625,
      "learning_rate": 2.5874125874125877e-05,
      "loss": -0.0053,
      "step": 74
    },
    {
      "clip_ratio": 0.0021736263297498226,
      "completion_length": 97.73214721679688,
      "epoch": 0.1048951048951049,
      "grad_norm": 0.18504738647098173,
      "kl": 0.2421875,
      "learning_rate": 2.6223776223776224e-05,
      "loss": -0.0023,
      "num_tokens": 738619.0,
      "reward": 1.284185767173767,
      "reward_std": 0.6959513425827026,
      "rewards/check_gptzero_func": 0.4464285671710968,
      "rewards/check_perplexity_diff_func": 0.3928571343421936,
      "rewards/check_winston_local_func": 0.4449000358581543,
      "step": 75
    },
    {
      "clip_ratio": 0.0033756059128791094,
      "epoch": 0.1062937062937063,
      "grad_norm": 0.17585639043073228,
      "kl": 0.255859375,
      "learning_rate": 2.6573426573426574e-05,
      "loss": -0.0043,
      "step": 76
    },
    {
      "clip_ratio": 0.003740633837878704,
      "completion_length": 95.67857360839844,
      "epoch": 0.1076923076923077,
      "grad_norm": 0.21573775826044048,
      "kl": 0.2451171875,
      "learning_rate": 2.6923076923076923e-05,
      "loss": 0.0105,
      "num_tokens": 758111.0,
      "reward": 1.4672702550888062,
      "reward_std": 0.85318922996521,
      "rewards/check_gptzero_func": 0.25,
      "rewards/check_perplexity_diff_func": 0.7857142686843872,
      "rewards/check_winston_local_func": 0.43155592679977417,
      "step": 77
    },
    {
      "clip_ratio": 0.004517109598964453,
      "epoch": 0.10909090909090909,
      "grad_norm": 0.2071235680791843,
      "kl": 0.302734375,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.009,
      "step": 78
    },
    {
      "clip_ratio": 0.004641580395400524,
      "completion_length": 117.75000762939453,
      "epoch": 0.11048951048951049,
      "grad_norm": 0.19278645376555428,
      "kl": 0.259765625,
      "learning_rate": 2.762237762237762e-05,
      "loss": -0.0004,
      "num_tokens": 780403.0,
      "reward": 1.4974778890609741,
      "reward_std": 0.59557044506073,
      "rewards/check_gptzero_func": 0.4285714328289032,
      "rewards/check_perplexity_diff_func": 0.6785714030265808,
      "rewards/check_winston_local_func": 0.39033493399620056,
      "step": 79
    },
    {
      "clip_ratio": 0.006305322516709566,
      "epoch": 0.11188811188811189,
      "grad_norm": 0.13786850726930885,
      "kl": 0.3125,
      "learning_rate": 2.7972027972027976e-05,
      "loss": -0.0019,
      "step": 80
    },
    {
      "clip_ratio": 0.0017778041074052453,
      "completion_length": 101.03572082519531,
      "epoch": 0.11328671328671329,
      "grad_norm": 0.18099911272050398,
      "kl": 1.7578125,
      "learning_rate": 2.8321678321678326e-05,
      "loss": 0.0053,
      "num_tokens": 800231.0,
      "reward": 1.420699954032898,
      "reward_std": 0.5104399919509888,
      "rewards/check_gptzero_func": 0.2678571343421936,
      "rewards/check_perplexity_diff_func": 0.6428571343421936,
      "rewards/check_winston_local_func": 0.5099858045578003,
      "step": 81
    },
    {
      "clip_ratio": 0.0030425102449953556,
      "epoch": 0.11468531468531469,
      "grad_norm": 0.10725025403922342,
      "kl": 0.9921875,
      "learning_rate": 2.8671328671328672e-05,
      "loss": 0.0043,
      "step": 82
    },
    {
      "clip_ratio": 0.0030319676734507084,
      "completion_length": 93.3214340209961,
      "epoch": 0.11608391608391608,
      "grad_norm": 0.22858957476463732,
      "kl": 0.35546875,
      "learning_rate": 2.9020979020979022e-05,
      "loss": 0.0021,
      "num_tokens": 819773.0,
      "reward": 1.4201573133468628,
      "reward_std": 0.761705756187439,
      "rewards/check_gptzero_func": 0.3392857015132904,
      "rewards/check_perplexity_diff_func": 0.5714285969734192,
      "rewards/check_winston_local_func": 0.5094431042671204,
      "step": 83
    },
    {
      "clip_ratio": 0.0027519434224814177,
      "epoch": 0.11748251748251748,
      "grad_norm": 0.180287994009811,
      "kl": 0.392578125,
      "learning_rate": 2.9370629370629372e-05,
      "loss": -0.0002,
      "step": 84
    },
    {
      "clip_ratio": 0.0034523813519626856,
      "completion_length": 102.66072082519531,
      "epoch": 0.11888111888111888,
      "grad_norm": 0.25571068617285075,
      "kl": 0.494140625,
      "learning_rate": 2.972027972027972e-05,
      "loss": -0.0103,
      "num_tokens": 840280.0,
      "reward": 1.7226576805114746,
      "reward_std": 0.7545516490936279,
      "rewards/check_gptzero_func": 0.5714285969734192,
      "rewards/check_perplexity_diff_func": 0.6785714030265808,
      "rewards/check_winston_local_func": 0.4726576805114746,
      "step": 85
    },
    {
      "clip_ratio": 0.00493992306292057,
      "epoch": 0.12027972027972028,
      "grad_norm": 0.2172932526081326,
      "kl": 0.490234375,
      "learning_rate": 3.0069930069930068e-05,
      "loss": -0.013,
      "step": 86
    },
    {
      "clip_ratio": 0.003154533449560404,
      "completion_length": 98.96428680419922,
      "epoch": 0.12167832167832168,
      "grad_norm": 0.3651883135439141,
      "kl": 0.5859375,
      "learning_rate": 3.0419580419580425e-05,
      "loss": -0.0129,
      "num_tokens": 860422.0,
      "reward": 1.6449967622756958,
      "reward_std": 0.7163441777229309,
      "rewards/check_gptzero_func": 0.3928571343421936,
      "rewards/check_perplexity_diff_func": 0.7142857313156128,
      "rewards/check_winston_local_func": 0.5378537178039551,
      "step": 87
    },
    {
      "clip_ratio": 0.007247431669384241,
      "epoch": 0.12307692307692308,
      "grad_norm": 0.2167770735292921,
      "kl": 0.61328125,
      "learning_rate": 3.0769230769230774e-05,
      "loss": -0.0168,
      "step": 88
    },
    {
      "clip_ratio": 0.0027036736719310284,
      "completion_length": 108.03572082519531,
      "epoch": 0.12447552447552447,
      "grad_norm": 0.12604084880729077,
      "kl": 0.291015625,
      "learning_rate": 3.111888111888112e-05,
      "loss": -0.0039,
      "num_tokens": 881364.0,
      "reward": 1.3799673318862915,
      "reward_std": 0.6881024837493896,
      "rewards/check_gptzero_func": 0.3928571343421936,
      "rewards/check_perplexity_diff_func": 0.5714285969734192,
      "rewards/check_winston_local_func": 0.4156815707683563,
      "step": 89
    },
    {
      "clip_ratio": 0.005238416139036417,
      "epoch": 0.1258741258741259,
      "grad_norm": 0.12103303780018655,
      "kl": 0.30859375,
      "learning_rate": 3.146853146853147e-05,
      "loss": -0.005,
      "step": 90
    },
    {
      "clip_ratio": 0.003768512513488531,
      "completion_length": 122.80357360839844,
      "epoch": 0.12727272727272726,
      "grad_norm": 0.18423815092289067,
      "kl": 0.67578125,
      "learning_rate": 3.181818181818182e-05,
      "loss": -0.0171,
      "num_tokens": 904487.0,
      "reward": 1.6101170778274536,
      "reward_std": 0.6241902709007263,
      "rewards/check_gptzero_func": 0.3392857015132904,
      "rewards/check_perplexity_diff_func": 0.7857142686843872,
      "rewards/check_winston_local_func": 0.48511695861816406,
      "step": 91
    },
    {
      "clip_ratio": 0.005407070741057396,
      "epoch": 0.12867132867132866,
      "grad_norm": 0.15457101650134836,
      "kl": 0.66796875,
      "learning_rate": 3.216783216783217e-05,
      "loss": -0.0182,
      "step": 92
    },
    {
      "clip_ratio": 0.002042532665655017,
      "completion_length": 108.17857360839844,
      "epoch": 0.13006993006993006,
      "grad_norm": 0.19588538280583437,
      "kl": 1.75,
      "learning_rate": 3.251748251748252e-05,
      "loss": -0.0023,
      "num_tokens": 925633.0,
      "reward": 1.3932582139968872,
      "reward_std": 0.5087055563926697,
      "rewards/check_gptzero_func": 0.4107142984867096,
      "rewards/check_perplexity_diff_func": 0.4642857015132904,
      "rewards/check_winston_local_func": 0.5182580351829529,
      "step": 93
    },
    {
      "clip_ratio": 0.00876330491155386,
      "epoch": 0.13146853146853146,
      "grad_norm": 0.1506256243597478,
      "kl": 0.80078125,
      "learning_rate": 3.2867132867132866e-05,
      "loss": -0.0035,
      "step": 94
    },
    {
      "clip_ratio": 0.003118924330919981,
      "completion_length": 77.4464340209961,
      "epoch": 0.13286713286713286,
      "grad_norm": 0.34784170699852607,
      "kl": 0.72265625,
      "learning_rate": 3.321678321678322e-05,
      "loss": -0.0134,
      "num_tokens": 942892.0,
      "reward": 1.832355260848999,
      "reward_std": 1.064102292060852,
      "rewards/check_gptzero_func": 0.4821428656578064,
      "rewards/check_perplexity_diff_func": 0.8214285969734192,
      "rewards/check_winston_local_func": 0.5287837982177734,
      "step": 95
    },
    {
      "clip_ratio": 0.00509096821770072,
      "epoch": 0.13426573426573427,
      "grad_norm": 0.25968424971577175,
      "kl": 0.96875,
      "learning_rate": 3.356643356643357e-05,
      "loss": -0.0176,
      "step": 96
    },
    {
      "clip_ratio": 0.0037272910121828318,
      "completion_length": 95.26786041259766,
      "epoch": 0.13566433566433567,
      "grad_norm": 0.16914051199341906,
      "kl": 0.349609375,
      "learning_rate": 3.391608391608392e-05,
      "loss": 0.0037,
      "num_tokens": 962317.0,
      "reward": 1.805726170539856,
      "reward_std": 0.831261396408081,
      "rewards/check_gptzero_func": 0.4107142984867096,
      "rewards/check_perplexity_diff_func": 0.9285714030265808,
      "rewards/check_winston_local_func": 0.46644020080566406,
      "step": 97
    },
    {
      "clip_ratio": 0.0034601751249283552,
      "epoch": 0.13706293706293707,
      "grad_norm": 0.16538870434198547,
      "kl": 0.388671875,
      "learning_rate": 3.4265734265734265e-05,
      "loss": 0.0019,
      "step": 98
    },
    {
      "clip_ratio": 0.004487877711653709,
      "completion_length": 111.8214340209961,
      "epoch": 0.13846153846153847,
      "grad_norm": 0.12415078751808917,
      "kl": 0.384765625,
      "learning_rate": 3.461538461538462e-05,
      "loss": -0.001,
      "num_tokens": 983513.0,
      "reward": 1.2062333822250366,
      "reward_std": 0.591974139213562,
      "rewards/check_gptzero_func": 0.3214285671710968,
      "rewards/check_perplexity_diff_func": 0.3571428656578064,
      "rewards/check_winston_local_func": 0.5276618599891663,
      "step": 99
    },
    {
      "clip_ratio": 0.00284260674379766,
      "epoch": 0.13986013986013987,
      "grad_norm": 0.1197068572237121,
      "kl": 0.376953125,
      "learning_rate": 3.4965034965034965e-05,
      "loss": -0.0017,
      "step": 100
    },
    {
      "clip_ratio": 0.0023206709884107113,
      "completion_length": 101.08928680419922,
      "epoch": 0.14125874125874127,
      "grad_norm": 0.16400691003183243,
      "kl": 0.30078125,
      "learning_rate": 3.531468531468531e-05,
      "loss": 0.0033,
      "num_tokens": 1003878.0,
      "reward": 1.631854772567749,
      "reward_std": 0.5223387479782104,
      "rewards/check_gptzero_func": 0.3928571343421936,
      "rewards/check_perplexity_diff_func": 0.7142857313156128,
      "rewards/check_winston_local_func": 0.5247119665145874,
      "step": 101
    },
    {
      "clip_ratio": 0.0016618981026113033,
      "epoch": 0.14265734265734265,
      "grad_norm": 0.1573929247787687,
      "kl": 0.326171875,
      "learning_rate": 3.566433566433567e-05,
      "loss": 0.0012,
      "step": 102
    },
    {
      "clip_ratio": 0.0022201864048838615,
      "completion_length": 124.46429443359375,
      "epoch": 0.14405594405594405,
      "grad_norm": 0.1353861256020412,
      "kl": 0.3828125,
      "learning_rate": 3.601398601398602e-05,
      "loss": 0.005,
      "num_tokens": 1027066.0,
      "reward": 1.6920486688613892,
      "reward_std": 0.48560601472854614,
      "rewards/check_gptzero_func": 0.3928571343421936,
      "rewards/check_perplexity_diff_func": 0.75,
      "rewards/check_winston_local_func": 0.5491914749145508,
      "step": 103
    },
    {
      "clip_ratio": 0.0029075751081109047,
      "epoch": 0.14545454545454545,
      "grad_norm": 0.13328591024866146,
      "kl": 0.39453125,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.0034,
      "step": 104
    },
    {
      "clip_ratio": 0.0019614642951637506,
      "completion_length": 85.26786041259766,
      "epoch": 0.14685314685314685,
      "grad_norm": 0.19674478002774554,
      "kl": 0.333984375,
      "learning_rate": 3.671328671328672e-05,
      "loss": -0.0188,
      "num_tokens": 1045567.0,
      "reward": 1.6822034120559692,
      "reward_std": 0.7559517621994019,
      "rewards/check_gptzero_func": 0.5357142686843872,
      "rewards/check_perplexity_diff_func": 0.6071428656578064,
      "rewards/check_winston_local_func": 0.5393460988998413,
      "step": 105
    },
    {
      "clip_ratio": 0.004534203093498945,
      "epoch": 0.14825174825174825,
      "grad_norm": 0.16787611299305724,
      "kl": 0.3671875,
      "learning_rate": 3.7062937062937064e-05,
      "loss": -0.0217,
      "step": 106
    },
    {
      "clip_ratio": 0.003009920008480549,
      "completion_length": 106.55357360839844,
      "epoch": 0.14965034965034965,
      "grad_norm": 0.19751132540015684,
      "kl": 0.455078125,
      "learning_rate": 3.741258741258741e-05,
      "loss": 0.0111,
      "num_tokens": 1066466.0,
      "reward": 2.178619861602783,
      "reward_std": 0.7016831040382385,
      "rewards/check_gptzero_func": 0.6071428656578064,
      "rewards/check_perplexity_diff_func": 0.8928571343421936,
      "rewards/check_winston_local_func": 0.6786197423934937,
      "step": 107
    },
    {
      "clip_ratio": 0.004473494831472635,
      "epoch": 0.15104895104895105,
      "grad_norm": 0.1431470008572649,
      "kl": 0.4375,
      "learning_rate": 3.776223776223776e-05,
      "loss": 0.0095,
      "step": 108
    },
    {
      "clip_ratio": 0.0026541000697761774,
      "completion_length": 100.6964340209961,
      "epoch": 0.15244755244755245,
      "grad_norm": 0.33135604940901486,
      "kl": 0.310546875,
      "learning_rate": 3.811188811188811e-05,
      "loss": 0.0138,
      "num_tokens": 1086751.0,
      "reward": 1.6315226554870605,
      "reward_std": 0.7806248068809509,
      "rewards/check_gptzero_func": 0.4642857015132904,
      "rewards/check_perplexity_diff_func": 0.6071428656578064,
      "rewards/check_winston_local_func": 0.5600939393043518,
      "step": 109
    },
    {
      "clip_ratio": 0.0028892713598906994,
      "epoch": 0.15384615384615385,
      "grad_norm": 0.24217207616877234,
      "kl": 0.361328125,
      "learning_rate": 3.846153846153846e-05,
      "loss": 0.0074,
      "step": 110
    },
    {
      "clip_ratio": 0.0021747422870248556,
      "completion_length": 84.76786041259766,
      "epoch": 0.15524475524475526,
      "grad_norm": 0.15803622964815373,
      "kl": 0.66796875,
      "learning_rate": 3.8811188811188816e-05,
      "loss": -0.0045,
      "num_tokens": 1104518.0,
      "reward": 1.6692500114440918,
      "reward_std": 0.5965060591697693,
      "rewards/check_gptzero_func": 0.5535714030265808,
      "rewards/check_perplexity_diff_func": 0.6071428656578064,
      "rewards/check_winston_local_func": 0.5085356831550598,
      "step": 111
    },
    {
      "clip_ratio": 0.004626331850886345,
      "epoch": 0.15664335664335666,
      "grad_norm": 0.14920406573741435,
      "kl": 0.7578125,
      "learning_rate": 3.916083916083916e-05,
      "loss": -0.0064,
      "step": 112
    },
    {
      "clip_ratio": 0.002694516209885478,
      "completion_length": 92.17857360839844,
      "epoch": 0.15804195804195803,
      "grad_norm": 0.161427063551978,
      "kl": 0.361328125,
      "learning_rate": 3.9510489510489516e-05,
      "loss": 0.0058,
      "num_tokens": 1123466.0,
      "reward": 1.353637456893921,
      "reward_std": 0.5352396965026855,
      "rewards/check_gptzero_func": 0.4821428656578064,
      "rewards/check_perplexity_diff_func": 0.3571428656578064,
      "rewards/check_winston_local_func": 0.5143517255783081,
      "step": 113
    },
    {
      "clip_ratio": 0.00391918933019042,
      "epoch": 0.15944055944055943,
      "grad_norm": 0.14561571783883442,
      "kl": 0.39453125,
      "learning_rate": 3.986013986013986e-05,
      "loss": 0.0036,
      "step": 114
    },
    {
      "clip_ratio": 0.001278667594306171,
      "completion_length": 107.12500762939453,
      "epoch": 0.16083916083916083,
      "grad_norm": 0.1805624176716803,
      "kl": 0.3046875,
      "learning_rate": 4.020979020979021e-05,
      "loss": -0.001,
      "num_tokens": 1144619.0,
      "reward": 1.6282455921173096,
      "reward_std": 0.7325619459152222,
      "rewards/check_gptzero_func": 0.4464285671710968,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.6461027264595032,
      "step": 115
    },
    {
      "clip_ratio": 0.0017050534952431917,
      "epoch": 0.16223776223776223,
      "grad_norm": 0.16942268791902212,
      "kl": 0.337890625,
      "learning_rate": 4.055944055944056e-05,
      "loss": -0.0027,
      "step": 116
    },
    {
      "clip_ratio": 0.0015462420415133238,
      "completion_length": 114.8214340209961,
      "epoch": 0.16363636363636364,
      "grad_norm": 0.1777133538888581,
      "kl": 0.322265625,
      "learning_rate": 4.0909090909090915e-05,
      "loss": -0.0028,
      "num_tokens": 1166185.0,
      "reward": 1.566156268119812,
      "reward_std": 0.38663557171821594,
      "rewards/check_gptzero_func": 0.4642857015132904,
      "rewards/check_perplexity_diff_func": 0.5,
      "rewards/check_winston_local_func": 0.6018705368041992,
      "step": 117
    },
    {
      "clip_ratio": 0.0030793240293860435,
      "epoch": 0.16503496503496504,
      "grad_norm": 0.15084856022622706,
      "kl": 0.2890625,
      "learning_rate": 4.125874125874126e-05,
      "loss": -0.0048,
      "step": 118
    },
    {
      "clip_ratio": 0.0018360918620601296,
      "completion_length": 100.25000762939453,
      "epoch": 0.16643356643356644,
      "grad_norm": 0.10943003413749244,
      "kl": 0.83984375,
      "learning_rate": 4.1608391608391614e-05,
      "loss": 0.0043,
      "num_tokens": 1186317.0,
      "reward": 1.7977957725524902,
      "reward_std": 0.5172301530838013,
      "rewards/check_gptzero_func": 0.5714285969734192,
      "rewards/check_perplexity_diff_func": 0.7142857313156128,
      "rewards/check_winston_local_func": 0.5120813250541687,
      "step": 119
    },
    {
      "clip_ratio": 0.0025589358992874622,
      "epoch": 0.16783216783216784,
      "grad_norm": 0.10431032316313237,
      "kl": 0.68359375,
      "learning_rate": 4.195804195804196e-05,
      "loss": 0.0029,
      "step": 120
    },
    {
      "clip_ratio": 0.002800893737003207,
      "completion_length": 120.91072082519531,
      "epoch": 0.16923076923076924,
      "grad_norm": 0.1567960287886374,
      "kl": 0.37890625,
      "learning_rate": 4.230769230769231e-05,
      "loss": -0.0016,
      "num_tokens": 1208530.0,
      "reward": 1.5369055271148682,
      "reward_std": 0.7073518633842468,
      "rewards/check_gptzero_func": 0.3214285671710968,
      "rewards/check_perplexity_diff_func": 0.6785714030265808,
      "rewards/check_winston_local_func": 0.5369054079055786,
      "step": 121
    },
    {
      "clip_ratio": 0.0026409339625388384,
      "epoch": 0.17062937062937064,
      "grad_norm": 0.15013150065673506,
      "kl": 0.37890625,
      "learning_rate": 4.265734265734266e-05,
      "loss": -0.0035,
      "step": 122
    },
    {
      "clip_ratio": 0.002211854327470064,
      "completion_length": 110.66072082519531,
      "epoch": 0.17202797202797201,
      "grad_norm": 0.17274409069851862,
      "kl": 0.380859375,
      "learning_rate": 4.300699300699301e-05,
      "loss": -0.0062,
      "num_tokens": 1229171.0,
      "reward": 1.6140996217727661,
      "reward_std": 0.7221139669418335,
      "rewards/check_gptzero_func": 0.4642857015132904,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.6140995025634766,
      "step": 123
    },
    {
      "clip_ratio": 0.005159804597496986,
      "epoch": 0.17342657342657342,
      "grad_norm": 0.13306053466541726,
      "kl": 0.443359375,
      "learning_rate": 4.335664335664335e-05,
      "loss": -0.009,
      "step": 124
    },
    {
      "clip_ratio": 0.0031610180158168077,
      "completion_length": 104.30357360839844,
      "epoch": 0.17482517482517482,
      "grad_norm": 0.19261275892706695,
      "kl": 0.30078125,
      "learning_rate": 4.370629370629371e-05,
      "loss": -0.0129,
      "num_tokens": 1249350.0,
      "reward": 1.9047484397888184,
      "reward_std": 0.6767317652702332,
      "rewards/check_gptzero_func": 0.6071428656578064,
      "rewards/check_perplexity_diff_func": 0.6071428656578064,
      "rewards/check_winston_local_func": 0.6904626488685608,
      "step": 125
    },
    {
      "clip_ratio": 0.004231306724250317,
      "epoch": 0.17622377622377622,
      "grad_norm": 0.1752737652506695,
      "kl": 0.33203125,
      "learning_rate": 4.405594405594406e-05,
      "loss": -0.0162,
      "step": 126
    },
    {
      "clip_ratio": 0.003981932066380978,
      "completion_length": 109.53572082519531,
      "epoch": 0.17762237762237762,
      "grad_norm": 0.1423250496935692,
      "kl": 0.48828125,
      "learning_rate": 4.4405594405594406e-05,
      "loss": -0.0102,
      "num_tokens": 1269848.0,
      "reward": 1.7591207027435303,
      "reward_std": 0.5321380496025085,
      "rewards/check_gptzero_func": 0.5,
      "rewards/check_perplexity_diff_func": 0.6785714030265808,
      "rewards/check_winston_local_func": 0.5805493593215942,
      "step": 127
    },
    {
      "clip_ratio": 0.005287667270749807,
      "epoch": 0.17902097902097902,
      "grad_norm": 0.13255848380623775,
      "kl": 0.498046875,
      "learning_rate": 4.475524475524476e-05,
      "loss": -0.0121,
      "step": 128
    },
    {
      "clip_ratio": 0.0025025398936122656,
      "completion_length": 105.30357360839844,
      "epoch": 0.18041958041958042,
      "grad_norm": 0.1177341259986552,
      "kl": 0.283203125,
      "learning_rate": 4.5104895104895105e-05,
      "loss": -0.0016,
      "num_tokens": 1290033.0,
      "reward": 1.7174798250198364,
      "reward_std": 0.5096268057823181,
      "rewards/check_gptzero_func": 0.5357142686843872,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.6460510492324829,
      "step": 129
    },
    {
      "clip_ratio": 0.0033291254658252,
      "epoch": 0.18181818181818182,
      "grad_norm": 0.11141937591016414,
      "kl": 0.294921875,
      "learning_rate": 4.545454545454546e-05,
      "loss": -0.0032,
      "step": 130
    },
    {
      "clip_ratio": 0.0024425899609923363,
      "completion_length": 96.30357360839844,
      "epoch": 0.18321678321678322,
      "grad_norm": 0.23593816935965856,
      "kl": 0.341796875,
      "learning_rate": 4.5804195804195805e-05,
      "loss": -0.006,
      "num_tokens": 1309356.0,
      "reward": 1.7568891048431396,
      "reward_std": 0.7255779504776001,
      "rewards/check_gptzero_func": 0.5892857313156128,
      "rewards/check_perplexity_diff_func": 0.5357142686843872,
      "rewards/check_winston_local_func": 0.6318890452384949,
      "step": 131
    },
    {
      "clip_ratio": 0.004331678152084351,
      "epoch": 0.18461538461538463,
      "grad_norm": 0.2135627059179365,
      "kl": 0.349609375,
      "learning_rate": 4.615384615384616e-05,
      "loss": -0.0089,
      "step": 132
    },
    {
      "clip_ratio": 0.0028827630449086428,
      "completion_length": 111.6964340209961,
      "epoch": 0.18601398601398603,
      "grad_norm": 0.1622976058137894,
      "kl": 0.369140625,
      "learning_rate": 4.6503496503496505e-05,
      "loss": 0.014,
      "num_tokens": 1330017.0,
      "reward": 1.6692737340927124,
      "reward_std": 0.628279983997345,
      "rewards/check_gptzero_func": 0.4464285671710968,
      "rewards/check_perplexity_diff_func": 0.6428571343421936,
      "rewards/check_winston_local_func": 0.5799878835678101,
      "step": 133
    },
    {
      "clip_ratio": 0.004608546383678913,
      "epoch": 0.1874125874125874,
      "grad_norm": 0.16503328062657166,
      "kl": 0.4140625,
      "learning_rate": 4.685314685314686e-05,
      "loss": 0.0121,
      "step": 134
    },
    {
      "clip_ratio": 0.0027426625601947308,
      "completion_length": 107.58928680419922,
      "epoch": 0.1888111888111888,
      "grad_norm": 0.27926954484170907,
      "kl": 0.5859375,
      "learning_rate": 4.7202797202797204e-05,
      "loss": 0.0139,
      "num_tokens": 1350164.0,
      "reward": 1.9428951740264893,
      "reward_std": 0.6565932035446167,
      "rewards/check_gptzero_func": 0.6785714030265808,
      "rewards/check_perplexity_diff_func": 0.5714285969734192,
      "rewards/check_winston_local_func": 0.6928950548171997,
      "step": 135
    },
    {
      "clip_ratio": 0.00439854059368372,
      "epoch": 0.1902097902097902,
      "grad_norm": 0.1925514269864402,
      "kl": 0.5546875,
      "learning_rate": 4.755244755244756e-05,
      "loss": 0.0074,
      "step": 136
    },
    {
      "clip_ratio": 0.0018738384824246168,
      "completion_length": 116.4464340209961,
      "epoch": 0.1916083916083916,
      "grad_norm": 0.14918417137816656,
      "kl": 0.58984375,
      "learning_rate": 4.7902097902097904e-05,
      "loss": 0.0037,
      "num_tokens": 1371507.0,
      "reward": 2.0137577056884766,
      "reward_std": 0.5378711819648743,
      "rewards/check_gptzero_func": 0.6071428656578064,
      "rewards/check_perplexity_diff_func": 0.6785714030265808,
      "rewards/check_winston_local_func": 0.7280434370040894,
      "step": 137
    },
    {
      "clip_ratio": 0.003529219189658761,
      "epoch": 0.193006993006993,
      "grad_norm": 0.1301060608041501,
      "kl": 0.6171875,
      "learning_rate": 4.825174825174825e-05,
      "loss": 0.0016,
      "step": 138
    },
    {
      "clip_ratio": 0.0011985624441877007,
      "completion_length": 112.6964340209961,
      "epoch": 0.1944055944055944,
      "grad_norm": 0.13034934146859325,
      "kl": 0.322265625,
      "learning_rate": 4.86013986013986e-05,
      "loss": -0.0024,
      "num_tokens": 1392414.0,
      "reward": 1.9355616569519043,
      "reward_std": 0.4726971983909607,
      "rewards/check_gptzero_func": 0.5892857313156128,
      "rewards/check_perplexity_diff_func": 0.75,
      "rewards/check_winston_local_func": 0.596275806427002,
      "step": 139
    },
    {
      "clip_ratio": 0.0032699662260711193,
      "epoch": 0.1958041958041958,
      "grad_norm": 0.12091032490165694,
      "kl": 0.318359375,
      "learning_rate": 4.8951048951048956e-05,
      "loss": -0.0039,
      "step": 140
    },
    {
      "clip_ratio": 0.0013384540798142552,
      "completion_length": 132.05357360839844,
      "epoch": 0.1972027972027972,
      "grad_norm": 0.12728573210952063,
      "kl": 0.59765625,
      "learning_rate": 4.93006993006993e-05,
      "loss": 0.0045,
      "num_tokens": 1416177.0,
      "reward": 1.7924094200134277,
      "reward_std": 0.7202263474464417,
      "rewards/check_gptzero_func": 0.5714285969734192,
      "rewards/check_perplexity_diff_func": 0.7142857313156128,
      "rewards/check_winston_local_func": 0.5066950917243958,
      "step": 141
    },
    {
      "clip_ratio": 0.0017050639726221561,
      "epoch": 0.1986013986013986,
      "grad_norm": 0.12231361158637834,
      "kl": 0.65234375,
      "learning_rate": 4.9650349650349656e-05,
      "loss": 0.0025,
      "step": 142
    },
    {
      "clip_ratio": 0.0015860958956182003,
      "completion_length": 99.73214721679688,
      "epoch": 0.2,
      "grad_norm": 0.1853946177277763,
      "kl": 0.58984375,
      "learning_rate": 5e-05,
      "loss": 0.0021,
      "num_tokens": 1435732.0,
      "reward": 2.0557591915130615,
      "reward_std": 0.503829836845398,
      "rewards/check_gptzero_func": 0.6964285969734192,
      "rewards/check_perplexity_diff_func": 0.75,
      "rewards/check_winston_local_func": 0.6093305945396423,
      "step": 143
    },
    {
      "clip_ratio": 0.002123563550412655,
      "epoch": 0.2013986013986014,
      "grad_norm": 0.13470126319919157,
      "kl": 0.423828125,
      "learning_rate": 5.0349650349650356e-05,
      "loss": -0.0009,
      "step": 144
    },
    {
      "clip_ratio": 0.0018757216166704893,
      "completion_length": 95.73214721679688,
      "epoch": 0.20279720279720279,
      "grad_norm": 0.14930190515465938,
      "kl": 0.388671875,
      "learning_rate": 5.06993006993007e-05,
      "loss": -0.0026,
      "num_tokens": 1454705.0,
      "reward": 2.0560998916625977,
      "reward_std": 0.5541732311248779,
      "rewards/check_gptzero_func": 0.6964285969734192,
      "rewards/check_perplexity_diff_func": 0.5714285969734192,
      "rewards/check_winston_local_func": 0.788242518901825,
      "step": 145
| }, | |
| { | |
| "clip_ratio": 0.003540371311828494, | |
| "epoch": 0.2041958041958042, | |
| "grad_norm": 0.1304251509197979, | |
| "kl": 0.41796875, | |
| "learning_rate": 5.1048951048951055e-05, | |
| "loss": -0.0046, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0031265008728951216, | |
| "completion_length": 80.9464340209961, | |
| "epoch": 0.2055944055944056, | |
| "grad_norm": 0.2087929657840581, | |
| "kl": 1.1328125, | |
| "learning_rate": 5.1398601398601395e-05, | |
| "loss": 0.0026, | |
| "num_tokens": 1472182.0, | |
| "reward": 1.9076076745986938, | |
| "reward_std": 0.46633273363113403, | |
| "rewards/check_gptzero_func": 0.6428571343421936, | |
| "rewards/check_perplexity_diff_func": 0.5, | |
| "rewards/check_winston_local_func": 0.7647504210472107, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0029583375435322523, | |
| "epoch": 0.206993006993007, | |
| "grad_norm": 0.178151823438605, | |
| "kl": 0.66015625, | |
| "learning_rate": 5.1748251748251755e-05, | |
| "loss": -0.0014, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0020277961157262325, | |
| "completion_length": 126.58929443359375, | |
| "epoch": 0.2083916083916084, | |
| "grad_norm": 0.13736153026044381, | |
| "kl": 0.373046875, | |
| "learning_rate": 5.2097902097902094e-05, | |
| "loss": 0.0127, | |
| "num_tokens": 1494581.0, | |
| "reward": 1.870171308517456, | |
| "reward_std": 0.4412032663822174, | |
| "rewards/check_gptzero_func": 0.4464285671710968, | |
| "rewards/check_perplexity_diff_func": 0.8214285969734192, | |
| "rewards/check_winston_local_func": 0.6023141741752625, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0022643795236945152, | |
| "epoch": 0.2097902097902098, | |
| "grad_norm": 0.12942677801199462, | |
| "kl": 0.40625, | |
| "learning_rate": 5.244755244755245e-05, | |
| "loss": 0.0106, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.001886948710307479, | |
| "completion_length": 95.14286041259766, | |
| "epoch": 0.2111888111888112, | |
| "grad_norm": 0.15555312890089615, | |
| "kl": 0.62890625, | |
| "learning_rate": 5.279720279720281e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 1513403.0, | |
| "reward": 1.9735006093978882, | |
| "reward_std": 0.5218394994735718, | |
| "rewards/check_gptzero_func": 0.5535714030265808, | |
| "rewards/check_perplexity_diff_func": 0.6071428656578064, | |
| "rewards/check_winston_local_func": 0.8127861022949219, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.003476199461147189, | |
| "epoch": 0.2125874125874126, | |
| "grad_norm": 0.13833602394582134, | |
| "kl": 0.5390625, | |
| "learning_rate": 5.314685314685315e-05, | |
| "loss": -0.0015, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0029247915372252464, | |
| "completion_length": 104.1964340209961, | |
| "epoch": 0.213986013986014, | |
| "grad_norm": 0.18332277821774023, | |
| "kl": 0.451171875, | |
| "learning_rate": 5.34965034965035e-05, | |
| "loss": -0.0047, | |
| "num_tokens": 1533154.0, | |
| "reward": 1.977178931236267, | |
| "reward_std": 0.5758928656578064, | |
| "rewards/check_gptzero_func": 0.6785714030265808, | |
| "rewards/check_perplexity_diff_func": 0.6785714030265808, | |
| "rewards/check_winston_local_func": 0.6200361251831055, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.002894646255299449, | |
| "epoch": 0.2153846153846154, | |
| "grad_norm": 0.12417162413759981, | |
| "kl": 0.48828125, | |
| "learning_rate": 5.384615384615385e-05, | |
| "loss": -0.0082, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.002533347113057971, | |
| "completion_length": 106.6964340209961, | |
| "epoch": 0.21678321678321677, | |
| "grad_norm": 0.2165608004438418, | |
| "kl": 0.4140625, | |
| "learning_rate": 5.41958041958042e-05, | |
| "loss": 0.0367, | |
| "num_tokens": 1554005.0, | |
| "reward": 2.254620313644409, | |
| "reward_std": 0.6860859394073486, | |
| "rewards/check_gptzero_func": 0.7857142686843872, | |
| "rewards/check_perplexity_diff_func": 0.7142857313156128, | |
| "rewards/check_winston_local_func": 0.7546200752258301, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.005156368017196655, | |
| "epoch": 0.21818181818181817, | |
| "grad_norm": 0.16185524517286934, | |
| "kl": 0.4609375, | |
| "learning_rate": 5.4545454545454546e-05, | |
| "loss": 0.0322, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.002192203886806965, | |
| "completion_length": 129.55357360839844, | |
| "epoch": 0.21958041958041957, | |
| "grad_norm": 0.13686573639431518, | |
| "kl": 0.421875, | |
| "learning_rate": 5.48951048951049e-05, | |
| "loss": 0.0095, | |
| "num_tokens": 1576988.0, | |
| "reward": 2.202296495437622, | |
| "reward_std": 0.4840867817401886, | |
| "rewards/check_gptzero_func": 0.6428571343421936, | |
| "rewards/check_perplexity_diff_func": 0.8928571343421936, | |
| "rewards/check_winston_local_func": 0.6665821075439453, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0037054666317999363, | |
| "epoch": 0.22097902097902097, | |
| "grad_norm": 0.12040805378810834, | |
| "kl": 0.45703125, | |
| "learning_rate": 5.524475524475524e-05, | |
| "loss": 0.0077, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.002324200002476573, | |
| "completion_length": 141.10714721679688, | |
| "epoch": 0.22237762237762237, | |
| "grad_norm": 0.1722210877266639, | |
| "kl": 0.9765625, | |
| "learning_rate": 5.55944055944056e-05, | |
| "loss": -0.0049, | |
| "num_tokens": 1600826.0, | |
| "reward": 2.0245378017425537, | |
| "reward_std": 0.45877185463905334, | |
| "rewards/check_gptzero_func": 0.5892857313156128, | |
| "rewards/check_perplexity_diff_func": 0.75, | |
| "rewards/check_winston_local_func": 0.6852518916130066, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.003470373572781682, | |
| "epoch": 0.22377622377622378, | |
| "grad_norm": 0.13356363193434526, | |
| "kl": 0.65234375, | |
| "learning_rate": 5.594405594405595e-05, | |
| "loss": -0.0085, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0026535126380622387, | |
| "completion_length": 109.76786041259766, | |
| "epoch": 0.22517482517482518, | |
| "grad_norm": 0.409404915148595, | |
| "kl": 0.515625, | |
| "learning_rate": 5.629370629370629e-05, | |
| "loss": -0.0051, | |
| "num_tokens": 1621103.0, | |
| "reward": 2.0040969848632812, | |
| "reward_std": 0.5942196249961853, | |
| "rewards/check_gptzero_func": 0.5178571343421936, | |
| "rewards/check_perplexity_diff_func": 0.7857142686843872, | |
| "rewards/check_winston_local_func": 0.7005256414413452, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0058495416305959225, | |
| "epoch": 0.22657342657342658, | |
| "grad_norm": 0.16376328147020022, | |
| "kl": 0.5078125, | |
| "learning_rate": 5.664335664335665e-05, | |
| "loss": -0.0129, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.002924378262832761, | |
| "completion_length": 114.3214340209961, | |
| "epoch": 0.22797202797202798, | |
| "grad_norm": 0.17698045766822795, | |
| "kl": 0.62109375, | |
| "learning_rate": 5.699300699300699e-05, | |
| "loss": -0.0053, | |
| "num_tokens": 1642629.0, | |
| "reward": 2.0913121700286865, | |
| "reward_std": 0.533393144607544, | |
| "rewards/check_gptzero_func": 0.6785714030265808, | |
| "rewards/check_perplexity_diff_func": 0.6785714030265808, | |
| "rewards/check_winston_local_func": 0.7341693639755249, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.006533453240990639, | |
| "epoch": 0.22937062937062938, | |
| "grad_norm": 0.15566114152311913, | |
| "kl": 0.66796875, | |
| "learning_rate": 5.7342657342657345e-05, | |
| "loss": -0.0085, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.002033352619037032, | |
| "completion_length": 110.48214721679688, | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.19340316318251602, | |
| "kl": 0.4453125, | |
| "learning_rate": 5.769230769230769e-05, | |
| "loss": 0.0054, | |
| "num_tokens": 1663000.0, | |
| "reward": 1.809409499168396, | |
| "reward_std": 0.5209853053092957, | |
| "rewards/check_gptzero_func": 0.6428571343421936, | |
| "rewards/check_perplexity_diff_func": 0.3571428656578064, | |
| "rewards/check_winston_local_func": 0.809409499168396, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.005329853855073452, | |
| "epoch": 0.23216783216783216, | |
| "grad_norm": 0.15299329983895626, | |
| "kl": 0.55078125, | |
| "learning_rate": 5.8041958041958044e-05, | |
| "loss": 0.0018, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0025733087677508593, | |
| "completion_length": 95.73214721679688, | |
| "epoch": 0.23356643356643356, | |
| "grad_norm": 0.16102285972903455, | |
| "kl": 0.458984375, | |
| "learning_rate": 5.83916083916084e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 1682123.0, | |
| "reward": 1.9705681800842285, | |
| "reward_std": 0.5465096235275269, | |
| "rewards/check_gptzero_func": 0.6428571343421936, | |
| "rewards/check_perplexity_diff_func": 0.6428571343421936, | |
| "rewards/check_winston_local_func": 0.6848538517951965, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0034487086813896894, | |
| "epoch": 0.23496503496503496, | |
| "grad_norm": 0.12896214559568192, | |
| "kl": 0.474609375, | |
| "learning_rate": 5.8741258741258744e-05, | |
| "loss": -0.0039, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0012721805833280087, | |
| "completion_length": 72.125, | |
| "epoch": 0.23636363636363636, | |
| "grad_norm": 0.19976827264055144, | |
| "kl": 0.5703125, | |
| "learning_rate": 5.90909090909091e-05, | |
| "loss": -0.0046, | |
| "num_tokens": 1698078.0, | |
| "reward": 1.8698246479034424, | |
| "reward_std": 0.4430284798145294, | |
| "rewards/check_gptzero_func": 0.7142857313156128, | |
| "rewards/check_perplexity_diff_func": 0.3571428656578064, | |
| "rewards/check_winston_local_func": 0.7983959913253784, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.005269515328109264, | |
| "epoch": 0.23776223776223776, | |
| "grad_norm": 0.14855282442633314, | |
| "kl": 0.61328125, | |
| "learning_rate": 5.944055944055944e-05, | |
| "loss": -0.0079, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0027731256559491158, | |
| "completion_length": 117.83929443359375, | |
| "epoch": 0.23916083916083916, | |
| "grad_norm": 0.23058249563979205, | |
| "kl": 0.412109375, | |
| "learning_rate": 5.9790209790209796e-05, | |
| "loss": 0.0055, | |
| "num_tokens": 1719673.0, | |
| "reward": 2.2663450241088867, | |
| "reward_std": 0.46630793809890747, | |
| "rewards/check_gptzero_func": 0.6785714030265808, | |
| "rewards/check_perplexity_diff_func": 0.7142857313156128, | |
| "rewards/check_winston_local_func": 0.8734878301620483, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0034511894918978214, | |
| "epoch": 0.24055944055944056, | |
| "grad_norm": 0.17368207545699044, | |
| "kl": 0.373046875, | |
| "learning_rate": 6.0139860139860136e-05, | |
| "loss": -0.0007, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0032431341242045164, | |
| "completion_length": 126.85714721679688, | |
| "epoch": 0.24195804195804196, | |
| "grad_norm": 0.17995690811350934, | |
| "kl": 0.353515625, | |
| "learning_rate": 6.048951048951049e-05, | |
| "loss": -0.0011, | |
| "num_tokens": 1742569.0, | |
| "reward": 2.258990526199341, | |
| "reward_std": 0.6008436679840088, | |
| "rewards/check_gptzero_func": 0.75, | |
| "rewards/check_perplexity_diff_func": 0.75, | |
| "rewards/check_winston_local_func": 0.7589904069900513, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.006950656417757273, | |
| "epoch": 0.24335664335664337, | |
| "grad_norm": 0.13892848528506246, | |
| "kl": 0.359375, | |
| "learning_rate": 6.083916083916085e-05, | |
| "loss": -0.0033, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0027662119828164577, | |
| "completion_length": 130.8928680419922, | |
| "epoch": 0.24475524475524477, | |
| "grad_norm": 0.10040261340554028, | |
| "kl": 0.345703125, | |
| "learning_rate": 6.118881118881119e-05, | |
| "loss": -0.0032, | |
| "num_tokens": 1765643.0, | |
| "reward": 2.074061632156372, | |
| "reward_std": 0.3745954632759094, | |
| "rewards/check_gptzero_func": 0.6607142686843872, | |
| "rewards/check_perplexity_diff_func": 0.6785714030265808, | |
| "rewards/check_winston_local_func": 0.7347758412361145, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0030742601957172155, | |
| "epoch": 0.24615384615384617, | |
| "grad_norm": 0.0921134439221548, | |
| "kl": 0.3359375, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": -0.0044, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.002027927665039897, | |
| "completion_length": 89.73214721679688, | |
| "epoch": 0.24755244755244754, | |
| "grad_norm": 0.1709976566266231, | |
| "kl": 0.50390625, | |
| "learning_rate": 6.188811188811188e-05, | |
| "loss": 0.0051, | |
| "num_tokens": 1783674.0, | |
| "reward": 1.7810758352279663, | |
| "reward_std": 0.4943030774593353, | |
| "rewards/check_gptzero_func": 0.6428571343421936, | |
| "rewards/check_perplexity_diff_func": 0.4285714328289032, | |
| "rewards/check_winston_local_func": 0.7096471786499023, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0020612890366464853, | |
| "epoch": 0.24895104895104894, | |
| "grad_norm": 0.1477275348028994, | |
| "kl": 0.50390625, | |
| "learning_rate": 6.223776223776224e-05, | |
| "loss": 0.0011, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0021104796323925257, | |
| "completion_length": 132.10714721679688, | |
| "epoch": 0.25034965034965034, | |
| "grad_norm": 0.08842848405697774, | |
| "kl": 0.39453125, | |
| "learning_rate": 6.258741258741259e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 1806754.0, | |
| "reward": 2.107767105102539, | |
| "reward_std": 0.269815057516098, | |
| "rewards/check_gptzero_func": 0.75, | |
| "rewards/check_perplexity_diff_func": 0.5357142686843872, | |
| "rewards/check_winston_local_func": 0.8220529556274414, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.004407648928463459, | |
| "epoch": 0.2517482517482518, | |
| "grad_norm": 0.07542595290297384, | |
| "kl": 0.416015625, | |
| "learning_rate": 6.293706293706293e-05, | |
| "loss": -0.0006, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0018615310546010733, | |
| "completion_length": 89.30357360839844, | |
| "epoch": 0.25314685314685315, | |
| "grad_norm": 0.2895549842864223, | |
| "kl": 0.498046875, | |
| "learning_rate": 6.32867132867133e-05, | |
| "loss": -0.0082, | |
| "num_tokens": 1824795.0, | |
| "reward": 1.8801069259643555, | |
| "reward_std": 0.5523244738578796, | |
| "rewards/check_gptzero_func": 0.6071428656578064, | |
| "rewards/check_perplexity_diff_func": 0.5, | |
| "rewards/check_winston_local_func": 0.7729640603065491, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.007555535528808832, | |
| "epoch": 0.2545454545454545, | |
| "grad_norm": 0.20874610098022578, | |
| "kl": 0.59375, | |
| "learning_rate": 6.363636363636364e-05, | |
| "loss": -0.016, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0016618422232568264, | |
| "completion_length": 84.46428680419922, | |
| "epoch": 0.25594405594405595, | |
| "grad_norm": 0.20653753743630154, | |
| "kl": 0.5625, | |
| "learning_rate": 6.398601398601399e-05, | |
| "loss": 0.0058, | |
| "num_tokens": 1842573.0, | |
| "reward": 2.1319751739501953, | |
| "reward_std": 0.5030335187911987, | |
| "rewards/check_gptzero_func": 0.7321428656578064, | |
| "rewards/check_perplexity_diff_func": 0.5714285969734192, | |
| "rewards/check_winston_local_func": 0.8284037709236145, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.00579726742580533, | |
| "epoch": 0.2573426573426573, | |
| "grad_norm": 0.15782001907156346, | |
| "kl": 0.578125, | |
| "learning_rate": 6.433566433566433e-05, | |
| "loss": 0.0001, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0016176491044461727, | |
| "completion_length": 125.39286041259766, | |
| "epoch": 0.25874125874125875, | |
| "grad_norm": 0.12385736188741701, | |
| "kl": 0.4921875, | |
| "learning_rate": 6.46853146853147e-05, | |
| "loss": -0.0027, | |
| "num_tokens": 1864297.0, | |
| "reward": 1.7732529640197754, | |
| "reward_std": 0.3774341642856598, | |
| "rewards/check_gptzero_func": 0.625, | |
| "rewards/check_perplexity_diff_func": 0.4285714328289032, | |
| "rewards/check_winston_local_func": 0.7196813821792603, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.00623862212523818, | |
| "epoch": 0.2601398601398601, | |
| "grad_norm": 0.10877531810926387, | |
| "kl": 0.53125, | |
| "learning_rate": 6.503496503496504e-05, | |
| "loss": -0.0048, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0031279984395951033, | |
| "completion_length": 98.00000762939453, | |
| "epoch": 0.26153846153846155, | |
| "grad_norm": 0.3661429387824046, | |
| "kl": 0.6328125, | |
| "learning_rate": 6.538461538461539e-05, | |
| "loss": -0.0063, | |
| "num_tokens": 1882931.0, | |
| "reward": 2.0694050788879395, | |
| "reward_std": 0.6090536117553711, | |
| "rewards/check_gptzero_func": 0.6785714030265808, | |
| "rewards/check_perplexity_diff_func": 0.5714285969734192, | |
| "rewards/check_winston_local_func": 0.819405198097229, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.010017934255301952, | |
| "epoch": 0.2629370629370629, | |
| "grad_norm": 0.34977841113727764, | |
| "kl": 0.65234375, | |
| "learning_rate": 6.573426573426573e-05, | |
| "loss": -0.0185, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.00222155568189919, | |
| "completion_length": 133.44644165039062, | |
| "epoch": 0.26433566433566436, | |
| "grad_norm": 0.18715243199492715, | |
| "kl": 0.46484375, | |
| "learning_rate": 6.608391608391609e-05, | |
| "loss": 0.0062, | |
| "num_tokens": 1906084.0, | |
| "reward": 2.364468812942505, | |
| "reward_std": 0.5593475699424744, | |
| "rewards/check_gptzero_func": 0.6964285969734192, | |
| "rewards/check_perplexity_diff_func": 0.8571428656578064, | |
| "rewards/check_winston_local_func": 0.8108974099159241, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.008103223517537117, | |
| "epoch": 0.26573426573426573, | |
| "grad_norm": 0.14058389303467, | |
| "kl": 0.45703125, | |
| "learning_rate": 6.643356643356644e-05, | |
| "loss": 0.0025, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0019668787717819214, | |
| "completion_length": 142.07144165039062, | |
| "epoch": 0.26713286713286716, | |
| "grad_norm": 0.14399812178023874, | |
| "kl": 0.43359375, | |
| "learning_rate": 6.678321678321679e-05, | |
| "loss": -0.0031, | |
| "num_tokens": 1930224.0, | |
| "reward": 2.1098926067352295, | |
| "reward_std": 0.5390018820762634, | |
| "rewards/check_gptzero_func": 0.8214285969734192, | |
| "rewards/check_perplexity_diff_func": 0.4642857015132904, | |
| "rewards/check_winston_local_func": 0.8241782784461975, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.005082913674414158, | |
| "epoch": 0.26853146853146853, | |
| "grad_norm": 0.12024703070308183, | |
| "kl": 0.447265625, | |
| "learning_rate": 6.713286713286715e-05, | |
| "loss": -0.0055, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0032251765951514244, | |
| "completion_length": 84.98214721679688, | |
| "epoch": 0.2699300699300699, | |
| "grad_norm": 0.32527529059153654, | |
| "kl": 0.6640625, | |
| "learning_rate": 6.748251748251748e-05, | |
| "loss": 0.0186, | |
| "num_tokens": 1947399.0, | |
| "reward": 1.9443087577819824, | |
| "reward_std": 0.6417390704154968, | |
| "rewards/check_gptzero_func": 0.7678571343421936, | |
| "rewards/check_perplexity_diff_func": 0.2857142984867096, | |
| "rewards/check_winston_local_func": 0.8907372355461121, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.008085422217845917, | |
| "epoch": 0.27132867132867133, | |
| "grad_norm": 0.21806984416977268, | |
| "kl": 0.69140625, | |
| "learning_rate": 6.783216783216784e-05, | |
| "loss": 0.0068, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0025381618179380894, | |
| "completion_length": 91.66072082519531, | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 0.1831840025015104, | |
| "kl": 0.53125, | |
| "learning_rate": 6.818181818181818e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 1965402.0, | |
| "reward": 2.3979477882385254, | |
| "reward_std": 0.45264866948127747, | |
| "rewards/check_gptzero_func": 0.7678571343421936, | |
| "rewards/check_perplexity_diff_func": 0.7857142686843872, | |
| "rewards/check_winston_local_func": 0.8443759083747864, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.005082620773464441, | |
| "epoch": 0.27412587412587414, | |
| "grad_norm": 0.14399845631168656, | |
| "kl": 0.515625, | |
| "learning_rate": 6.853146853146853e-05, | |
| "loss": -0.0023, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0025359569117426872, | |
| "completion_length": 118.80357360839844, | |
| "epoch": 0.2755244755244755, | |
| "grad_norm": 0.1617404225919446, | |
| "kl": 0.5625, | |
| "learning_rate": 6.888111888111889e-05, | |
| "loss": 0.0146, | |
| "num_tokens": 1986731.0, | |
| "reward": 2.078895330429077, | |
| "reward_std": 0.31289052963256836, | |
| "rewards/check_gptzero_func": 0.8035714030265808, | |
| "rewards/check_perplexity_diff_func": 0.3928571343421936, | |
| "rewards/check_winston_local_func": 0.882466733455658, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0062335156835615635, | |
| "epoch": 0.27692307692307694, | |
| "grad_norm": 0.12006362566688389, | |
| "kl": 0.69921875, | |
| "learning_rate": 6.923076923076924e-05, | |
| "loss": 0.0125, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0023628328926861286, | |
| "completion_length": 113.76786041259766, | |
| "epoch": 0.2783216783216783, | |
| "grad_norm": 0.19235514876641358, | |
| "kl": 0.58984375, | |
| "learning_rate": 6.958041958041958e-05, | |
| "loss": -0.0069, | |
| "num_tokens": 2007822.0, | |
| "reward": 2.0453529357910156, | |
| "reward_std": 0.576248288154602, | |
| "rewards/check_gptzero_func": 0.7321428656578064, | |
| "rewards/check_perplexity_diff_func": 0.5714285969734192, | |
| "rewards/check_winston_local_func": 0.7417814135551453, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0030245708767324686, | |
| "epoch": 0.27972027972027974, | |
| "grad_norm": 0.15320753759426498, | |
| "kl": 0.578125, | |
| "learning_rate": 6.993006993006993e-05, | |
| "loss": -0.0115, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0028001146856695414, | |
| "completion_length": 121.51786041259766, | |
| "epoch": 0.2811188811188811, | |
| "grad_norm": 0.2770839119590159, | |
| "kl": 0.5859375, | |
| "learning_rate": 7.027972027972029e-05, | |
| "loss": -0.0321, | |
| "num_tokens": 2030153.0, | |
| "reward": 2.223505735397339, | |
| "reward_std": 0.580276370048523, | |
| "rewards/check_gptzero_func": 0.8392857313156128, | |
| "rewards/check_perplexity_diff_func": 0.5, | |
| "rewards/check_winston_local_func": 0.8842198252677917, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.004435424692928791, | |
| "epoch": 0.28251748251748254, | |
| "grad_norm": 0.1752123363810394, | |
| "kl": 0.61328125, | |
| "learning_rate": 7.062937062937062e-05, | |
| "loss": -0.0398, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.003775001736357808, | |
| "completion_length": 132.96429443359375, | |
| "epoch": 0.2839160839160839, | |
| "grad_norm": 0.20528144857768493, | |
| "kl": 0.5390625, | |
| "learning_rate": 7.097902097902098e-05, | |
| "loss": -0.0054, | |
| "num_tokens": 2053611.0, | |
| "reward": 2.4398131370544434, | |
| "reward_std": 0.6120996475219727, | |
| "rewards/check_gptzero_func": 0.8214285969734192, | |
| "rewards/check_perplexity_diff_func": 0.8571428656578064, | |
| "rewards/check_winston_local_func": 0.761241614818573, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.0037133130244910717, | |
| "epoch": 0.2853146853146853, | |
| "grad_norm": 0.14916355666745199, | |
| "kl": 0.55859375, | |
| "learning_rate": 7.132867132867134e-05, | |
| "loss": -0.0112, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.002573356730863452, | |
| "completion_length": 109.51786041259766, | |
| "epoch": 0.2867132867132867, | |
| "grad_norm": 0.15672915743695726, | |
| "kl": 1.4921875, | |
| "learning_rate": 7.167832167832168e-05, | |
| "loss": 0.0032, | |
| "num_tokens": 2073846.0, | |
| "reward": 2.221467971801758, | |
| "reward_std": 0.492183119058609, | |
| "rewards/check_gptzero_func": 0.6964285969734192, | |
| "rewards/check_perplexity_diff_func": 0.6428571343421936, | |
| "rewards/check_winston_local_func": 0.8821821212768555, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0036338225472718477, | |
| "epoch": 0.2881118881118881, | |
| "grad_norm": 0.27785877634425765, | |
| "kl": 0.74609375, | |
| "learning_rate": 7.202797202797204e-05, | |
| "loss": 0.0013, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.004580673761665821, | |
| "completion_length": 95.05357360839844, | |
| "epoch": 0.2895104895104895, | |
| "grad_norm": 0.1746155777114636, | |
| "kl": 0.671875, | |
| "learning_rate": 7.237762237762238e-05, | |
| "loss": -0.0068, | |
| "num_tokens": 2092355.0, | |
| "reward": 2.0429205894470215, | |
| "reward_std": 0.36572587490081787, | |
| "rewards/check_gptzero_func": 0.6964285969734192, | |
| "rewards/check_perplexity_diff_func": 0.4642857015132904, | |
| "rewards/check_winston_local_func": 0.8822061419487, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.007289381232112646, | |
| "epoch": 0.2909090909090909, | |
| "grad_norm": 0.12704093759252294, | |
| "kl": 0.75390625, | |
| "learning_rate": 7.272727272727273e-05, | |
| "loss": -0.0111, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0038105440326035023, | |
| "completion_length": 113.4464340209961, | |
| "epoch": 0.2923076923076923, | |
| "grad_norm": 0.20958397036994925, | |
| "kl": 0.6015625, | |
| "learning_rate": 7.307692307692307e-05, | |
| "loss": 0.0082, | |
| "num_tokens": 2113542.0, | |
| "reward": 2.162785530090332, | |
| "reward_std": 0.49807849526405334, | |
| "rewards/check_gptzero_func": 0.75, | |
| "rewards/check_perplexity_diff_func": 0.5714285969734192, | |
| "rewards/check_winston_local_func": 0.8413568139076233, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.007606918923556805, | |
| "epoch": 0.2937062937062937, | |
| "grad_norm": 0.1766466418371376, | |
| "kl": 0.63671875, | |
| "learning_rate": 7.342657342657343e-05, | |
| "loss": 0.0025, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0019986790139228106, | |
| "completion_length": 92.73214721679688, | |
| "epoch": 0.2951048951048951, | |
| "grad_norm": 0.2716166901527731, | |
| "kl": 0.6171875, | |
| "learning_rate": 7.377622377622378e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 2131873.0, | |
| "reward": 2.099912405014038, | |
| "reward_std": 0.5661742687225342, | |
| "rewards/check_gptzero_func": 0.8214285969734192, | |
| "rewards/check_perplexity_diff_func": 0.4285714328289032, | |
| "rewards/check_winston_local_func": 0.8499122858047485, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.005917501635849476, | |
| "epoch": 0.2965034965034965, | |
| "grad_norm": 0.19270405158731238, | |
| "kl": 0.62890625, | |
| "learning_rate": 7.412587412587413e-05, | |
| "loss": -0.0079, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.002530448604375124, | |
| "completion_length": 96.51786041259766, | |
| "epoch": 0.29790209790209793, | |
| "grad_norm": 0.22723256705117592, | |
| "kl": 0.69921875, | |
| "learning_rate": 7.447552447552449e-05, | |
| "loss": -0.009, | |
| "num_tokens": 2150462.0, | |
| "reward": 2.1355700492858887, | |
| "reward_std": 0.3738899827003479, | |
| "rewards/check_gptzero_func": 0.8571428656578064, | |
| "rewards/check_perplexity_diff_func": 0.3571428656578064, | |
| "rewards/check_winston_local_func": 0.9212842583656311, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.011397747322916985, | |
| "epoch": 0.2993006993006993, | |
| "grad_norm": 0.18189402035580596, | |
| "kl": 0.7265625, | |
| "learning_rate": 7.482517482517482e-05, | |
| "loss": -0.0139, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.004513947293162346, | |
| "completion_length": 94.66072082519531, | |
| "epoch": 0.3006993006993007, | |
| "grad_norm": 0.2675295151019955, | |
| "kl": 0.67578125, | |
| "learning_rate": 7.517482517482518e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 2169243.0, | |
| "reward": 2.2817916870117188, | |
| "reward_std": 0.4118366539478302, | |
| "rewards/check_gptzero_func": 0.8571428656578064, | |
| "rewards/check_perplexity_diff_func": 0.4642857015132904, | |
| "rewards/check_winston_local_func": 0.9603630900382996, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.010964194312691689, | |
| "epoch": 0.3020979020979021, | |
| "grad_norm": 0.17031623307626814, | |
| "kl": 0.875, | |
| "learning_rate": 7.552447552447553e-05, | |
| "loss": -0.0057, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.003371638245880604, | |
| "completion_length": 118.17857360839844, | |
| "epoch": 0.3034965034965035, | |
| "grad_norm": 0.1847364030092435, | |
| "kl": 0.60546875, | |
| "learning_rate": 7.587412587412587e-05, | |
| "loss": -0.0022, | |
| "num_tokens": 2190571.0, | |
| "reward": 2.1225900650024414, | |
| "reward_std": 0.5403507947921753, | |
| "rewards/check_gptzero_func": 0.75, | |
| "rewards/check_perplexity_diff_func": 0.4642857015132904, | |
| "rewards/check_winston_local_func": 0.9083043336868286, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.01002263929694891, | |
| "epoch": 0.3048951048951049, | |
| "grad_norm": 0.1449002041728082, | |
| "kl": 0.61328125, | |
| "learning_rate": 7.622377622377622e-05, | |
| "loss": -0.0067, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0013253266224637628, | |
| "completion_length": 129.33929443359375, | |
| "epoch": 0.3062937062937063, | |
| "grad_norm": 0.1504069021149342, | |
| "kl": 0.578125, | |
| "learning_rate": 7.657342657342658e-05, | |
| "loss": -0.0075, | |
| "num_tokens": 2213408.0, | |
| "reward": 2.1974618434906006, | |
| "reward_std": 0.4602964520454407, | |
| "rewards/check_gptzero_func": 0.7857142686843872, | |
| "rewards/check_perplexity_diff_func": 0.5, | |
| "rewards/check_winston_local_func": 0.911747395992279, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.006876418832689524, | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.11992971275284438, | |
| "kl": 0.57421875, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": -0.0101, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0024474726524204016, | |
| "completion_length": 89.67857360839844, | |
| "epoch": 0.3090909090909091, | |
| "grad_norm": 0.46915411380362765, | |
| "kl": 0.73046875, | |
| "learning_rate": 7.727272727272727e-05, | |
| "loss": 0.0333, | |
| "num_tokens": 2231248.0, | |
| "reward": 2.161350727081299, | |
| "reward_std": 0.47136664390563965, | |
| "rewards/check_gptzero_func": 0.8214285969734192, | |
| "rewards/check_perplexity_diff_func": 0.4642857015132904, | |
| "rewards/check_winston_local_func": 0.8756363987922668, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio": 0.02464432455599308, | |
| "epoch": 0.3104895104895105, | |
| "grad_norm": 0.30204889343007607, | |
| "kl": 0.8046875, | |
| "learning_rate": 7.762237762237763e-05, | |
| "loss": 0.0211, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio": 0.0017527465242892504, | |
| "completion_length": 134.17857360839844, | |
| "epoch": 0.3118881118881119, | |
| "grad_norm": 0.18948229022832336, | |
| "kl": 0.6328125, | |
| "learning_rate": 7.797202797202798e-05, | |
| "loss": -0.0049, | |
| "num_tokens": 2255162.0, | |
| "reward": 2.4446465969085693, | |
| "reward_std": 0.5171672105789185, | |
| "rewards/check_gptzero_func": 0.7678571343421936, | |
| "rewards/check_perplexity_diff_func": 0.7857142686843872, | |
| "rewards/check_winston_local_func": 0.8910752534866333, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio": 0.006593752186745405, | |
| "epoch": 0.3132867132867133, | |
| "grad_norm": 0.15013602627394823, | |
| "kl": 0.640625, | |
| "learning_rate": 7.832167832167832e-05, | |
| "loss": -0.0097, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio": 0.0019926591776311398, | |
| "completion_length": 127.00000762939453, | |
| "epoch": 0.3146853146853147, | |
| "grad_norm": 0.22875481051435598, | |
| "kl": 0.6875, | |
| "learning_rate": 7.867132867132867e-05, | |
| "loss": 0.0021, | |
| "num_tokens": 2277320.0, | |
| "reward": 2.2040371894836426, | |
| "reward_std": 0.5528277158737183, | |
| "rewards/check_gptzero_func": 0.7321428656578064, | |
| "rewards/check_perplexity_diff_func": 0.5357142686843872, | |
| "rewards/check_winston_local_func": 0.936180055141449, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.005839359946548939, | |
| "epoch": 0.31608391608391606, | |
| "grad_norm": 0.17829200080123941, | |
| "kl": 0.68359375, | |
| "learning_rate": 7.902097902097903e-05, | |
| "loss": -0.0053, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio": 0.0026252593379467726, | |
| "completion_length": 130.0357208251953, | |
| "epoch": 0.3174825174825175, | |
| "grad_norm": 0.16076101957107508, | |
| "kl": 0.66015625, | |
| "learning_rate": 7.937062937062938e-05, | |
| "loss": -0.0033, | |
| "num_tokens": 2299818.0, | |
| "reward": 2.3278117179870605, | |
| "reward_std": 0.48680880665779114, | |
| "rewards/check_gptzero_func": 0.8035714030265808, | |
| "rewards/check_perplexity_diff_func": 0.6428571343421936, | |
| "rewards/check_winston_local_func": 0.8813830614089966, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio": 0.004954828415066004, | |
| "epoch": 0.31888111888111886, | |
| "grad_norm": 0.12734264227867195, | |
| "kl": 0.6875, | |
| "learning_rate": 7.972027972027972e-05, | |
| "loss": -0.0071, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio": 0.0034341278951615095, | |
| "completion_length": 133.7857208251953, | |
| "epoch": 0.3202797202797203, | |
| "grad_norm": 0.24065024754984402, | |
| "kl": 0.6875, | |
| "learning_rate": 8.006993006993007e-05, | |
| "loss": -0.0052, | |
| "num_tokens": 2322948.0, | |
| "reward": 2.5632615089416504, | |
| "reward_std": 0.6206622123718262, | |
| "rewards/check_gptzero_func": 0.9642857313156128, | |
| "rewards/check_perplexity_diff_func": 0.6785714030265808, | |
| "rewards/check_winston_local_func": 0.9204041361808777, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio": 0.006521092262119055, | |
| "epoch": 0.32167832167832167, | |
| "grad_norm": 0.18602361781154253, | |
| "kl": 0.6796875, | |
| "learning_rate": 8.041958041958042e-05, | |
| "loss": -0.013, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.004207184072583914, | |
| "completion_length": 116.53572082519531, | |
| "epoch": 0.3230769230769231, | |
| "grad_norm": 0.36704569865090053, | |
| "kl": 0.69921875, | |
| "learning_rate": 8.076923076923078e-05, | |
| "loss": 0.0055, | |
| "num_tokens": 2344106.0, | |
| "reward": 2.4917171001434326, | |
| "reward_std": 0.6073458790779114, | |
| "rewards/check_gptzero_func": 0.8392857313156128, | |
| "rewards/check_perplexity_diff_func": 0.75, | |
| "rewards/check_winston_local_func": 0.902431309223175, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio": 0.013317321427166462, | |
| "epoch": 0.32447552447552447, | |
| "grad_norm": 0.13844634287775134, | |
| "kl": 0.73046875, | |
| "learning_rate": 8.111888111888112e-05, | |
| "loss": -0.0039, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio": 0.004024635534733534, | |
| "completion_length": 134.92857360839844, | |
| "epoch": 0.3258741258741259, | |
| "grad_norm": 0.34168858248313216, | |
| "kl": 0.64453125, | |
| "learning_rate": 8.146853146853147e-05, | |
| "loss": -0.0057, | |
| "num_tokens": 2367724.0, | |
| "reward": 2.20210599899292, | |
| "reward_std": 0.5888614654541016, | |
| "rewards/check_gptzero_func": 0.7321428656578064, | |
| "rewards/check_perplexity_diff_func": 0.6071428656578064, | |
| "rewards/check_winston_local_func": 0.8628200888633728, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio": 0.009708845987915993, | |
| "epoch": 0.32727272727272727, | |
| "grad_norm": 0.19135959991877635, | |
| "kl": 0.67578125, | |
| "learning_rate": 8.181818181818183e-05, | |
| "loss": -0.0146, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio": 0.005571329966187477, | |
| "completion_length": 98.75000762939453, | |
| "epoch": 0.32867132867132864, | |
| "grad_norm": 0.32328761096657005, | |
| "kl": 0.77734375, | |
| "learning_rate": 8.216783216783218e-05, | |
| "loss": 0.0128, | |
| "num_tokens": 2386636.0, | |
| "reward": 2.2358410358428955, | |
| "reward_std": 0.599101185798645, | |
| "rewards/check_gptzero_func": 0.8214285969734192, | |
| "rewards/check_perplexity_diff_func": 0.5357142686843872, | |
| "rewards/check_winston_local_func": 0.8786982297897339, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.009436835534870625, | |
| "epoch": 0.3300699300699301, | |
| "grad_norm": 0.19557085227388898, | |
| "kl": 0.8515625, | |
| "learning_rate": 8.251748251748252e-05, | |
| "loss": 0.0022, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio": 0.006164635997265577, | |
| "completion_length": 79.75, | |
| "epoch": 0.33146853146853145, | |
| "grad_norm": 0.3250433111655094, | |
| "kl": 0.98828125, | |
| "learning_rate": 8.286713286713287e-05, | |
| "loss": -0.0126, | |
| "num_tokens": 2403400.0, | |
| "reward": 2.1074206829071045, | |
| "reward_std": 0.41642776131629944, | |
| "rewards/check_gptzero_func": 0.9107142686843872, | |
| "rewards/check_perplexity_diff_func": 0.25, | |
| "rewards/check_winston_local_func": 0.946706235408783, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio": 0.0066314926370978355, | |
| "epoch": 0.3328671328671329, | |
| "grad_norm": 0.1612667131666411, | |
| "kl": 0.98828125, | |
| "learning_rate": 8.321678321678323e-05, | |
| "loss": -0.0225, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio": 0.003079179208725691, | |
| "completion_length": 117.10714721679688, | |
| "epoch": 0.33426573426573425, | |
| "grad_norm": 0.2505534626607256, | |
| "kl": 0.6796875, | |
| "learning_rate": 8.356643356643356e-05, | |
| "loss": -0.0103, | |
| "num_tokens": 2425354.0, | |
| "reward": 2.3166799545288086, | |
| "reward_std": 0.5270359516143799, | |
| "rewards/check_gptzero_func": 0.8214285969734192, | |
| "rewards/check_perplexity_diff_func": 0.5714285969734192, | |
| "rewards/check_winston_local_func": 0.9238227605819702, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio": 0.006801524665206671, | |
| "epoch": 0.3356643356643357, | |
| "grad_norm": 0.16826419365505382, | |
| "kl": 0.671875, | |
| "learning_rate": 8.391608391608392e-05, | |
| "loss": -0.0182, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.004480496048927307, | |
| "completion_length": 125.05357360839844, | |
| "epoch": 0.33706293706293705, | |
| "grad_norm": 0.26807661638826913, | |
| "kl": 0.60546875, | |
| "learning_rate": 8.426573426573428e-05, | |
| "loss": -0.0086, | |
| "num_tokens": 2447333.0, | |
| "reward": 2.2939677238464355, | |
| "reward_std": 0.39558398723602295, | |
| "rewards/check_gptzero_func": 0.9107142686843872, | |
| "rewards/check_perplexity_diff_func": 0.5357142686843872, | |
| "rewards/check_winston_local_func": 0.8475390672683716, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio": 0.007587091531604528, | |
| "epoch": 0.3384615384615385, | |
| "grad_norm": 0.13602673124742348, | |
| "kl": 0.57421875, | |
| "learning_rate": 8.461538461538461e-05, | |
| "loss": -0.0152, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio": 0.0034923183266073465, | |
| "completion_length": 98.71428680419922, | |
| "epoch": 0.33986013986013985, | |
| "grad_norm": 0.7773422467135167, | |
| "kl": 6.5, | |
| "learning_rate": 8.496503496503497e-05, | |
| "loss": 0.0071, | |
| "num_tokens": 2466435.0, | |
| "reward": 2.4026196002960205, | |
| "reward_std": 0.4264836013317108, | |
| "rewards/check_gptzero_func": 0.8571428656578064, | |
| "rewards/check_perplexity_diff_func": 0.6785714030265808, | |
| "rewards/check_winston_local_func": 0.8669052124023438, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio": 0.026189187541604042, | |
| "epoch": 0.3412587412587413, | |
| "grad_norm": 130.9971047023429, | |
| "kl": 0.70703125, | |
| "learning_rate": 8.531468531468532e-05, | |
| "loss": 1.8998, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio": 0.004780410788953304, | |
| "completion_length": 135.0, | |
| "epoch": 0.34265734265734266, | |
| "grad_norm": 0.2329479243774761, | |
| "kl": 0.87109375, | |
| "learning_rate": 8.566433566433567e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 2489651.0, | |
| "reward": 2.2166569232940674, | |
| "reward_std": 0.5797281265258789, | |
| "rewards/check_gptzero_func": 0.9107142686843872, | |
| "rewards/check_perplexity_diff_func": 0.4285714328289032, | |
| "rewards/check_winston_local_func": 0.8773713111877441, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.021273447200655937, | |
| "epoch": 0.34405594405594403, | |
| "grad_norm": 0.22672502397660207, | |
| "kl": 0.9609375, | |
| "learning_rate": 8.601398601398601e-05, | |
| "loss": -0.0045, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio": 0.002856873208656907, | |
| "completion_length": 122.66072082519531, | |
| "epoch": 0.34545454545454546, | |
| "grad_norm": 0.19772681964626543, | |
| "kl": 0.984375, | |
| "learning_rate": 8.636363636363637e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 2510506.0, | |
| "reward": 2.323245048522949, | |
| "reward_std": 0.46726977825164795, | |
| "rewards/check_gptzero_func": 0.9107142686843872, | |
| "rewards/check_perplexity_diff_func": 0.5, | |
| "rewards/check_winston_local_func": 0.9125306010246277, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio": 0.02299562282860279, | |
| "epoch": 0.34685314685314683, | |
| "grad_norm": 0.1812005085005865, | |
| "kl": 0.87890625, | |
| "learning_rate": 8.67132867132867e-05, | |
| "loss": -0.001, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio": 0.0026335662696510553, | |
| "completion_length": 116.85714721679688, | |
| "epoch": 0.34825174825174826, | |
| "grad_norm": 0.28731547808119623, | |
| "kl": 0.76953125, | |
| "learning_rate": 8.706293706293707e-05, | |
| "loss": 0.024, | |
| "num_tokens": 2531254.0, | |
| "reward": 2.5312695503234863, | |
| "reward_std": 0.5851892828941345, | |
| "rewards/check_gptzero_func": 0.8571428656578064, | |
| "rewards/check_perplexity_diff_func": 0.8214285969734192, | |
| "rewards/check_winston_local_func": 0.8526979088783264, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio": 0.012148472480475903, | |
| "epoch": 0.34965034965034963, | |
| "grad_norm": 0.21250398409595742, | |
| "kl": 0.734375, | |
| "learning_rate": 8.741258741258743e-05, | |
| "loss": 0.0156, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.0036881309933960438, | |
| "completion_length": 109.12500762939453, | |
| "epoch": 0.35104895104895106, | |
| "grad_norm": 0.278787459589393, | |
| "kl": 1.3671875, | |
| "learning_rate": 8.776223776223776e-05, | |
| "loss": -0.0127, | |
| "num_tokens": 2551265.0, | |
| "reward": 2.24423885345459, | |
| "reward_std": 0.6221095323562622, | |
| "rewards/check_gptzero_func": 0.875, | |
| "rewards/check_perplexity_diff_func": 0.5, | |
| "rewards/check_winston_local_func": 0.8692389130592346, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio": 0.011438491754233837, | |
| "epoch": 0.35244755244755244, | |
| "grad_norm": 0.21324998809887244, | |
| "kl": 1.0859375, | |
| "learning_rate": 8.811188811188812e-05, | |
| "loss": -0.023, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio": 0.0039367591962218285, | |
| "completion_length": 112.14286041259766, | |
| "epoch": 0.35384615384615387, | |
| "grad_norm": 0.31069817037713615, | |
| "kl": 0.91015625, | |
| "learning_rate": 8.846153846153847e-05, | |
| "loss": 0.0167, | |
| "num_tokens": 2572119.0, | |
| "reward": 2.492161989212036, | |
| "reward_std": 0.6407585144042969, | |
| "rewards/check_gptzero_func": 0.875, | |
| "rewards/check_perplexity_diff_func": 0.75, | |
| "rewards/check_winston_local_func": 0.8671619296073914, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio": 0.005880096461623907, | |
| "epoch": 0.35524475524475524, | |
| "grad_norm": 0.19008007229653096, | |
| "kl": 0.90234375, | |
| "learning_rate": 8.881118881118881e-05, | |
| "loss": 0.0057, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio": 0.003151817014440894, | |
| "completion_length": 120.78572082519531, | |
| "epoch": 0.35664335664335667, | |
| "grad_norm": 0.4113945542101958, | |
| "kl": 0.890625, | |
| "learning_rate": 8.916083916083916e-05, | |
| "loss": 0.0079, | |
| "num_tokens": 2593081.0, | |
| "reward": 2.3228745460510254, | |
| "reward_std": 0.5329480171203613, | |
| "rewards/check_gptzero_func": 0.875, | |
| "rewards/check_perplexity_diff_func": 0.6428571343421936, | |
| "rewards/check_winston_local_func": 0.8050170540809631, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.01224872563034296, | |
| "epoch": 0.35804195804195804, | |
| "grad_norm": 0.24677625747594412, | |
| "kl": 1.0078125, | |
| "learning_rate": 8.951048951048952e-05, | |
| "loss": 0.0006, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio": 0.003666934324428439, | |
| "completion_length": 146.5178680419922, | |
| "epoch": 0.3594405594405594, | |
| "grad_norm": 0.21537926236506497, | |
| "kl": 0.484375, | |
| "learning_rate": 8.986013986013986e-05, | |
| "loss": 0.0068, | |
| "num_tokens": 2618758.0, | |
| "reward": 2.198406934738159, | |
| "reward_std": 0.3875991106033325, | |
| "rewards/check_gptzero_func": 0.9285714030265808, | |
| "rewards/check_perplexity_diff_func": 0.4285714328289032, | |
| "rewards/check_winston_local_func": 0.8412641882896423, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio": 0.012731654569506645, | |
| "epoch": 0.36083916083916084, | |
| "grad_norm": 0.1543455831571161, | |
| "kl": 0.478515625, | |
| "learning_rate": 9.020979020979021e-05, | |
| "loss": 0.0025, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio": 0.0012050624936819077, | |
| "completion_length": 129.375, | |
| "epoch": 0.3622377622377622, | |
| "grad_norm": 0.16107873823673136, | |
| "kl": 1.6640625, | |
| "learning_rate": 9.055944055944057e-05, | |
| "loss": 0.0054, | |
| "num_tokens": 2641815.0, | |
| "reward": 2.4805288314819336, | |
| "reward_std": 0.3510296940803528, | |
| "rewards/check_gptzero_func": 0.8392857313156128, | |
| "rewards/check_perplexity_diff_func": 0.8214285969734192, | |
| "rewards/check_winston_local_func": 0.8198142647743225, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio": 0.006208478473126888, | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.10546346547579268, | |
| "kl": 0.8046875, | |
| "learning_rate": 9.090909090909092e-05, | |
| "loss": 0.0019, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0025593352038413286, | |
| "completion_length": 128.57144165039062, | |
| "epoch": 0.365034965034965, | |
| "grad_norm": 0.1986264685447053, | |
| "kl": 0.6171875, | |
| "learning_rate": 9.125874125874126e-05, | |
| "loss": -0.0025, | |
| "num_tokens": 2664343.0, | |
| "reward": 2.261420249938965, | |
| "reward_std": 0.5704914927482605, | |
| "rewards/check_gptzero_func": 0.7857142686843872, | |
| "rewards/check_perplexity_diff_func": 0.6428571343421936, | |
| "rewards/check_winston_local_func": 0.8328486680984497, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio": 0.002767725382000208, | |
| "epoch": 0.36643356643356645, | |
| "grad_norm": 0.14267481856078115, | |
| "kl": 0.66015625, | |
| "learning_rate": 9.160839160839161e-05, | |
| "loss": -0.0071, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio": 0.0036019759718328714, | |
| "completion_length": 108.30357360839844, | |
| "epoch": 0.3678321678321678, | |
| "grad_norm": 0.25429899311591847, | |
| "kl": 0.61328125, | |
| "learning_rate": 9.195804195804196e-05, | |
| "loss": 0.0114, | |
| "num_tokens": 2684460.0, | |
| "reward": 2.1853349208831787, | |
| "reward_std": 0.5545978546142578, | |
| "rewards/check_gptzero_func": 0.7142857313156128, | |
| "rewards/check_perplexity_diff_func": 0.6428571343421936, | |
| "rewards/check_winston_local_func": 0.828191876411438, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio": 0.002995865885168314, | |
| "epoch": 0.36923076923076925, | |
| "grad_norm": 0.17454926732818832, | |
| "kl": 0.62109375, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 0.0028, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio": 0.002558046253398061, | |
| "completion_length": 102.80357360839844, | |
| "epoch": 0.3706293706293706, | |
| "grad_norm": 0.18701691067416304, | |
| "kl": 0.734375, | |
| "learning_rate": 9.265734265734266e-05, | |
| "loss": 0.0401, | |
| "num_tokens": 2704225.0, | |
| "reward": 2.1770143508911133, | |
| "reward_std": 0.573693037033081, | |
| "rewards/check_gptzero_func": 0.8035714030265808, | |
| "rewards/check_perplexity_diff_func": 0.5357142686843872, | |
| "rewards/check_winston_local_func": 0.8377286195755005, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.006838452070951462, | |
| "epoch": 0.37202797202797205, | |
| "grad_norm": 0.13942096443359253, | |
| "kl": 0.71875, | |
| "learning_rate": 9.300699300699301e-05, | |
| "loss": 0.0354, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio": 0.00375689216889441, | |
| "completion_length": 106.37500762939453, | |
| "epoch": 0.3734265734265734, | |
| "grad_norm": 0.25266992310982356, | |
| "kl": 0.7421875, | |
| "learning_rate": 9.335664335664336e-05, | |
| "loss": -0.0156, | |
| "num_tokens": 2724530.0, | |
| "reward": 2.11887526512146, | |
| "reward_std": 0.5366143584251404, | |
| "rewards/check_gptzero_func": 0.875, | |
| "rewards/check_perplexity_diff_func": 0.4642857015132904, | |
| "rewards/check_winston_local_func": 0.7795897126197815, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio": 0.006593361962586641, | |
| "epoch": 0.3748251748251748, | |
| "grad_norm": 0.18497903526945206, | |
| "kl": 0.7265625, | |
| "learning_rate": 9.370629370629372e-05, | |
| "loss": -0.0243, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio": 0.004569682292640209, | |
| "completion_length": 124.39286041259766, | |
| "epoch": 0.37622377622377623, | |
| "grad_norm": 0.21331332062489702, | |
| "kl": 0.953125, | |
| "learning_rate": 9.405594405594406e-05, | |
| "loss": 0.0346, | |
| "num_tokens": 2746246.0, | |
| "reward": 2.403602123260498, | |
| "reward_std": 0.5621734857559204, | |
| "rewards/check_gptzero_func": 0.7857142686843872, | |
| "rewards/check_perplexity_diff_func": 0.75, | |
| "rewards/check_winston_local_func": 0.8678879141807556, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio": 0.006006812676787376, | |
| "epoch": 0.3776223776223776, | |
| "grad_norm": 0.15420322253654764, | |
| "kl": 0.8671875, | |
| "learning_rate": 9.440559440559441e-05, | |
| "loss": 0.0283, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.002397142816334963, | |
| "completion_length": 102.41072082519531, | |
| "epoch": 0.37902097902097903, | |
| "grad_norm": 0.22567114655173642, | |
| "kl": 0.609375, | |
| "learning_rate": 9.475524475524477e-05, | |
| "loss": 0.0231, | |
| "num_tokens": 2765203.0, | |
| "reward": 2.649076223373413, | |
| "reward_std": 0.47521141171455383, | |
| "rewards/check_gptzero_func": 0.875, | |
| "rewards/check_perplexity_diff_func": 1.0, | |
| "rewards/check_winston_local_func": 0.7740762829780579, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio": 0.007639365270733833, | |
| "epoch": 0.3804195804195804, | |
| "grad_norm": 0.18662930190408594, | |
| "kl": 0.578125, | |
| "learning_rate": 9.510489510489511e-05, | |
| "loss": 0.0165, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio": 0.0010056205792352557, | |
| "completion_length": 115.28572082519531, | |
| "epoch": 0.38181818181818183, | |
| "grad_norm": 0.14954423121719349, | |
| "kl": 0.625, | |
| "learning_rate": 9.545454545454546e-05, | |
| "loss": 0.0237, | |
| "num_tokens": 2785985.0, | |
| "reward": 2.3541975021362305, | |
| "reward_std": 0.5445494651794434, | |
| "rewards/check_gptzero_func": 0.8392857313156128, | |
| "rewards/check_perplexity_diff_func": 0.75, | |
| "rewards/check_winston_local_func": 0.7649118304252625, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio": 0.003297280054539442, | |
| "epoch": 0.3832167832167832, | |
| "grad_norm": 0.11713806873969078, | |
| "kl": 0.640625, | |
| "learning_rate": 9.580419580419581e-05, | |
| "loss": 0.0206, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio": 0.0016172031173482537, | |
| "completion_length": 128.73214721679688, | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.155009547701029, | |
| "kl": 0.51171875, | |
| "learning_rate": 9.615384615384617e-05, | |
| "loss": 0.0191, | |
| "num_tokens": 2808414.0, | |
| "reward": 2.376694679260254, | |
| "reward_std": 0.33215656876564026, | |
| "rewards/check_gptzero_func": 0.8392857313156128, | |
| "rewards/check_perplexity_diff_func": 0.8571428656578064, | |
| "rewards/check_winston_local_func": 0.6802659630775452, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.004294655751436949, | |
| "epoch": 0.386013986013986, | |
| "grad_norm": 0.11409067239204185, | |
| "kl": 0.515625, | |
| "learning_rate": 9.65034965034965e-05, | |
| "loss": 0.0155, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio": 0.002925862092524767, | |
| "completion_length": 116.41072082519531, | |
| "epoch": 0.38741258741258744, | |
| "grad_norm": 0.2764503095198425, | |
| "kl": 0.578125, | |
| "learning_rate": 9.685314685314686e-05, | |
| "loss": 0.01, | |
| "num_tokens": 2829285.0, | |
| "reward": 2.3352210521698, | |
| "reward_std": 0.7207943201065063, | |
| "rewards/check_gptzero_func": 0.6607142686843872, | |
| "rewards/check_perplexity_diff_func": 0.9642857313156128, | |
| "rewards/check_winston_local_func": 0.7102211713790894, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio": 0.005557883996516466, | |
| "epoch": 0.3888111888111888, | |
| "grad_norm": 0.2140511595370052, | |
| "kl": 0.5859375, | |
| "learning_rate": 9.72027972027972e-05, | |
| "loss": -0.0004, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio": 0.0029403052758425474, | |
| "completion_length": 114.25000762939453, | |
| "epoch": 0.3902097902097902, | |
| "grad_norm": 0.21363596109825803, | |
| "kl": 0.63671875, | |
| "learning_rate": 9.755244755244755e-05, | |
| "loss": 0.0259, | |
| "num_tokens": 2849989.0, | |
| "reward": 2.6570937633514404, | |
| "reward_std": 0.5007120370864868, | |
| "rewards/check_gptzero_func": 0.75, | |
| "rewards/check_perplexity_diff_func": 1.2142857313156128, | |
| "rewards/check_winston_local_func": 0.6928080320358276, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio": 0.00384224159643054, | |
| "epoch": 0.3916083916083916, | |
| "grad_norm": 0.15102706915256958, | |
| "kl": 0.6875, | |
| "learning_rate": 9.790209790209791e-05, | |
| "loss": 0.0185, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0027283646631985903, | |
| "completion_length": 90.25000762939453, | |
| "epoch": 0.393006993006993, | |
| "grad_norm": 0.3585134615134873, | |
| "kl": 0.73046875, | |
| "learning_rate": 9.825174825174826e-05, | |
| "loss": -0.006, | |
| "num_tokens": 2867567.0, | |
| "reward": 2.4528698921203613, | |
| "reward_std": 0.6383817791938782, | |
| "rewards/check_gptzero_func": 0.6071428656578064, | |
| "rewards/check_perplexity_diff_func": 1.1428571939468384, | |
| "rewards/check_winston_local_func": 0.7028695940971375, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio": 0.016295742243528366, | |
| "epoch": 0.3944055944055944, | |
| "grad_norm": 0.2358766583499307, | |
| "kl": 0.73046875, | |
| "learning_rate": 9.86013986013986e-05, | |
| "loss": -0.0151, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio": 0.0025735762901604176, | |
| "completion_length": 132.125, | |
| "epoch": 0.3958041958041958, | |
| "grad_norm": 0.12427534341566092, | |
| "kl": 0.515625, | |
| "learning_rate": 9.895104895104895e-05, | |
| "loss": 0.006, | |
| "num_tokens": 2890954.0, | |
| "reward": 2.172079086303711, | |
| "reward_std": 0.31105130910873413, | |
| "rewards/check_gptzero_func": 0.6607142686843872, | |
| "rewards/check_perplexity_diff_func": 0.7857142686843872, | |
| "rewards/check_winston_local_func": 0.7256504893302917, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio": 0.008568070828914642, | |
| "epoch": 0.3972027972027972, | |
| "grad_norm": 0.10751149742872082, | |
| "kl": 0.5234375, | |
| "learning_rate": 9.930069930069931e-05, | |
| "loss": 0.0038, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio": 0.0013303400482982397, | |
| "completion_length": 115.50000762939453, | |
| "epoch": 0.3986013986013986, | |
| "grad_norm": 0.11155145645226237, | |
| "kl": 0.427734375, | |
| "learning_rate": 9.965034965034964e-05, | |
| "loss": 0.0136, | |
| "num_tokens": 2912320.0, | |
| "reward": 2.499699354171753, | |
| "reward_std": 0.22870054841041565, | |
| "rewards/check_gptzero_func": 0.6071428656578064, | |
| "rewards/check_perplexity_diff_func": 1.3214285373687744, | |
| "rewards/check_winston_local_func": 0.5711276531219482, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.004690885543823242, | |
| "epoch": 0.4, | |
| "grad_norm": 0.08246302090980155, | |
| "kl": 0.443359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0117, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio": 0.0013457606546580791, | |
| "completion_length": 153.60714721679688, | |
| "epoch": 0.4013986013986014, | |
| "grad_norm": 0.0959414180998777, | |
| "kl": 0.458984375, | |
| "learning_rate": 9.999996275889018e-05, | |
| "loss": -0.0037, | |
| "num_tokens": 2937286.0, | |
| "reward": 2.60290789604187, | |
| "reward_std": 0.2643657624721527, | |
| "rewards/check_gptzero_func": 0.625, | |
| "rewards/check_perplexity_diff_func": 1.3214285373687744, | |
| "rewards/check_winston_local_func": 0.6564791798591614, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio": 0.0025680752005428076, | |
| "epoch": 0.4027972027972028, | |
| "grad_norm": 0.08295696905412306, | |
| "kl": 0.453125, | |
| "learning_rate": 9.999985103561615e-05, | |
| "loss": -0.0053, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.39286041259766, | |
| "epoch": 0.4041958041958042, | |
| "grad_norm": 0.12524695486629228, | |
| "kl": 0.3671875, | |
| "learning_rate": 9.999966483034437e-05, | |
| "loss": -0.0031, | |
| "num_tokens": 2957572.0, | |
| "reward": 2.607825994491577, | |
| "reward_std": 0.2113000899553299, | |
| "rewards/check_gptzero_func": 0.5535714030265808, | |
| "rewards/check_perplexity_diff_func": 1.4642857313156128, | |
| "rewards/check_winston_local_func": 0.5899689793586731, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio": 0.0015743181575089693, | |
| "epoch": 0.40559440559440557, | |
| "grad_norm": 0.08958540244405325, | |
| "kl": 0.36328125, | |
| "learning_rate": 9.999940414335222e-05, | |
| "loss": -0.0046, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0017556955572217703, | |
| "completion_length": 96.0714340209961, | |
| "epoch": 0.406993006993007, | |
| "grad_norm": 0.19185238369967877, | |
| "kl": 2.109375, | |
| "learning_rate": 9.999906897502803e-05, | |
| "loss": 0.008, | |
| "num_tokens": 2976076.0, | |
| "reward": 2.6148557662963867, | |
| "reward_std": 0.24822747707366943, | |
| "rewards/check_gptzero_func": 0.5178571343421936, | |
| "rewards/check_perplexity_diff_func": 1.4285714626312256, | |
| "rewards/check_winston_local_func": 0.6684269309043884, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio": 0.00337741756811738, | |
| "epoch": 0.4083916083916084, | |
| "grad_norm": 0.08769186584743054, | |
| "kl": 0.671875, | |
| "learning_rate": 9.999865932587107e-05, | |
| "loss": 0.0046, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio": 0.0012849332997575402, | |
| "completion_length": 91.87500762939453, | |
| "epoch": 0.4097902097902098, | |
| "grad_norm": 0.13036265265723118, | |
| "kl": 0.4453125, | |
| "learning_rate": 9.999817519649158e-05, | |
| "loss": 0.0127, | |
| "num_tokens": 2994773.0, | |
| "reward": 2.552300214767456, | |
| "reward_std": 0.14500018954277039, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.5, | |
| "rewards/check_winston_local_func": 0.6237286329269409, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio": 0.0027297178748995066, | |
| "epoch": 0.4111888111888112, | |
| "grad_norm": 0.0976002240930887, | |
| "kl": 0.439453125, | |
| "learning_rate": 9.99976165876107e-05, | |
| "loss": 0.0101, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 113.35714721679688, | |
| "epoch": 0.4125874125874126, | |
| "grad_norm": 0.02609480806956132, | |
| "kl": 0.1591796875, | |
| "learning_rate": 9.999698350006063e-05, | |
| "loss": 0.0012, | |
| "num_tokens": 3015993.0, | |
| "reward": 2.874926805496216, | |
| "reward_std": 0.0874238833785057, | |
| "rewards/check_gptzero_func": 0.4821428656578064, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.4999266266822815, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 9.294544724980369e-05, | |
| "epoch": 0.413986013986014, | |
| "grad_norm": 0.02050573548257188, | |
| "kl": 0.16015625, | |
| "learning_rate": 9.999627593478442e-05, | |
| "loss": 0.0011, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio": 0.00012664640962611884, | |
| "completion_length": 98.5714340209961, | |
| "epoch": 0.4153846153846154, | |
| "grad_norm": 0.01680753751461254, | |
| "kl": 0.201171875, | |
| "learning_rate": 9.999549389283606e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 3035155.0, | |
| "reward": 3.0224545001983643, | |
| "reward_std": 0.016296973451972008, | |
| "rewards/check_gptzero_func": 0.4107142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.647454559803009, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4167832167832168, | |
| "grad_norm": 0.012884367061070623, | |
| "kl": 0.203125, | |
| "learning_rate": 9.999463737538053e-05, | |
| "loss": -0.0003, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio": 0.0003541912592481822, | |
| "completion_length": 95.76786041259766, | |
| "epoch": 0.41818181818181815, | |
| "grad_norm": 0.04526732622569909, | |
| "kl": 0.291015625, | |
| "learning_rate": 9.999370638369377e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3054174.0, | |
| "reward": 2.3303959369659424, | |
| "reward_std": 0.08971218019723892, | |
| "rewards/check_gptzero_func": 0.3035714328289032, | |
| "rewards/check_perplexity_diff_func": 1.6071428060531616, | |
| "rewards/check_winston_local_func": 0.4196813404560089, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio": 0.00011806376278400421, | |
| "epoch": 0.4195804195804196, | |
| "grad_norm": 0.03862986955837794, | |
| "kl": 0.296875, | |
| "learning_rate": 9.999270091916257e-05, | |
| "loss": -0.0002, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio": 0.0012040403671562672, | |
| "completion_length": 101.73214721679688, | |
| "epoch": 0.42097902097902096, | |
| "grad_norm": 0.12616717395678975, | |
| "kl": 0.35546875, | |
| "learning_rate": 9.999162098328474e-05, | |
| "loss": 0.0033, | |
| "num_tokens": 3073401.0, | |
| "reward": 2.8153316974639893, | |
| "reward_std": 0.18918544054031372, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.6724745631217957, | |
| "step": 301 | |
| }, | |
| { | |
| "clip_ratio": 0.0013847867958247662, | |
| "epoch": 0.4223776223776224, | |
| "grad_norm": 0.06929991057149441, | |
| "kl": 0.412109375, | |
| "learning_rate": 9.999046657766903e-05, | |
| "loss": 0.0012, | |
| "step": 302 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 81.3214340209961, | |
| "epoch": 0.42377622377622376, | |
| "grad_norm": 0.06730614630696258, | |
| "kl": 0.2099609375, | |
| "learning_rate": 9.998923770403505e-05, | |
| "loss": -0.0019, | |
| "num_tokens": 3090625.0, | |
| "reward": 2.802220106124878, | |
| "reward_std": 0.03781326860189438, | |
| "rewards/check_gptzero_func": 0.375, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.42722010612487793, | |
| "step": 303 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4251748251748252, | |
| "grad_norm": 0.022175315902565728, | |
| "kl": 0.2109375, | |
| "learning_rate": 9.99879343642134e-05, | |
| "loss": -0.0024, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio": 0.00013683634460903704, | |
| "completion_length": 105.53572082519531, | |
| "epoch": 0.42657342657342656, | |
| "grad_norm": 0.04867704636152166, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.998655656014561e-05, | |
| "loss": 0.0117, | |
| "num_tokens": 3110877.0, | |
| "reward": 2.6083197593688965, | |
| "reward_std": 0.13502858579158783, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.35831958055496216, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.427972027972028, | |
| "grad_norm": 0.043987709695075865, | |
| "kl": 0.1728515625, | |
| "learning_rate": 9.99851042938841e-05, | |
| "loss": 0.0113, | |
| "step": 306 | |
| }, | |
| { | |
| "clip_ratio": 0.0004155792994424701, | |
| "completion_length": 124.03572082519531, | |
| "epoch": 0.42937062937062936, | |
| "grad_norm": 0.03684760048760383, | |
| "kl": 0.2080078125, | |
| "learning_rate": 9.998357756759222e-05, | |
| "loss": -0.0258, | |
| "num_tokens": 3134097.0, | |
| "reward": 2.7439539432525635, | |
| "reward_std": 0.11075940728187561, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.7857142686843872, | |
| "rewards/check_winston_local_func": 0.4939536452293396, | |
| "step": 307 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4307692307692308, | |
| "grad_norm": 0.013648247572273129, | |
| "kl": 0.20703125, | |
| "learning_rate": 9.998197638354428e-05, | |
| "loss": -0.026, | |
| "step": 308 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 89.12500762939453, | |
| "epoch": 0.43216783216783217, | |
| "grad_norm": 0.02465210474674629, | |
| "kl": 0.443359375, | |
| "learning_rate": 9.998030074412545e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 3152358.0, | |
| "reward": 2.420307159423828, | |
| "reward_std": 0.11704181134700775, | |
| "rewards/check_gptzero_func": 0.2321428507566452, | |
| "rewards/check_perplexity_diff_func": 1.75, | |
| "rewards/check_winston_local_func": 0.4381641745567322, | |
| "step": 309 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.43356643356643354, | |
| "grad_norm": 0.021184461362474653, | |
| "kl": 0.48828125, | |
| "learning_rate": 9.997855065183184e-05, | |
| "loss": 0.0014, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 115.73214721679688, | |
| "epoch": 0.43496503496503497, | |
| "grad_norm": 0.0049134080725686995, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.997672610927047e-05, | |
| "loss": -0.0047, | |
| "num_tokens": 3173997.0, | |
| "reward": 2.61786150932312, | |
| "reward_std": 0.013112460263073444, | |
| "rewards/check_gptzero_func": 0.375, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.4214327931404114, | |
| "step": 311 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.43636363636363634, | |
| "grad_norm": 0.00595537475288129, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.997482711915927e-05, | |
| "loss": -0.0047, | |
| "step": 312 | |
| }, | |
| { | |
| "clip_ratio": 0.0001360544265480712, | |
| "completion_length": 90.42857360839844, | |
| "epoch": 0.43776223776223777, | |
| "grad_norm": 0.007409687643468917, | |
| "kl": 0.2275390625, | |
| "learning_rate": 9.997285368432703e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 3192417.0, | |
| "reward": 2.682650566101074, | |
| "reward_std": 0.02417343109846115, | |
| "rewards/check_gptzero_func": 0.375, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.4862218499183655, | |
| "step": 313 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.43916083916083914, | |
| "grad_norm": 0.007129921917131849, | |
| "kl": 0.2275390625, | |
| "learning_rate": 9.997080580771349e-05, | |
| "loss": 0.0006, | |
| "step": 314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 83.14286041259766, | |
| "epoch": 0.4405594405594406, | |
| "grad_norm": 1.3606081670938062e-05, | |
| "kl": 0.2255859375, | |
| "learning_rate": 9.996868349236927e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3209875.0, | |
| "reward": 2.671295404434204, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.45700937509536743, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.44195804195804195, | |
| "grad_norm": 1.3537787639105168e-05, | |
| "kl": 0.2255859375, | |
| "learning_rate": 9.996648674145583e-05, | |
| "loss": 0.0002, | |
| "step": 316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 126.21429443359375, | |
| "epoch": 0.4433566433566434, | |
| "grad_norm": 0.00991208286904361, | |
| "kl": 0.15625, | |
| "learning_rate": 9.996421555824558e-05, | |
| "loss": 0.0103, | |
| "num_tokens": 3232963.0, | |
| "reward": 2.6654393672943115, | |
| "reward_std": 0.0725974440574646, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.6071428060531616, | |
| "rewards/check_winston_local_func": 0.5225821733474731, | |
| "step": 317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.44475524475524475, | |
| "grad_norm": 0.010677817125212348, | |
| "kl": 0.154296875, | |
| "learning_rate": 9.996186994612176e-05, | |
| "loss": 0.0102, | |
| "step": 318 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 103.4464340209961, | |
| "epoch": 0.4461538461538462, | |
| "grad_norm": 5.275581365835224e-05, | |
| "kl": 0.16796875, | |
| "learning_rate": 9.995944990857849e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3252898.0, | |
| "reward": 3.0420854091644287, | |
| "reward_std": 0.00039901022682897747, | |
| "rewards/check_gptzero_func": 0.5714285969734192, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5420854687690735, | |
| "step": 319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.44755244755244755, | |
| "grad_norm": 6.10762506510971e-05, | |
| "kl": 0.16796875, | |
| "learning_rate": 9.995695544922077e-05, | |
| "loss": 0.0002, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.28572082519531, | |
| "epoch": 0.4489510489510489, | |
| "grad_norm": 0.010696195320132886, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.995438657176448e-05, | |
| "loss": -0.0177, | |
| "num_tokens": 3272574.0, | |
| "reward": 2.7592146396636963, | |
| "reward_std": 0.03917063772678375, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.437785804271698, | |
| "step": 321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.45034965034965035, | |
| "grad_norm": 0.01083841197384593, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.995174328003631e-05, | |
| "loss": -0.0177, | |
| "step": 322 | |
| }, | |
| { | |
| "clip_ratio": 8.473140769638121e-05, | |
| "completion_length": 117.41072082519531, | |
| "epoch": 0.45174825174825173, | |
| "grad_norm": 0.004522641361347185, | |
| "kl": 0.1640625, | |
| "learning_rate": 9.994902557797382e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 3294025.0, | |
| "reward": 2.92221736907959, | |
| "reward_std": 0.025547320023179054, | |
| "rewards/check_gptzero_func": 0.5535714030265808, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.36864587664604187, | |
| "step": 323 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.45314685314685316, | |
| "grad_norm": 0.004737684573095642, | |
| "kl": 0.1650390625, | |
| "learning_rate": 9.994623346962544e-05, | |
| "loss": -0.0002, | |
| "step": 324 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 95.26786041259766, | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 3.372088632491828e-06, | |
| "kl": 0.1796875, | |
| "learning_rate": 9.99433669591504e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3312902.0, | |
| "reward": 2.595499038696289, | |
| "reward_std": 4.388691013446078e-05, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.34549909830093384, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.45594405594405596, | |
| "grad_norm": 3.349405120445447e-06, | |
| "kl": 0.1796875, | |
| "learning_rate": 9.994042605081879e-05, | |
| "loss": 0.0002, | |
| "step": 326 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 97.80357360839844, | |
| "epoch": 0.45734265734265733, | |
| "grad_norm": 0.010175912326793437, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.993741074901153e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 3331815.0, | |
| "reward": 2.496483325958252, | |
| "reward_std": 0.04960284009575844, | |
| "rewards/check_gptzero_func": 0.2142857164144516, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.31791168451309204, | |
| "step": 327 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.45874125874125876, | |
| "grad_norm": 0.01017493586702021, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.993432105822034e-05, | |
| "loss": 0.0003, | |
| "step": 328 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 96.30357360839844, | |
| "epoch": 0.46013986013986014, | |
| "grad_norm": 0.0165654292436505, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.993115698304774e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 3351210.0, | |
| "reward": 2.8074374198913574, | |
| "reward_std": 0.07505974173545837, | |
| "rewards/check_gptzero_func": 0.5178571343421936, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.4324372708797455, | |
| "step": 329 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.014211382809754432, | |
| "kl": 0.16015625, | |
| "learning_rate": 9.992791852820709e-05, | |
| "loss": 0.0003, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 86.1964340209961, | |
| "epoch": 0.46293706293706294, | |
| "grad_norm": 0.0580909978469744, | |
| "kl": 0.2119140625, | |
| "learning_rate": 9.992460569852256e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 3369025.0, | |
| "reward": 2.986760377883911, | |
| "reward_std": 0.10425443947315216, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.6653318405151367, | |
| "step": 331 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4643356643356643, | |
| "grad_norm": 0.0012843504186470114, | |
| "kl": 0.2109375, | |
| "learning_rate": 9.992121849892904e-05, | |
| "loss": 0.0012, | |
| "step": 332 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 113.42857360839844, | |
| "epoch": 0.46573426573426574, | |
| "grad_norm": 0.03054414776989089, | |
| "kl": 0.1494140625, | |
| "learning_rate": 9.99177569344723e-05, | |
| "loss": -0.0, | |
| "num_tokens": 3390093.0, | |
| "reward": 2.5890934467315674, | |
| "reward_std": 0.1023683100938797, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.41052165627479553, | |
| "step": 333 | |
| }, | |
| { | |
| "clip_ratio": 0.00022784233442507684, | |
| "epoch": 0.4671328671328671, | |
| "grad_norm": 0.006141110067038759, | |
| "kl": 0.150390625, | |
| "learning_rate": 9.99142210103088e-05, | |
| "loss": -0.0001, | |
| "step": 334 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 109.00000762939453, | |
| "epoch": 0.46853146853146854, | |
| "grad_norm": 0.011702000814979486, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.991061073170585e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3410511.0, | |
| "reward": 2.601181745529175, | |
| "reward_std": 0.04859553650021553, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.42261001467704773, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4699300699300699, | |
| "grad_norm": 0.004380253375033637, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.990692610404145e-05, | |
| "loss": 0.0001, | |
| "step": 336 | |
| }, | |
| { | |
| "clip_ratio": 0.00015908369095996022, | |
| "completion_length": 124.37500762939453, | |
| "epoch": 0.47132867132867134, | |
| "grad_norm": 0.011635533606345248, | |
| "kl": 0.1318359375, | |
| "learning_rate": 9.99031671328044e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3433250.0, | |
| "reward": 2.719287395477295, | |
| "reward_std": 0.05448899790644646, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.75, | |
| "rewards/check_winston_local_func": 0.4692873954772949, | |
| "step": 337 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4727272727272727, | |
| "grad_norm": 0.009393665715625162, | |
| "kl": 0.1318359375, | |
| "learning_rate": 9.989933382359422e-05, | |
| "loss": 0.0001, | |
| "step": 338 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 110.8214340209961, | |
| "epoch": 0.47412587412587415, | |
| "grad_norm": 6.954019260271828e-07, | |
| "kl": 0.15625, | |
| "learning_rate": 9.98954261821212e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3454202.0, | |
| "reward": 2.5842461585998535, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.7857142686843872, | |
| "rewards/check_winston_local_func": 0.40567442774772644, | |
| "step": 339 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4755244755244755, | |
| "grad_norm": 6.417345751201852e-07, | |
| "kl": 0.15625, | |
| "learning_rate": 9.98914442142063e-05, | |
| "loss": 0.0002, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 76.55357360839844, | |
| "epoch": 0.47692307692307695, | |
| "grad_norm": 0.008322229771779063, | |
| "kl": 0.263671875, | |
| "learning_rate": 9.988738792578126e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 3471041.0, | |
| "reward": 2.916919231414795, | |
| "reward_std": 0.05185602605342865, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.631205141544342, | |
| "step": 341 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4783216783216783, | |
| "grad_norm": 0.010116392524351476, | |
| "kl": 0.263671875, | |
| "learning_rate": 9.988325732288851e-05, | |
| "loss": 0.0004, | |
| "step": 342 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 122.21429443359375, | |
| "epoch": 0.4797202797202797, | |
| "grad_norm": 1.4137707959903356e-05, | |
| "kl": 0.134765625, | |
| "learning_rate": 9.987905241168117e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3493483.0, | |
| "reward": 2.7157416343688965, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.46574172377586365, | |
| "step": 343 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4811188811188811, | |
| "grad_norm": 1.3729789968238485e-05, | |
| "kl": 0.134765625, | |
| "learning_rate": 9.987477319842307e-05, | |
| "loss": 0.0001, | |
| "step": 344 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 93.53572082519531, | |
| "epoch": 0.4825174825174825, | |
| "grad_norm": 7.468121696306164e-07, | |
| "kl": 0.1845703125, | |
| "learning_rate": 9.987041968948869e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3512009.0, | |
| "reward": 2.5505218505859375, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.1785714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.44337886571884155, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.48391608391608393, | |
| "grad_norm": 7.137709602226676e-07, | |
| "kl": 0.1845703125, | |
| "learning_rate": 9.986599189136325e-05, | |
| "loss": 0.0002, | |
| "step": 346 | |
| }, | |
| { | |
| "clip_ratio": 0.00031959093757905066, | |
| "completion_length": 102.58928680419922, | |
| "epoch": 0.4853146853146853, | |
| "grad_norm": 0.04707770778424934, | |
| "kl": 0.1982421875, | |
| "learning_rate": 9.986148981064258e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 3531574.0, | |
| "reward": 2.7341363430023193, | |
| "reward_std": 0.0231277234852314, | |
| "rewards/check_gptzero_func": 0.4107142984867096, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.430564820766449, | |
| "step": 347 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.48671328671328673, | |
| "grad_norm": 0.020919799034125892, | |
| "kl": 0.2060546875, | |
| "learning_rate": 9.985691345403316e-05, | |
| "loss": 0.0002, | |
| "step": 348 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 127.67857360839844, | |
| "epoch": 0.4881118881118881, | |
| "grad_norm": 6.517786151476589e-05, | |
| "kl": 0.1328125, | |
| "learning_rate": 9.985226282835216e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3554440.0, | |
| "reward": 2.926541566848755, | |
| "reward_std": 0.0005768488626927137, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.5336844325065613, | |
| "step": 349 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.48951048951048953, | |
| "grad_norm": 6.370254129427888e-05, | |
| "kl": 0.1328125, | |
| "learning_rate": 9.984753794052735e-05, | |
| "loss": 0.0001, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 98.75000762939453, | |
| "epoch": 0.4909090909090909, | |
| "grad_norm": 2.085540464425244e-05, | |
| "kl": 0.205078125, | |
| "learning_rate": 9.984273879759713e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3573976.0, | |
| "reward": 2.8204286098480225, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4989997446537018, | |
| "step": 351 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.49230769230769234, | |
| "grad_norm": 2.009578048518353e-05, | |
| "kl": 0.205078125, | |
| "learning_rate": 9.983786540671051e-05, | |
| "loss": 0.0002, | |
| "step": 352 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 85.21428680419922, | |
| "epoch": 0.4937062937062937, | |
| "grad_norm": 2.1499404490472988e-06, | |
| "kl": 0.1826171875, | |
| "learning_rate": 9.983291777512711e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3591644.0, | |
| "reward": 2.8764142990112305, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.44784265756607056, | |
| "step": 353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4951048951048951, | |
| "grad_norm": 2.160124410750654e-06, | |
| "kl": 0.1826171875, | |
| "learning_rate": 9.982789591021715e-05, | |
| "loss": 0.0002, | |
| "step": 354 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 112.00000762939453, | |
| "epoch": 0.4965034965034965, | |
| "grad_norm": 0.00014814082463704463, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.982279981946143e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3612596.0, | |
| "reward": 2.8398191928863525, | |
| "reward_std": 0.0006443771999329329, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5183902382850647, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.4979020979020979, | |
| "grad_norm": 0.00014606763289714295, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.98176295104513e-05, | |
| "loss": 0.0002, | |
| "step": 356 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 88.50000762939453, | |
| "epoch": 0.4993006993006993, | |
| "grad_norm": 3.33550016470454e-07, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.98123849908887e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3630862.0, | |
| "reward": 2.655492067337036, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.47692039608955383, | |
| "step": 357 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5006993006993007, | |
| "grad_norm": 3.3462192763241657e-07, | |
| "kl": 0.1669921875, | |
| "learning_rate": 9.980706626858607e-05, | |
| "loss": 0.0002, | |
| "step": 358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 126.39286041259766, | |
| "epoch": 0.5020979020979021, | |
| "grad_norm": 3.4860786526471938e-06, | |
| "kl": 0.1416015625, | |
| "learning_rate": 9.980167335146642e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3653738.0, | |
| "reward": 2.765826463699341, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.4086834490299225, | |
| "step": 359 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5034965034965035, | |
| "grad_norm": 3.71619859471226e-06, | |
| "kl": 0.1416015625, | |
| "learning_rate": 9.979620624756329e-05, | |
| "loss": 0.0001, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 129.92857360839844, | |
| "epoch": 0.5048951048951049, | |
| "grad_norm": 8.195642286342138e-07, | |
| "kl": 0.1376953125, | |
| "learning_rate": 9.979066496502074e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3677274.0, | |
| "reward": 2.8696835041046143, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4053979218006134, | |
| "step": 361 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5062937062937063, | |
| "grad_norm": 8.760444459307852e-07, | |
| "kl": 0.1376953125, | |
| "learning_rate": 9.978504951209327e-05, | |
| "loss": 0.0001, | |
| "step": 362 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 118.0714340209961, | |
| "epoch": 0.5076923076923077, | |
| "grad_norm": 1.595860947799347e-06, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.977935989714595e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3698896.0, | |
| "reward": 2.6664552688598633, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.45216941833496094, | |
| "step": 363 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.509090909090909, | |
| "grad_norm": 1.6260194368178069e-06, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.977359612865423e-05, | |
| "loss": 0.0002, | |
| "step": 364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 85.96428680419922, | |
| "epoch": 0.5104895104895105, | |
| "grad_norm": 4.357301744161908e-06, | |
| "kl": 0.2138671875, | |
| "learning_rate": 9.976775821520412e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3716536.0, | |
| "reward": 2.8877956867218018, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.566366970539093, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5118881118881119, | |
| "grad_norm": 4.353902612581284e-06, | |
| "kl": 0.2138671875, | |
| "learning_rate": 9.976184616549203e-05, | |
| "loss": 0.0002, | |
| "step": 366 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 116.96429443359375, | |
| "epoch": 0.5132867132867133, | |
| "grad_norm": 1.5148419096170459e-06, | |
| "kl": 0.146484375, | |
| "learning_rate": 9.97558599883248e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3737950.0, | |
| "reward": 2.687312602996826, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.36588403582572937, | |
| "step": 367 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5146853146853146, | |
| "grad_norm": 1.4470246412763422e-06, | |
| "kl": 0.146484375, | |
| "learning_rate": 9.97497996926197e-05, | |
| "loss": 0.0001, | |
| "step": 368 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 94.83928680419922, | |
| "epoch": 0.5160839160839161, | |
| "grad_norm": 0.0004647591690002515, | |
| "kl": 0.2138671875, | |
| "learning_rate": 9.974366528740441e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3756935.0, | |
| "reward": 2.634523391723633, | |
| "reward_std": 0.0005499019753187895, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4202377200126648, | |
| "step": 369 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5174825174825175, | |
| "grad_norm": 0.0004885464229234562, | |
| "kl": 0.2138671875, | |
| "learning_rate": 9.973745678181705e-05, | |
| "loss": 0.0002, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 100.17857360839844, | |
| "epoch": 0.5188811188811189, | |
| "grad_norm": 3.0418176986251483e-06, | |
| "kl": 0.1640625, | |
| "learning_rate": 9.973117418510605e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3776087.0, | |
| "reward": 2.5216457843780518, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.3787885308265686, | |
| "step": 371 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5202797202797202, | |
| "grad_norm": 3.0723635480561103e-06, | |
| "kl": 0.1640625, | |
| "learning_rate": 9.972481750663026e-05, | |
| "loss": 0.0002, | |
| "step": 372 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 89.66072082519531, | |
| "epoch": 0.5216783216783217, | |
| "grad_norm": 0.008590656277289149, | |
| "kl": 0.40234375, | |
| "learning_rate": 9.971838675585888e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 3794550.0, | |
| "reward": 2.5267839431762695, | |
| "reward_std": 0.05061079189181328, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.383926659822464, | |
| "step": 373 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5230769230769231, | |
| "grad_norm": 0.003770576536622864, | |
| "kl": 0.4296875, | |
| "learning_rate": 9.97118819423714e-05, | |
| "loss": 0.0007, | |
| "step": 374 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 80.17857360839844, | |
| "epoch": 0.5244755244755245, | |
| "grad_norm": 3.560813902520289e-07, | |
| "kl": 0.2216796875, | |
| "learning_rate": 9.970530307585773e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3811756.0, | |
| "reward": 2.883481740951538, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5263389348983765, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5258741258741259, | |
| "grad_norm": 3.5463275242856317e-07, | |
| "kl": 0.2216796875, | |
| "learning_rate": 9.9698650166118e-05, | |
| "loss": 0.0002, | |
| "step": 376 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 86.75000762939453, | |
| "epoch": 0.5272727272727272, | |
| "grad_norm": 4.776682547251814e-07, | |
| "kl": 0.205078125, | |
| "learning_rate": 9.969192322306271e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3829616.0, | |
| "reward": 2.9529268741607666, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5600695610046387, | |
| "step": 377 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5286713286713287, | |
| "grad_norm": 4.804658823710114e-07, | |
| "kl": 0.205078125, | |
| "learning_rate": 9.96851222567126e-05, | |
| "loss": 0.0002, | |
| "step": 378 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 117.89286041259766, | |
| "epoch": 0.5300699300699301, | |
| "grad_norm": 9.143102926453055e-06, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.96782472771987e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3851170.0, | |
| "reward": 2.7617037296295166, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.40456071496009827, | |
| "step": 379 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5314685314685315, | |
| "grad_norm": 9.41817239216292e-06, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.967129829476228e-05, | |
| "loss": 0.0002, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio": 0.000286861730273813, | |
| "completion_length": 80.8214340209961, | |
| "epoch": 0.5328671328671328, | |
| "grad_norm": 0.03337399461775748, | |
| "kl": 0.224609375, | |
| "learning_rate": 9.966427531975483e-05, | |
| "loss": -0.0402, | |
| "num_tokens": 3868508.0, | |
| "reward": 2.820319652557373, | |
| "reward_std": 0.0946396142244339, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.5346053838729858, | |
| "step": 381 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5342657342657343, | |
| "grad_norm": 0.00034351439093039966, | |
| "kl": 0.224609375, | |
| "learning_rate": 9.965717836263812e-05, | |
| "loss": -0.0403, | |
| "step": 382 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 95.03572082519531, | |
| "epoch": 0.5356643356643357, | |
| "grad_norm": 3.3945579906577137e-07, | |
| "kl": 0.216796875, | |
| "learning_rate": 9.965000743398408e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3887044.0, | |
| "reward": 2.8529021739959717, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4243304431438446, | |
| "step": 383 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5370629370629371, | |
| "grad_norm": 2.8390305863820515e-07, | |
| "kl": 0.216796875, | |
| "learning_rate": 9.964276254447484e-05, | |
| "loss": 0.0002, | |
| "step": 384 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 140.1428680419922, | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 8.521451122880038e-07, | |
| "kl": 0.134765625, | |
| "learning_rate": 9.96354437049027e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3911264.0, | |
| "reward": 2.4770619869232178, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.33420488238334656, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5398601398601398, | |
| "grad_norm": 6.753306754213034e-07, | |
| "kl": 0.134765625, | |
| "learning_rate": 9.962805092617016e-05, | |
| "loss": 0.0001, | |
| "step": 386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 97.64286041259766, | |
| "epoch": 0.5412587412587413, | |
| "grad_norm": 0.08784199050747857, | |
| "kl": 2.28125, | |
| "learning_rate": 9.962058421928979e-05, | |
| "loss": 0.0023, | |
| "num_tokens": 3930478.0, | |
| "reward": 2.897810459136963, | |
| "reward_std": 0.002231778111308813, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4692386984825134, | |
| "step": 387 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5426573426573427, | |
| "grad_norm": 0.03620083942117924, | |
| "kl": 0.98828125, | |
| "learning_rate": 9.961304359538437e-05, | |
| "loss": 0.001, | |
| "step": 388 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.21428680419922, | |
| "epoch": 0.544055944055944, | |
| "grad_norm": 0.025447754609912356, | |
| "kl": 0.1806640625, | |
| "learning_rate": 9.96054290656867e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3950362.0, | |
| "reward": 2.8110532760620117, | |
| "reward_std": 0.04895726963877678, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.45391014218330383, | |
| "step": 389 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 5.1964811610508515e-05, | |
| "kl": 0.1806640625, | |
| "learning_rate": 9.959774064153977e-05, | |
| "loss": 0.0001, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 103.5714340209961, | |
| "epoch": 0.5468531468531469, | |
| "grad_norm": 3.816406536402076e-07, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.958997833439657e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3970164.0, | |
| "reward": 2.5441431999206543, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.3655718266963959, | |
| "step": 391 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5482517482517483, | |
| "grad_norm": 4.199010025491445e-07, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.958214215582018e-05, | |
| "loss": 0.0002, | |
| "step": 392 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 126.8214340209961, | |
| "epoch": 0.5496503496503496, | |
| "grad_norm": 6.309179959293235e-06, | |
| "kl": 0.1533203125, | |
| "learning_rate": 9.957423211748374e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3992994.0, | |
| "reward": 2.7432734966278076, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.42184481024742126, | |
| "step": 393 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.551048951048951, | |
| "grad_norm": 5.547540566387994e-06, | |
| "kl": 0.1533203125, | |
| "learning_rate": 9.956624823117036e-05, | |
| "loss": 0.0002, | |
| "step": 394 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 125.62500762939453, | |
| "epoch": 0.5524475524475524, | |
| "grad_norm": 0.007987942179452279, | |
| "kl": 0.142578125, | |
| "learning_rate": 9.955819050877321e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4015669.0, | |
| "reward": 3.091567039489746, | |
| "reward_std": 0.018998777493834496, | |
| "rewards/check_gptzero_func": 0.5535714030265808, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.6094242334365845, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5538461538461539, | |
| "grad_norm": 0.0034758785486018436, | |
| "kl": 0.142578125, | |
| "learning_rate": 9.955005896229543e-05, | |
| "loss": 0.0, | |
| "step": 396 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 96.75000762939453, | |
| "epoch": 0.5552447552447553, | |
| "grad_norm": 0.011495641299277456, | |
| "kl": 0.283203125, | |
| "learning_rate": 9.954185360385013e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 4035041.0, | |
| "reward": 2.950004816055298, | |
| "reward_std": 0.01496803853660822, | |
| "rewards/check_gptzero_func": 0.4821428656578064, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.5750047564506531, | |
| "step": 397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5566433566433566, | |
| "grad_norm": 0.010082500068390739, | |
| "kl": 0.283203125, | |
| "learning_rate": 9.953357444566039e-05, | |
| "loss": 0.0006, | |
| "step": 398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 110.8214340209961, | |
| "epoch": 0.558041958041958, | |
| "grad_norm": 8.489376613416e-06, | |
| "kl": 0.1787109375, | |
| "learning_rate": 9.952522150005919e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4055855.0, | |
| "reward": 2.913240909576416, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5203836560249329, | |
| "step": 399 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5594405594405595, | |
| "grad_norm": 8.766485288799225e-06, | |
| "kl": 0.1787109375, | |
| "learning_rate": 9.951679477948947e-05, | |
| "loss": 0.0002, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 105.46428680419922, | |
| "epoch": 0.5608391608391609, | |
| "grad_norm": 8.652849000461684e-06, | |
| "kl": 0.150390625, | |
| "learning_rate": 9.9508294296504e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4076051.0, | |
| "reward": 2.871354818344116, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.44278329610824585, | |
| "step": 401 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5622377622377622, | |
| "grad_norm": 8.18246821524729e-06, | |
| "kl": 0.150390625, | |
| "learning_rate": 9.949972006376556e-05, | |
| "loss": 0.0002, | |
| "step": 402 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 75.25, | |
| "epoch": 0.5636363636363636, | |
| "grad_norm": 7.351342876079361e-07, | |
| "kl": 0.1953125, | |
| "learning_rate": 9.949107209404665e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4092527.0, | |
| "reward": 2.9322519302368164, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5751089453697205, | |
| "step": 403 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5650349650349651, | |
| "grad_norm": 7.136344464618312e-07, | |
| "kl": 0.1953125, | |
| "learning_rate": 9.948235040022966e-05, | |
| "loss": 0.0002, | |
| "step": 404 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 108.08928680419922, | |
| "epoch": 0.5664335664335665, | |
| "grad_norm": 0.007956878335048736, | |
| "kl": 0.169921875, | |
| "learning_rate": 9.947355499530683e-05, | |
| "loss": 0.0049, | |
| "num_tokens": 4113282.0, | |
| "reward": 3.1798436641693115, | |
| "reward_std": 0.06495096534490585, | |
| "rewards/check_gptzero_func": 0.6785714030265808, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.5369864702224731, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5678321678321678, | |
| "grad_norm": 0.006640329404323574, | |
| "kl": 0.1728515625, | |
| "learning_rate": 9.946468589238021e-05, | |
| "loss": 0.0049, | |
| "step": 406 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 121.39286041259766, | |
| "epoch": 0.5692307692307692, | |
| "grad_norm": 1.5620797981026132e-06, | |
| "kl": 0.1396484375, | |
| "learning_rate": 9.945574310466159e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4135506.0, | |
| "reward": 2.573127269744873, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.3588416278362274, | |
| "step": 407 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5706293706293706, | |
| "grad_norm": 1.51593967204227e-06, | |
| "kl": 0.1396484375, | |
| "learning_rate": 9.944672664547252e-05, | |
| "loss": 0.0001, | |
| "step": 408 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.8214340209961, | |
| "epoch": 0.5720279720279721, | |
| "grad_norm": 0.0007775272826913423, | |
| "kl": 0.17578125, | |
| "learning_rate": 9.943763652824436e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4156060.0, | |
| "reward": 2.55350661277771, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.48207801580429077, | |
| "step": 409 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5734265734265734, | |
| "grad_norm": 0.0008914625567854122, | |
| "kl": 0.1767578125, | |
| "learning_rate": 9.942847276651811e-05, | |
| "loss": 0.0002, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 101.62500762939453, | |
| "epoch": 0.5748251748251748, | |
| "grad_norm": 4.9539161428254635e-05, | |
| "kl": 0.1474609375, | |
| "learning_rate": 9.941923537394456e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4176019.0, | |
| "reward": 2.8906424045562744, | |
| "reward_std": 0.00012528452498372644, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5334992408752441, | |
| "step": 411 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5762237762237762, | |
| "grad_norm": 5.844422779033556e-05, | |
| "kl": 0.1474609375, | |
| "learning_rate": 9.940992436428409e-05, | |
| "loss": 0.0001, | |
| "step": 412 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 105.28572082519531, | |
| "epoch": 0.5776223776223777, | |
| "grad_norm": 0.0006008878616346907, | |
| "kl": 0.2216796875, | |
| "learning_rate": 9.940053975140684e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4196667.0, | |
| "reward": 2.943253755569458, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.5503963828086853, | |
| "step": 413 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.579020979020979, | |
| "grad_norm": 0.0003365492175069851, | |
| "kl": 0.220703125, | |
| "learning_rate": 9.939108154929252e-05, | |
| "loss": 0.0002, | |
| "step": 414 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 114.60714721679688, | |
| "epoch": 0.5804195804195804, | |
| "grad_norm": 0.008680207230787525, | |
| "kl": 0.171875, | |
| "learning_rate": 9.938154977203049e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 4217727.0, | |
| "reward": 2.7773444652557373, | |
| "reward_std": 0.051937274634838104, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.5273441076278687, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5818181818181818, | |
| "grad_norm": 0.00033864647249595475, | |
| "kl": 0.171875, | |
| "learning_rate": 9.937194443381972e-05, | |
| "loss": -0.0005, | |
| "step": 416 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 100.60714721679688, | |
| "epoch": 0.5832167832167832, | |
| "grad_norm": 4.017405087628847e-06, | |
| "kl": 0.193359375, | |
| "learning_rate": 9.936226554896875e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4237189.0, | |
| "reward": 2.7684807777404785, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.44705215096473694, | |
| "step": 417 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5846153846153846, | |
| "grad_norm": 3.880139616189675e-06, | |
| "kl": 0.193359375, | |
| "learning_rate": 9.935251313189564e-05, | |
| "loss": 0.0002, | |
| "step": 418 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.75000762939453, | |
| "epoch": 0.586013986013986, | |
| "grad_norm": 0.00032680437267053027, | |
| "kl": 0.1826171875, | |
| "learning_rate": 9.934268719712807e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4258213.0, | |
| "reward": 2.577641487121582, | |
| "reward_std": 0.0015505586052313447, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.4347843825817108, | |
| "step": 419 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5874125874125874, | |
| "grad_norm": 0.0003238465648274268, | |
| "kl": 0.1826171875, | |
| "learning_rate": 9.933278775930317e-05, | |
| "loss": 0.0002, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 108.67857360839844, | |
| "epoch": 0.5888111888111888, | |
| "grad_norm": 0.015591092439371165, | |
| "kl": 0.16015625, | |
| "learning_rate": 9.932281483316758e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 4278847.0, | |
| "reward": 2.615447998046875, | |
| "reward_std": 0.05428782477974892, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.47259071469306946, | |
| "step": 421 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5902097902097903, | |
| "grad_norm": 0.00016911180587449477, | |
| "kl": 0.16015625, | |
| "learning_rate": 9.931276843357742e-05, | |
| "loss": 0.0003, | |
| "step": 422 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 111.16072082519531, | |
| "epoch": 0.5916083916083916, | |
| "grad_norm": 1.6847767080478394e-05, | |
| "kl": 0.1630859375, | |
| "learning_rate": 9.930264857549825e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4300096.0, | |
| "reward": 2.857067823410034, | |
| "reward_std": 0.00101040443405509, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.6070676445960999, | |
| "step": 423 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.593006993006993, | |
| "grad_norm": 2.453025078669727e-05, | |
| "kl": 0.1630859375, | |
| "learning_rate": 9.929245527400503e-05, | |
| "loss": 0.0002, | |
| "step": 424 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.50000762939453, | |
| "epoch": 0.5944055944055944, | |
| "grad_norm": 0.0015183266942611472, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.928218854428221e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4320064.0, | |
| "reward": 2.6430578231811523, | |
| "reward_std": 5.020291791879572e-05, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.7857142686843872, | |
| "rewards/check_winston_local_func": 0.5002006888389587, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5958041958041959, | |
| "grad_norm": 0.0013596061771866538, | |
| "kl": 0.1708984375, | |
| "learning_rate": 9.927184840162354e-05, | |
| "loss": 0.0002, | |
| "step": 426 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 110.66072082519531, | |
| "epoch": 0.5972027972027972, | |
| "grad_norm": 0.12076121626694686, | |
| "kl": 0.189453125, | |
| "learning_rate": 9.926143486143214e-05, | |
| "loss": -0.001, | |
| "num_tokens": 4341145.0, | |
| "reward": 2.6579177379608154, | |
| "reward_std": 0.05154726281762123, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.4079175889492035, | |
| "step": 427 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.5986013986013986, | |
| "grad_norm": 0.0008289218843548021, | |
| "kl": 0.19140625, | |
| "learning_rate": 9.92509479392205e-05, | |
| "loss": -0.0013, | |
| "step": 428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 89.67857360839844, | |
| "epoch": 0.6, | |
| "grad_norm": 0.005379412774279031, | |
| "kl": 0.1787109375, | |
| "learning_rate": 9.924038765061042e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4359277.0, | |
| "reward": 2.633777141571045, | |
| "reward_std": 0.05039219558238983, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.38377735018730164, | |
| "step": 429 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6013986013986014, | |
| "grad_norm": 0.010439668945298157, | |
| "kl": 0.1787109375, | |
| "learning_rate": 9.922975401133293e-05, | |
| "loss": 0.0002, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.67857360839844, | |
| "epoch": 0.6027972027972028, | |
| "grad_norm": 0.06317955454404899, | |
| "kl": 0.201171875, | |
| "learning_rate": 9.92190470372284e-05, | |
| "loss": 0.0068, | |
| "num_tokens": 4379109.0, | |
| "reward": 2.6650938987731934, | |
| "reward_std": 0.05930115655064583, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.4508081078529358, | |
| "step": 431 | |
| }, | |
| { | |
| "clip_ratio": 0.0002587991766631603, | |
| "epoch": 0.6041958041958042, | |
| "grad_norm": 0.05122842630547611, | |
| "kl": 0.1865234375, | |
| "learning_rate": 9.920826674424642e-05, | |
| "loss": 0.0063, | |
| "step": 432 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 80.35714721679688, | |
| "epoch": 0.6055944055944056, | |
| "grad_norm": 7.162658137250354e-07, | |
| "kl": 0.1875, | |
| "learning_rate": 9.919741314844577e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4396693.0, | |
| "reward": 3.0637807846069336, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.6709235310554504, | |
| "step": 433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.606993006993007, | |
| "grad_norm": 8.451796450955314e-07, | |
| "kl": 0.1875, | |
| "learning_rate": 9.918648626599447e-05, | |
| "loss": 0.0002, | |
| "step": 434 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 124.98214721679688, | |
| "epoch": 0.6083916083916084, | |
| "grad_norm": 0.016099687268680437, | |
| "kl": 0.1455078125, | |
| "learning_rate": 9.91754861131697e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 4418950.0, | |
| "reward": 2.94807505607605, | |
| "reward_std": 0.050821539014577866, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.5195035338401794, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6097902097902098, | |
| "grad_norm": 0.014390989758817311, | |
| "kl": 0.1455078125, | |
| "learning_rate": 9.916441270635772e-05, | |
| "loss": -0.0001, | |
| "step": 436 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.64286041259766, | |
| "epoch": 0.6111888111888112, | |
| "grad_norm": 1.968332582426962e-06, | |
| "kl": 0.14453125, | |
| "learning_rate": 9.915326606205404e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4439018.0, | |
| "reward": 2.77024507522583, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4131018817424774, | |
| "step": 437 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6125874125874126, | |
| "grad_norm": 1.8961430119120163e-06, | |
| "kl": 0.14453125, | |
| "learning_rate": 9.914204619686314e-05, | |
| "loss": 0.0001, | |
| "step": 438 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.21428680419922, | |
| "epoch": 0.6139860139860139, | |
| "grad_norm": 0.000668351376631818, | |
| "kl": 0.1728515625, | |
| "learning_rate": 9.913075312749866e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4458806.0, | |
| "reward": 2.8182172775268555, | |
| "reward_std": 0.0025242711417376995, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.42535990476608276, | |
| "step": 439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.0006751756209535994, | |
| "kl": 0.1728515625, | |
| "learning_rate": 9.911938687078324e-05, | |
| "loss": 0.0002, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 81.25, | |
| "epoch": 0.6167832167832168, | |
| "grad_norm": 0.007904133813646444, | |
| "kl": 0.240234375, | |
| "learning_rate": 9.910794744364857e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 4475982.0, | |
| "reward": 2.8269970417022705, | |
| "reward_std": 0.05014092102646828, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.5412828326225281, | |
| "step": 441 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6181818181818182, | |
| "grad_norm": 0.00827597712943806, | |
| "kl": 0.240234375, | |
| "learning_rate": 9.909643486313533e-05, | |
| "loss": -0.0001, | |
| "step": 442 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 113.00000762939453, | |
| "epoch": 0.6195804195804195, | |
| "grad_norm": 6.016032313066646e-06, | |
| "kl": 0.16796875, | |
| "learning_rate": 9.908484914639318e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4497200.0, | |
| "reward": 2.9243876934051514, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5315303206443787, | |
| "step": 443 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.620979020979021, | |
| "grad_norm": 7.198804696892823e-06, | |
| "kl": 0.16796875, | |
| "learning_rate": 9.90731903106807e-05, | |
| "loss": 0.0002, | |
| "step": 444 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 129.375, | |
| "epoch": 0.6223776223776224, | |
| "grad_norm": 0.02075011817403209, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.90614583733654e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 4519807.0, | |
| "reward": 2.866011619567871, | |
| "reward_std": 0.10101933032274246, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.47315436601638794, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6237762237762238, | |
| "grad_norm": 0.0034619333956659956, | |
| "kl": 0.1591796875, | |
| "learning_rate": 9.904965335192373e-05, | |
| "loss": -0.0002, | |
| "step": 446 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 98.92857360839844, | |
| "epoch": 0.6251748251748251, | |
| "grad_norm": 0.010057352537761423, | |
| "kl": 0.212890625, | |
| "learning_rate": 9.903777526394094e-05, | |
| "loss": -0.0086, | |
| "num_tokens": 4539039.0, | |
| "reward": 2.980358362197876, | |
| "reward_std": 0.05100167542695999, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.587501049041748, | |
| "step": 447 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6265734265734266, | |
| "grad_norm": 0.006958390301115274, | |
| "kl": 0.21875, | |
| "learning_rate": 9.90258241271112e-05, | |
| "loss": -0.0087, | |
| "step": 448 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 111.71429443359375, | |
| "epoch": 0.627972027972028, | |
| "grad_norm": 1.6536770511152556e-06, | |
| "kl": 0.142578125, | |
| "learning_rate": 9.901379995923738e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4559675.0, | |
| "reward": 2.681791067123413, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4675052762031555, | |
| "step": 449 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6293706293706294, | |
| "grad_norm": 1.6773170531853214e-06, | |
| "kl": 0.142578125, | |
| "learning_rate": 9.900170277823129e-05, | |
| "loss": 0.0001, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.35714721679688, | |
| "epoch": 0.6307692307692307, | |
| "grad_norm": 2.0958023033337787e-06, | |
| "kl": 0.2109375, | |
| "learning_rate": 9.898953260211338e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4579995.0, | |
| "reward": 2.7400293350219727, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4186006188392639, | |
| "step": 451 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6321678321678321, | |
| "grad_norm": 1.982971590769392e-06, | |
| "kl": 0.2109375, | |
| "learning_rate": 9.897728944901292e-05, | |
| "loss": 0.0002, | |
| "step": 452 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 118.26786041259766, | |
| "epoch": 0.6335664335664336, | |
| "grad_norm": 0.009450375112303724, | |
| "kl": 0.1416015625, | |
| "learning_rate": 9.896497333716783e-05, | |
| "loss": -0.0051, | |
| "num_tokens": 4602048.0, | |
| "reward": 2.640944004058838, | |
| "reward_std": 0.043768420815467834, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.5357142686843872, | |
| "rewards/check_winston_local_func": 0.5695151090621948, | |
| "step": 453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.634965034965035, | |
| "grad_norm": 0.0017713963386990425, | |
| "kl": 0.1416015625, | |
| "learning_rate": 9.895258428492475e-05, | |
| "loss": -0.0051, | |
| "step": 454 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 98.42857360839844, | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 0.014431904681184264, | |
| "kl": 0.1943359375, | |
| "learning_rate": 9.894012231073894e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4621654.0, | |
| "reward": 2.7700040340423584, | |
| "reward_std": 0.03728308901190758, | |
| "rewards/check_gptzero_func": 0.4107142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.43071839213371277, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6377622377622377, | |
| "grad_norm": 0.008239966094546339, | |
| "kl": 0.1943359375, | |
| "learning_rate": 9.892758743317434e-05, | |
| "loss": 0.0002, | |
| "step": 456 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 90.50000762939453, | |
| "epoch": 0.6391608391608392, | |
| "grad_norm": 2.579298856684113e-06, | |
| "kl": 0.1923828125, | |
| "learning_rate": 9.891497967090344e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4640140.0, | |
| "reward": 2.6383018493652344, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4597306251525879, | |
| "step": 457 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6405594405594406, | |
| "grad_norm": 2.6625863420088633e-06, | |
| "kl": 0.1923828125, | |
| "learning_rate": 9.890229904270731e-05, | |
| "loss": 0.0002, | |
| "step": 458 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 113.03572082519531, | |
| "epoch": 0.641958041958042, | |
| "grad_norm": 1.7932858894376123e-05, | |
| "kl": 0.1748046875, | |
| "learning_rate": 9.888954556747563e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4661276.0, | |
| "reward": 2.74210524559021, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.3849623203277588, | |
| "step": 459 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6433566433566433, | |
| "grad_norm": 1.7905922488331998e-05, | |
| "kl": 0.1748046875, | |
| "learning_rate": 9.887671926420648e-05, | |
| "loss": 0.0002, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 112.42857360839844, | |
| "epoch": 0.6447552447552447, | |
| "grad_norm": 0.014077582219597583, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.886382015200652e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4682450.0, | |
| "reward": 2.8680295944213867, | |
| "reward_std": 0.05049533396959305, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.43945807218551636, | |
| "step": 461 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6461538461538462, | |
| "grad_norm": 0.010796297977350395, | |
| "kl": 0.173828125, | |
| "learning_rate": 9.885084825009086e-05, | |
| "loss": 0.0001, | |
| "step": 462 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 105.14286041259766, | |
| "epoch": 0.6475524475524476, | |
| "grad_norm": 0.02303930816984521, | |
| "kl": 0.17578125, | |
| "learning_rate": 9.883780357778299e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 4702710.0, | |
| "reward": 2.885434150695801, | |
| "reward_std": 0.06019994616508484, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.75, | |
| "rewards/check_winston_local_func": 0.7068625092506409, | |
| "step": 463 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6489510489510489, | |
| "grad_norm": 0.0015050223064125838, | |
| "kl": 0.1767578125, | |
| "learning_rate": 9.882468615451484e-05, | |
| "loss": 0.0002, | |
| "step": 464 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 127.5714340209961, | |
| "epoch": 0.6503496503496503, | |
| "grad_norm": 3.0460982652821775e-06, | |
| "kl": 0.1650390625, | |
| "learning_rate": 9.881149599982671e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4725654.0, | |
| "reward": 2.407341957092285, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.2644847333431244, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6517482517482518, | |
| "grad_norm": 2.8166666383288673e-06, | |
| "kl": 0.1650390625, | |
| "learning_rate": 9.879823313336722e-05, | |
| "loss": 0.0002, | |
| "step": 466 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 122.16072082519531, | |
| "epoch": 0.6531468531468532, | |
| "grad_norm": 0.0001021700279542065, | |
| "kl": 0.140625, | |
| "learning_rate": 9.878489757489337e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4748375.0, | |
| "reward": 2.6119117736816406, | |
| "reward_std": 2.7804879209725186e-05, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.5714285373687744, | |
| "rewards/check_winston_local_func": 0.5047687888145447, | |
| "step": 467 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6545454545454545, | |
| "grad_norm": 9.33433928584176e-05, | |
| "kl": 0.140625, | |
| "learning_rate": 9.877148934427037e-05, | |
| "loss": 0.0001, | |
| "step": 468 | |
| }, | |
| { | |
| "clip_ratio": 0.0006711409660056233, | |
| "completion_length": 95.64286041259766, | |
| "epoch": 0.6559440559440559, | |
| "grad_norm": 0.0004684591425632982, | |
| "kl": 0.171875, | |
| "learning_rate": 9.87580084614717e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4767439.0, | |
| "reward": 2.3873257637023926, | |
| "reward_std": 0.0017833748133853078, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.42303988337516785, | |
| "step": 469 | |
| }, | |
| { | |
| "clip_ratio": 0.0004793864209204912, | |
| "epoch": 0.6573426573426573, | |
| "grad_norm": 0.00047799981287926325, | |
| "kl": 0.171875, | |
| "learning_rate": 9.874445494657911e-05, | |
| "loss": 0.0002, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 79.39286041259766, | |
| "epoch": 0.6587412587412588, | |
| "grad_norm": 0.011441113949782547, | |
| "kl": 0.216796875, | |
| "learning_rate": 9.873082881978251e-05, | |
| "loss": -0.0064, | |
| "num_tokens": 4784715.0, | |
| "reward": 3.037661075592041, | |
| "reward_std": 0.023328183218836784, | |
| "rewards/check_gptzero_func": 0.4821428656578064, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.5912323594093323, | |
| "step": 471 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6601398601398601, | |
| "grad_norm": 0.008433986682608557, | |
| "kl": 0.216796875, | |
| "learning_rate": 9.871713010137997e-05, | |
| "loss": -0.0064, | |
| "step": 472 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 119.42857360839844, | |
| "epoch": 0.6615384615384615, | |
| "grad_norm": 7.638914800437301e-06, | |
| "kl": 0.1435546875, | |
| "learning_rate": 9.870335881177774e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4806207.0, | |
| "reward": 2.9706830978393555, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5063972473144531, | |
| "step": 473 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6629370629370629, | |
| "grad_norm": 7.505737589947172e-06, | |
| "kl": 0.1435546875, | |
| "learning_rate": 9.868951497149011e-05, | |
| "loss": 0.0001, | |
| "step": 474 | |
| }, | |
| { | |
| "clip_ratio": 0.00019073051225859672, | |
| "completion_length": 89.87500762939453, | |
| "epoch": 0.6643356643356644, | |
| "grad_norm": 0.03662860311910791, | |
| "kl": 0.2060546875, | |
| "learning_rate": 9.86755986011395e-05, | |
| "loss": 0.0065, | |
| "num_tokens": 4824332.0, | |
| "reward": 2.643022060394287, | |
| "reward_std": 0.04776443541049957, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.8928571939468384, | |
| "rewards/check_winston_local_func": 0.42873620986938477, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6657342657342658, | |
| "grad_norm": 0.03126231173733144, | |
| "kl": 0.2060546875, | |
| "learning_rate": 9.866160972145634e-05, | |
| "loss": 0.0062, | |
| "step": 476 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 123.10714721679688, | |
| "epoch": 0.6671328671328671, | |
| "grad_norm": 0.0002559996362709361, | |
| "kl": 0.1357421875, | |
| "learning_rate": 9.864754835327909e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4847094.0, | |
| "reward": 2.767104387283325, | |
| "reward_std": 0.0007265785825438797, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.5528185963630676, | |
| "step": 477 | |
| }, | |
| { | |
| "clip_ratio": 8.97343925316818e-05, | |
| "epoch": 0.6685314685314685, | |
| "grad_norm": 0.00042633622080268514, | |
| "kl": 0.1357421875, | |
| "learning_rate": 9.86334145175542e-05, | |
| "loss": 0.0001, | |
| "step": 478 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 96.67857360839844, | |
| "epoch": 0.66993006993007, | |
| "grad_norm": 0.0008693689003033054, | |
| "kl": 0.16015625, | |
| "learning_rate": 9.861920823533606e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4866394.0, | |
| "reward": 2.5761330127716064, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.504704475402832, | |
| "step": 479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6713286713286714, | |
| "grad_norm": 0.0008435837049153919, | |
| "kl": 0.16015625, | |
| "learning_rate": 9.860492952778696e-05, | |
| "loss": 0.0002, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 94.39286041259766, | |
| "epoch": 0.6727272727272727, | |
| "grad_norm": 3.349433978828943e-06, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.859057841617709e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4885318.0, | |
| "reward": 2.8612334728240967, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5755191445350647, | |
| "step": 481 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6741258741258741, | |
| "grad_norm": 3.5095106255593373e-06, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.857615492188452e-05, | |
| "loss": 0.0002, | |
| "step": 482 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 87.98214721679688, | |
| "epoch": 0.6755244755244755, | |
| "grad_norm": 0.00016582191324414645, | |
| "kl": 0.208984375, | |
| "learning_rate": 9.856165906639513e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4903401.0, | |
| "reward": 2.797102928161621, | |
| "reward_std": 0.0008326892857439816, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4756740629673004, | |
| "step": 483 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.676923076923077, | |
| "grad_norm": 0.00017592290286556547, | |
| "kl": 0.208984375, | |
| "learning_rate": 9.85470908713026e-05, | |
| "loss": 0.0002, | |
| "step": 484 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 122.14286041259766, | |
| "epoch": 0.6783216783216783, | |
| "grad_norm": 3.840262302649335e-05, | |
| "kl": 0.140625, | |
| "learning_rate": 9.853245035830834e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4925777.0, | |
| "reward": 2.80380916595459, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.4823804199695587, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6797202797202797, | |
| "grad_norm": 3.855750270963149e-05, | |
| "kl": 0.140625, | |
| "learning_rate": 9.851773754922152e-05, | |
| "loss": 0.0001, | |
| "step": 486 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 110.5714340209961, | |
| "epoch": 0.6811188811188811, | |
| "grad_norm": 1.0000521495087345e-06, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.850295246595898e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4946515.0, | |
| "reward": 2.9591448307037354, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5305731296539307, | |
| "step": 487 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6825174825174826, | |
| "grad_norm": 9.579988148024946e-07, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.848809513054523e-05, | |
| "loss": 0.0002, | |
| "step": 488 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 104.67857360839844, | |
| "epoch": 0.6839160839160839, | |
| "grad_norm": 4.4290499511336513e-07, | |
| "kl": 0.1728515625, | |
| "learning_rate": 9.847316556511245e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4966537.0, | |
| "reward": 3.0352413654327393, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5709553956985474, | |
| "step": 489 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6853146853146853, | |
| "grad_norm": 4.228282290104698e-07, | |
| "kl": 0.1728515625, | |
| "learning_rate": 9.845816379190036e-05, | |
| "loss": 0.0002, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.92857360839844, | |
| "epoch": 0.6867132867132867, | |
| "grad_norm": 7.161264654013381e-07, | |
| "kl": 0.1865234375, | |
| "learning_rate": 9.844308983325625e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4986837.0, | |
| "reward": 2.8011527061462402, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.40829533338546753, | |
| "step": 491 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6881118881118881, | |
| "grad_norm": 6.606409864369399e-07, | |
| "kl": 0.1865234375, | |
| "learning_rate": 9.842794371163501e-05, | |
| "loss": 0.0002, | |
| "step": 492 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 127.3214340209961, | |
| "epoch": 0.6895104895104895, | |
| "grad_norm": 3.5056088482557826e-05, | |
| "kl": 0.11767578125, | |
| "learning_rate": 9.841272544959892e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 5009783.0, | |
| "reward": 2.6788315773010254, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.7857142686843872, | |
| "rewards/check_winston_local_func": 0.393117219209671, | |
| "step": 493 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6909090909090909, | |
| "grad_norm": 3.5363245519599165e-05, | |
| "kl": 0.11767578125, | |
| "learning_rate": 9.839743506981782e-05, | |
| "loss": 0.0001, | |
| "step": 494 | |
| }, | |
| { | |
| "clip_ratio": 0.0004093328316230327, | |
| "completion_length": 99.08928680419922, | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.0028759704706919223, | |
| "kl": 0.1650390625, | |
| "learning_rate": 9.838207259506891e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 5029238.0, | |
| "reward": 2.5216493606567383, | |
| "reward_std": 0.0011310166446492076, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.45022064447402954, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio": 0.0004093328316230327, | |
| "epoch": 0.6937062937062937, | |
| "grad_norm": 0.0035050811312865673, | |
| "kl": 0.1650390625, | |
| "learning_rate": 9.836663804823683e-05, | |
| "loss": 0.0002, | |
| "step": 496 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 124.85714721679688, | |
| "epoch": 0.6951048951048951, | |
| "grad_norm": 3.8191875557546916e-05, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.835113145231356e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 5051408.0, | |
| "reward": 2.8089945316314697, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4161372184753418, | |
| "step": 497 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6965034965034965, | |
| "grad_norm": 3.1854181850026e-05, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.833555283039842e-05, | |
| "loss": 0.0002, | |
| "step": 498 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 115.17857360839844, | |
| "epoch": 0.6979020979020979, | |
| "grad_norm": 3.735234931957505e-07, | |
| "kl": 0.15234375, | |
| "learning_rate": 9.831990220569801e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 5072486.0, | |
| "reward": 2.7535111904144287, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.46779707074165344, | |
| "step": 499 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.6993006993006993, | |
| "grad_norm": 3.749561048042854e-07, | |
| "kl": 0.15234375, | |
| "learning_rate": 9.83041796015262e-05, | |
| "loss": 0.0002, | |
| "step": 500 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 137.85714721679688, | |
| "epoch": 0.7006993006993008, | |
| "grad_norm": 0.004581651252064326, | |
| "kl": 0.1357421875, | |
| "learning_rate": 9.828838504130406e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 24128.0, | |
| "reward": 2.5751242637634277, | |
| "reward_std": 0.050465863198041916, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.8214285373687744, | |
| "rewards/check_winston_local_func": 0.36083847284317017, | |
| "step": 501 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7020979020979021, | |
| "grad_norm": 0.004623783518082634, | |
| "kl": 0.1357421875, | |
| "learning_rate": 9.827251854855991e-05, | |
| "loss": 0.0001, | |
| "step": 502 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 84.0714340209961, | |
| "epoch": 0.7034965034965035, | |
| "grad_norm": 5.4224474596128455e-06, | |
| "kl": 0.1962890625, | |
| "learning_rate": 9.825658014692914e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 41660.0, | |
| "reward": 2.881941556930542, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.48908427357673645, | |
| "step": 503 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7048951048951049, | |
| "grad_norm": 5.774418529053615e-06, | |
| "kl": 0.1962890625, | |
| "learning_rate": 9.824056986015433e-05, | |
| "loss": 0.0002, | |
| "step": 504 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 125.76786041259766, | |
| "epoch": 0.7062937062937062, | |
| "grad_norm": 0.012190639094338367, | |
| "kl": 0.158203125, | |
| "learning_rate": 9.82244877120851e-05, | |
| "loss": -0.0, | |
| "num_tokens": 63809.0, | |
| "reward": 2.9153220653533936, | |
| "reward_std": 0.05151599273085594, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.45103612542152405, | |
| "step": 505 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7076923076923077, | |
| "grad_norm": 0.0008706312902749691, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.820833372667812e-05, | |
| "loss": -0.0001, | |
| "step": 506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.67857360839844, | |
| "epoch": 0.7090909090909091, | |
| "grad_norm": 7.4245877265908374e-06, | |
| "kl": 0.171875, | |
| "learning_rate": 9.819210792799712e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 83877.0, | |
| "reward": 2.742607831954956, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.3854646682739258, | |
| "step": 507 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7104895104895105, | |
| "grad_norm": 5.9863315776137925e-06, | |
| "kl": 0.171875, | |
| "learning_rate": 9.817581034021272e-05, | |
| "loss": 0.0002, | |
| "step": 508 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 112.21429443359375, | |
| "epoch": 0.7118881118881119, | |
| "grad_norm": 1.5219106062929897e-05, | |
| "kl": 0.154296875, | |
| "learning_rate": 9.815944098760257e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 105225.0, | |
| "reward": 2.886364698410034, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5649359226226807, | |
| "step": 509 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7132867132867133, | |
| "grad_norm": 3.826854189322478e-05, | |
| "kl": 0.154296875, | |
| "learning_rate": 9.814299989455117e-05, | |
| "loss": 0.0002, | |
| "step": 510 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 93.50000762939453, | |
| "epoch": 0.7146853146853147, | |
| "grad_norm": 3.6634275067756628e-06, | |
| "kl": 0.18359375, | |
| "learning_rate": 9.81264870855499e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 124237.0, | |
| "reward": 2.9750092029571533, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5464377403259277, | |
| "step": 511 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7160839160839161, | |
| "grad_norm": 3.3120330925968346e-06, | |
| "kl": 0.18359375, | |
| "learning_rate": 9.810990258519699e-05, | |
| "loss": 0.0002, | |
| "step": 512 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 114.5714340209961, | |
| "epoch": 0.7174825174825175, | |
| "grad_norm": 1.5887563588226216e-07, | |
| "kl": 0.1474609375, | |
| "learning_rate": 9.809324641819741e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 145557.0, | |
| "reward": 2.767850637435913, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4107076823711395, | |
| "step": 513 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7188811188811188, | |
| "grad_norm": 1.5359302478120885e-07, | |
| "kl": 0.1474609375, | |
| "learning_rate": 9.807651860936297e-05, | |
| "loss": 0.0001, | |
| "step": 514 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 110.3214340209961, | |
| "epoch": 0.7202797202797203, | |
| "grad_norm": 1.620447571811333e-07, | |
| "kl": 0.1630859375, | |
| "learning_rate": 9.805971918361214e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 165997.0, | |
| "reward": 2.9055941104888916, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5127367377281189, | |
| "step": 515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7216783216783217, | |
| "grad_norm": 1.472040658632873e-07, | |
| "kl": 0.1630859375, | |
| "learning_rate": 9.804284816597008e-05, | |
| "loss": 0.0002, | |
| "step": 516 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 91.67857360839844, | |
| "epoch": 0.7230769230769231, | |
| "grad_norm": 2.4623250489986615e-06, | |
| "kl": 0.142578125, | |
| "learning_rate": 9.802590558156862e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 184617.0, | |
| "reward": 2.853912830352783, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.46105554699897766, | |
| "step": 517 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7244755244755244, | |
| "grad_norm": 2.0559549189734124e-06, | |
| "kl": 0.142578125, | |
| "learning_rate": 9.800889145564617e-05, | |
| "loss": 0.0001, | |
| "step": 518 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 127.21429443359375, | |
| "epoch": 0.7258741258741259, | |
| "grad_norm": 2.3530034340292295e-07, | |
| "kl": 0.1552734375, | |
| "learning_rate": 9.799180581354774e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 207223.0, | |
| "reward": 2.7364511489868164, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.45073673129081726, | |
| "step": 519 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 2.2322343800833895e-07, | |
| "kl": 0.1552734375, | |
| "learning_rate": 9.797464868072488e-05, | |
| "loss": 0.0002, | |
| "step": 520 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 104.5714340209961, | |
| "epoch": 0.7286713286713287, | |
| "grad_norm": 2.968346809067501e-07, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.795742008273558e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 227059.0, | |
| "reward": 2.6932995319366455, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.3361565172672272, | |
| "step": 521 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.73006993006993, | |
| "grad_norm": 2.852980195095417e-07, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.794012004524434e-05, | |
| "loss": 0.0002, | |
| "step": 522 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 105.42857360839844, | |
| "epoch": 0.7314685314685314, | |
| "grad_norm": 3.3796985539550523e-06, | |
| "kl": 0.1640625, | |
| "learning_rate": 9.792274859402205e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 246949.0, | |
| "reward": 2.6483211517333984, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.3983212113380432, | |
| "step": 523 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7328671328671329, | |
| "grad_norm": 3.1476330793325015e-06, | |
| "kl": 0.1640625, | |
| "learning_rate": 9.790530575494603e-05, | |
| "loss": 0.0002, | |
| "step": 524 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 125.03572082519531, | |
| "epoch": 0.7342657342657343, | |
| "grad_norm": 0.0060337949589422705, | |
| "kl": 0.16796875, | |
| "learning_rate": 9.788779155399987e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 269441.0, | |
| "reward": 2.768662214279175, | |
| "reward_std": 0.049707408994436264, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9642857313156128, | |
| "rewards/check_winston_local_func": 0.4115191400051117, | |
| "step": 525 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7356643356643356, | |
| "grad_norm": 0.0068588182961818464, | |
| "kl": 0.16796875, | |
| "learning_rate": 9.787020601727352e-05, | |
| "loss": -0.0, | |
| "step": 526 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 78.39286041259766, | |
| "epoch": 0.737062937062937, | |
| "grad_norm": 1.3153821115529096e-05, | |
| "kl": 0.2353515625, | |
| "learning_rate": 9.785254917096318e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 286345.0, | |
| "reward": 3.074557304382324, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5745573043823242, | |
| "step": 527 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7384615384615385, | |
| "grad_norm": 1.256427796379324e-05, | |
| "kl": 0.2353515625, | |
| "learning_rate": 9.783482104137127e-05, | |
| "loss": 0.0002, | |
| "step": 528 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 124.39286041259766, | |
| "epoch": 0.7398601398601399, | |
| "grad_norm": 1.3517164643713064e-05, | |
| "kl": 0.12353515625, | |
| "learning_rate": 9.781702165490639e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 309195.0, | |
| "reward": 2.4804210662841797, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.6428571939468384, | |
| "rewards/check_winston_local_func": 0.44470664858818054, | |
| "step": 529 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7412587412587412, | |
| "grad_norm": 1.3131407312669822e-05, | |
| "kl": 0.12353515625, | |
| "learning_rate": 9.779915103808328e-05, | |
| "loss": 0.0001, | |
| "step": 530 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 111.53572082519531, | |
| "epoch": 0.7426573426573426, | |
| "grad_norm": 7.637416736333039e-07, | |
| "kl": 0.15625, | |
| "learning_rate": 9.778120921752285e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 330505.0, | |
| "reward": 3.070021390914917, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5343068838119507, | |
| "step": 531 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7440559440559441, | |
| "grad_norm": 7.316834780823122e-07, | |
| "kl": 0.15625, | |
| "learning_rate": 9.776319621995201e-05, | |
| "loss": 0.0002, | |
| "step": 532 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.35714721679688, | |
| "epoch": 0.7454545454545455, | |
| "grad_norm": 3.663080215251399e-07, | |
| "kl": 0.193359375, | |
| "learning_rate": 9.77451120722037e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 350813.0, | |
| "reward": 2.63775634765625, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3214285671710968, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.3877563774585724, | |
| "step": 533 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7468531468531469, | |
| "grad_norm": 3.8678973306591443e-07, | |
| "kl": 0.193359375, | |
| "learning_rate": 9.77269568012169e-05, | |
| "loss": 0.0002, | |
| "step": 534 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.67857360839844, | |
| "epoch": 0.7482517482517482, | |
| "grad_norm": 6.123173544504598e-06, | |
| "kl": 0.2216796875, | |
| "learning_rate": 9.770873043403648e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 371585.0, | |
| "reward": 2.874614953994751, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5174719095230103, | |
| "step": 535 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7496503496503496, | |
| "grad_norm": 6.141198966103098e-06, | |
| "kl": 0.2216796875, | |
| "learning_rate": 9.769043299781327e-05, | |
| "loss": 0.0002, | |
| "step": 536 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 104.71428680419922, | |
| "epoch": 0.7510489510489511, | |
| "grad_norm": 6.024782598667822e-07, | |
| "kl": 0.212890625, | |
| "learning_rate": 9.767206451980394e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 391201.0, | |
| "reward": 2.666369676589966, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4163695275783539, | |
| "step": 537 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7524475524475525, | |
| "grad_norm": 5.651815000751003e-07, | |
| "kl": 0.212890625, | |
| "learning_rate": 9.765362502737097e-05, | |
| "loss": 0.0002, | |
| "step": 538 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.21428680419922, | |
| "epoch": 0.7538461538461538, | |
| "grad_norm": 4.5359076717402366e-07, | |
| "kl": 0.19140625, | |
| "learning_rate": 9.763511454798268e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 412087.0, | |
| "reward": 2.660142660140991, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4815710484981537, | |
| "step": 539 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7552447552447552, | |
| "grad_norm": 7.72545386297213e-07, | |
| "kl": 0.19140625, | |
| "learning_rate": 9.761653310921307e-05, | |
| "loss": 0.0002, | |
| "step": 540 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 133.85714721679688, | |
| "epoch": 0.7566433566433567, | |
| "grad_norm": 8.430058973039463e-08, | |
| "kl": 0.146484375, | |
| "learning_rate": 9.759788073874189e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 435535.0, | |
| "reward": 2.720244884490967, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.3631021976470947, | |
| "step": 541 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7580419580419581, | |
| "grad_norm": 8.327469845321351e-08, | |
| "kl": 0.146484375, | |
| "learning_rate": 9.757915746435453e-05, | |
| "loss": 0.0001, | |
| "step": 542 | |
| }, | |
| { | |
| "clip_ratio": 0.0002040816325461492, | |
| "completion_length": 102.89286041259766, | |
| "epoch": 0.7594405594405594, | |
| "grad_norm": 0.000356312443076848, | |
| "kl": 0.203125, | |
| "learning_rate": 9.756036331394202e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 455137.0, | |
| "reward": 2.6731011867523193, | |
| "reward_std": 1.646135569899343e-05, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.351672500371933, | |
| "step": 543 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7608391608391608, | |
| "grad_norm": 0.00031516713281171804, | |
| "kl": 0.203125, | |
| "learning_rate": 9.754149831550098e-05, | |
| "loss": 0.0002, | |
| "step": 544 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 89.35714721679688, | |
| "epoch": 0.7622377622377622, | |
| "grad_norm": 2.70311309473727e-07, | |
| "kl": 0.2080078125, | |
| "learning_rate": 9.752256249713351e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 473297.0, | |
| "reward": 2.869070291519165, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5119272470474243, | |
| "step": 545 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7636363636363637, | |
| "grad_norm": 2.830814999627113e-07, | |
| "kl": 0.2080078125, | |
| "learning_rate": 9.750355588704727e-05, | |
| "loss": 0.0002, | |
| "step": 546 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 132.44644165039062, | |
| "epoch": 0.765034965034965, | |
| "grad_norm": 0.0009688545197921594, | |
| "kl": 0.1806640625, | |
| "learning_rate": 9.748447851355535e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 496926.0, | |
| "reward": 2.7388832569122314, | |
| "reward_std": 0.0023702967446297407, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.7142857313156128, | |
| "rewards/check_winston_local_func": 0.4888834059238434, | |
| "step": 547 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7664335664335664, | |
| "grad_norm": 0.0016734864168510978, | |
| "kl": 0.1806640625, | |
| "learning_rate": 9.746533040507624e-05, | |
| "loss": 0.0002, | |
| "step": 548 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 97.03572082519531, | |
| "epoch": 0.7678321678321678, | |
| "grad_norm": 1.9163803223436476e-06, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.744611159013381e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 516080.0, | |
| "reward": 2.9889118671417236, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.48891177773475647, | |
| "step": 549 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1.2761616262245509e-06, | |
| "kl": 0.177734375, | |
| "learning_rate": 9.742682209735727e-05, | |
| "loss": 0.0002, | |
| "step": 550 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 92.39286041259766, | |
| "epoch": 0.7706293706293706, | |
| "grad_norm": 1.896094459987994e-07, | |
| "kl": 0.1923828125, | |
| "learning_rate": 9.740746195548112e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 534554.0, | |
| "reward": 2.943678855895996, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5151072144508362, | |
| "step": 551 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.772027972027972, | |
| "grad_norm": 2.1137044966091938e-07, | |
| "kl": 0.1923828125, | |
| "learning_rate": 9.738803119334506e-05, | |
| "loss": 0.0002, | |
| "step": 552 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 103.21428680419922, | |
| "epoch": 0.7734265734265734, | |
| "grad_norm": 4.705318347406964e-07, | |
| "kl": 0.1826171875, | |
| "learning_rate": 9.736852983989404e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 554220.0, | |
| "reward": 2.9958415031433105, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4958415627479553, | |
| "step": 553 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7748251748251749, | |
| "grad_norm": 5.24917119837944e-07, | |
| "kl": 0.1826171875, | |
| "learning_rate": 9.734895792417811e-05, | |
| "loss": 0.0002, | |
| "step": 554 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.53572082519531, | |
| "epoch": 0.7762237762237763, | |
| "grad_norm": 0.0005595837692618245, | |
| "kl": 0.1611328125, | |
| "learning_rate": 9.73293154753525e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 574520.0, | |
| "reward": 2.660358190536499, | |
| "reward_std": 0.05061452463269234, | |
| "rewards/check_gptzero_func": 0.2857142984867096, | |
| "rewards/check_perplexity_diff_func": 1.75, | |
| "rewards/check_winston_local_func": 0.6246438026428223, | |
| "step": 555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7776223776223776, | |
| "grad_norm": 0.0005476655329061088, | |
| "kl": 0.1611328125, | |
| "learning_rate": 9.730960252267743e-05, | |
| "loss": -0.0001, | |
| "step": 556 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 110.28572082519531, | |
| "epoch": 0.779020979020979, | |
| "grad_norm": 1.9994241055330475e-07, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.728981909551824e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 595056.0, | |
| "reward": 2.7659287452697754, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.40878555178642273, | |
| "step": 557 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7804195804195804, | |
| "grad_norm": 2.102720872162732e-07, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.726996522334516e-05, | |
| "loss": 0.0002, | |
| "step": 558 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 116.3214340209961, | |
| "epoch": 0.7818181818181819, | |
| "grad_norm": 1.4418577967265551e-06, | |
| "kl": 0.197265625, | |
| "learning_rate": 9.725004093573342e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 616218.0, | |
| "reward": 2.90523099899292, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4766596257686615, | |
| "step": 559 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7832167832167832, | |
| "grad_norm": 1.4761501812795077e-06, | |
| "kl": 0.197265625, | |
| "learning_rate": 9.723004626236314e-05, | |
| "loss": 0.0002, | |
| "step": 560 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 92.28572082519531, | |
| "epoch": 0.7846153846153846, | |
| "grad_norm": 6.640556191873379e-07, | |
| "kl": 0.1904296875, | |
| "learning_rate": 9.720998123301923e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 634450.0, | |
| "reward": 2.910466432571411, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4461804926395416, | |
| "step": 561 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.786013986013986, | |
| "grad_norm": 6.785626739636823e-07, | |
| "kl": 0.1904296875, | |
| "learning_rate": 9.718984587759148e-05, | |
| "loss": 0.0002, | |
| "step": 562 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 97.67857360839844, | |
| "epoch": 0.7874125874125875, | |
| "grad_norm": 0.0002364011375932314, | |
| "kl": 0.1953125, | |
| "learning_rate": 9.71696402260744e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 653730.0, | |
| "reward": 3.09153151512146, | |
| "reward_std": 4.5663102355320007e-05, | |
| "rewards/check_gptzero_func": 0.5714285969734192, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5201030373573303, | |
| "step": 563 | |
| }, | |
| { | |
| "clip_ratio": 0.00014228800137061626, | |
| "epoch": 0.7888111888111888, | |
| "grad_norm": 0.00020388090420570156, | |
| "kl": 0.1953125, | |
| "learning_rate": 9.714936430856723e-05, | |
| "loss": 0.0002, | |
| "step": 564 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 120.66072082519531, | |
| "epoch": 0.7902097902097902, | |
| "grad_norm": 8.089432221384178e-05, | |
| "kl": 0.1357421875, | |
| "learning_rate": 9.712901815527386e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 675657.0, | |
| "reward": 2.674098253250122, | |
| "reward_std": 0.0001108883589040488, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.38838380575180054, | |
| "step": 565 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7916083916083916, | |
| "grad_norm": 8.889658368646956e-05, | |
| "kl": 0.1357421875, | |
| "learning_rate": 9.710860179650287e-05, | |
| "loss": 0.0001, | |
| "step": 566 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.3214340209961, | |
| "epoch": 0.793006993006993, | |
| "grad_norm": 0.0001037228042730343, | |
| "kl": 0.17578125, | |
| "learning_rate": 9.70881152626673e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 695807.0, | |
| "reward": 2.615325927734375, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.25, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4367544949054718, | |
| "step": 567 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7944055944055944, | |
| "grad_norm": 9.62410190276379e-05, | |
| "kl": 0.17578125, | |
| "learning_rate": 9.706755858428486e-05, | |
| "loss": 0.0002, | |
| "step": 568 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 128.3928680419922, | |
| "epoch": 0.7958041958041958, | |
| "grad_norm": 3.2052372287941215e-05, | |
| "kl": 0.12890625, | |
| "learning_rate": 9.704693179197767e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 719201.0, | |
| "reward": 2.616921901702881, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.7857142686843872, | |
| "rewards/check_winston_local_func": 0.40263620018959045, | |
| "step": 569 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.7972027972027972, | |
| "grad_norm": 3.1589569913845654e-05, | |
| "kl": 0.12890625, | |
| "learning_rate": 9.702623491647233e-05, | |
| "loss": 0.0001, | |
| "step": 570 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 97.3214340209961, | |
| "epoch": 0.7986013986013986, | |
| "grad_norm": 2.966365002312992e-07, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.70054679885998e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 738947.0, | |
| "reward": 2.835333824157715, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.47819074988365173, | |
| "step": 571 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8, | |
| "grad_norm": 4.1573650424054196e-07, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.698463103929542e-05, | |
| "loss": 0.0002, | |
| "step": 572 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 105.78572082519531, | |
| "epoch": 0.8013986013986014, | |
| "grad_norm": 0.000588551803293507, | |
| "kl": 0.1611328125, | |
| "learning_rate": 9.696372409959886e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 759359.0, | |
| "reward": 2.967468738555908, | |
| "reward_std": 0.0020603849552571774, | |
| "rewards/check_gptzero_func": 0.5, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.5388973355293274, | |
| "step": 573 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8027972027972028, | |
| "grad_norm": 0.0006215594211925717, | |
| "kl": 0.1611328125, | |
| "learning_rate": 9.694274720065399e-05, | |
| "loss": 0.0002, | |
| "step": 574 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 97.14286041259766, | |
| "epoch": 0.8041958041958042, | |
| "grad_norm": 3.3967306751168834e-06, | |
| "kl": 0.17578125, | |
| "learning_rate": 9.692170037370898e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 778637.0, | |
| "reward": 2.9381115436553955, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.509539783000946, | |
| "step": 575 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8055944055944056, | |
| "grad_norm": 3.2036118908588705e-06, | |
| "kl": 0.17578125, | |
| "learning_rate": 9.690058365011607e-05, | |
| "loss": 0.0002, | |
| "step": 576 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 102.64286041259766, | |
| "epoch": 0.806993006993007, | |
| "grad_norm": 8.125129903430204e-06, | |
| "kl": 0.1630859375, | |
| "learning_rate": 9.687939706133168e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 798473.0, | |
| "reward": 2.781018018722534, | |
| "reward_std": 0.025253813713788986, | |
| "rewards/check_gptzero_func": 0.4107142984867096, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4417320787906647, | |
| "step": 577 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8083916083916084, | |
| "grad_norm": 7.98774787404577e-06, | |
| "kl": 0.1630859375, | |
| "learning_rate": 9.685814063891631e-05, | |
| "loss": 0.0002, | |
| "step": 578 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 126.85714721679688, | |
| "epoch": 0.8097902097902098, | |
| "grad_norm": 3.679599498892351e-07, | |
| "kl": 0.138671875, | |
| "learning_rate": 9.683681441453445e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 821081.0, | |
| "reward": 2.8629186153411865, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.43434715270996094, | |
| "step": 579 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8111888111888111, | |
| "grad_norm": 3.93663211553172e-07, | |
| "kl": 0.138671875, | |
| "learning_rate": 9.681541841995461e-05, | |
| "loss": 0.0001, | |
| "step": 580 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.00000762939453, | |
| "epoch": 0.8125874125874126, | |
| "grad_norm": 8.022029708518222e-07, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.67939526870492e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 841529.0, | |
| "reward": 2.8752760887145996, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.55384761095047, | |
| "step": 581 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.813986013986014, | |
| "grad_norm": 8.501682817147847e-07, | |
| "kl": 0.162109375, | |
| "learning_rate": 9.677241724779453e-05, | |
| "loss": 0.0002, | |
| "step": 582 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 109.85714721679688, | |
| "epoch": 0.8153846153846154, | |
| "grad_norm": 1.7930012533014129e-06, | |
| "kl": 0.1689453125, | |
| "learning_rate": 9.675081213427076e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 862213.0, | |
| "reward": 2.9139628410339355, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.48539119958877563, | |
| "step": 583 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8167832167832167, | |
| "grad_norm": 1.6519465345892093e-06, | |
| "kl": 0.1689453125, | |
| "learning_rate": 9.672913737866179e-05, | |
| "loss": 0.0002, | |
| "step": 584 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 98.66072082519531, | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 0.002787236207669909, | |
| "kl": 0.25, | |
| "learning_rate": 9.670739301325534e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 881842.0, | |
| "reward": 2.6230576038360596, | |
| "reward_std": 0.006216124631464481, | |
| "rewards/check_gptzero_func": 0.3571428656578064, | |
| "rewards/check_perplexity_diff_func": 1.7857142686843872, | |
| "rewards/check_winston_local_func": 0.48020049929618835, | |
| "step": 585 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8195804195804196, | |
| "grad_norm": 0.002885109231591189, | |
| "kl": 0.2490234375, | |
| "learning_rate": 9.668557907044276e-05, | |
| "loss": 0.0002, | |
| "step": 586 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 79.14286041259766, | |
| "epoch": 0.820979020979021, | |
| "grad_norm": 4.462250819797621e-06, | |
| "kl": 0.1923828125, | |
| "learning_rate": 9.666369558271909e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 898716.0, | |
| "reward": 2.900649309158325, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.3928571343421936, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5077921152114868, | |
| "step": 587 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8223776223776224, | |
| "grad_norm": 4.488075066816524e-06, | |
| "kl": 0.1923828125, | |
| "learning_rate": 9.66417425826829e-05, | |
| "loss": 0.0002, | |
| "step": 588 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 112.03572082519531, | |
| "epoch": 0.8237762237762237, | |
| "grad_norm": 1.3821264354883407e-07, | |
| "kl": 0.14453125, | |
| "learning_rate": 9.661972010303641e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 919636.0, | |
| "reward": 2.7668612003326416, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.4454323649406433, | |
| "step": 589 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8251748251748252, | |
| "grad_norm": 1.4174011116816794e-07, | |
| "kl": 0.14453125, | |
| "learning_rate": 9.659762817658524e-05, | |
| "loss": 0.0001, | |
| "step": 590 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.92857360839844, | |
| "epoch": 0.8265734265734266, | |
| "grad_norm": 8.773734381605369e-07, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.65754668362385e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 940096.0, | |
| "reward": 2.8289589881896973, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4642857015132904, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4361015856266022, | |
| "step": 591 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.827972027972028, | |
| "grad_norm": 1.0023469469060766e-06, | |
| "kl": 0.1572265625, | |
| "learning_rate": 9.655323611500875e-05, | |
| "loss": 0.0002, | |
| "step": 592 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 113.85714721679688, | |
| "epoch": 0.8293706293706293, | |
| "grad_norm": 6.1541718025779055e-06, | |
| "kl": 0.166015625, | |
| "learning_rate": 9.653093604601183e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 961340.0, | |
| "reward": 2.9606730937957764, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.4249587059020996, | |
| "step": 593 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8307692307692308, | |
| "grad_norm": 6.512813773208274e-06, | |
| "kl": 0.166015625, | |
| "learning_rate": 9.650856666246693e-05, | |
| "loss": 0.0002, | |
| "step": 594 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 91.60714721679688, | |
| "epoch": 0.8321678321678322, | |
| "grad_norm": 3.2190340652283214e-06, | |
| "kl": 0.1767578125, | |
| "learning_rate": 9.648612799769644e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 979916.0, | |
| "reward": 3.0096070766448975, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 2.0, | |
| "rewards/check_winston_local_func": 0.5810357332229614, | |
| "step": 595 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8335664335664336, | |
| "grad_norm": 3.1137752081162388e-06, | |
| "kl": 0.1767578125, | |
| "learning_rate": 9.646362008512602e-05, | |
| "loss": 0.0002, | |
| "step": 596 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 106.53572082519531, | |
| "epoch": 0.8349650349650349, | |
| "grad_norm": 6.812051267845749e-06, | |
| "kl": 0.185546875, | |
| "learning_rate": 9.644104295828447e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 1000300.0, | |
| "reward": 2.7739078998565674, | |
| "reward_std": 0.0, | |
| "rewards/check_gptzero_func": 0.4285714328289032, | |
| "rewards/check_perplexity_diff_func": 1.9285714626312256, | |
| "rewards/check_winston_local_func": 0.4167649447917938, | |
| "step": 597 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8363636363636363, | |
| "grad_norm": 2.398683493432653e-06, | |
| "kl": 0.185546875, | |
| "learning_rate": 9.641839665080363e-05, | |
| "loss": 0.0002, | |
| "step": 598 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 107.41072082519531, | |
| "epoch": 0.8377622377622378, | |
| "grad_norm": 0.0008785473557125962, | |
| "kl": 0.1796875, | |
| "learning_rate": 9.63956811964185e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 1021105.0, | |
| "reward": 2.91171932220459, | |
| "reward_std": 0.00400555832311511, | |
| "rewards/check_gptzero_func": 0.5357142686843872, | |
| "rewards/check_perplexity_diff_func": 1.8571428060531616, | |
| "rewards/check_winston_local_func": 0.518862247467041, | |
| "step": 599 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "epoch": 0.8391608391608392, | |
| "grad_norm": 0.000892784560673435, | |
| "kl": 0.1796875, | |
| "learning_rate": 9.6372896628967e-05, | |
| "loss": 0.0002, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 2860, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |