{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2920634920634921, "eval_steps": 500, "global_step": 4600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 129.71429443359375, "epoch": 6.349206349206349e-05, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.1337868480725624e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1 }, { "completion_length": 198.2857208251953, "epoch": 0.00012698412698412698, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.2675736961451248e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2 }, { "completion_length": 169.42857360839844, "epoch": 0.00019047619047619048, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.401360544217687e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3 }, { "completion_length": 213.85714721679688, "epoch": 0.00025396825396825396, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.5351473922902496e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4 }, { "completion_length": 163.0, "epoch": 0.00031746031746031746, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 5.668934240362812e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 5 }, { "completion_length": 176.6428680419922, "epoch": 0.00038095238095238096, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 6.802721088435374e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 6 }, { "completion_length": 180.2857208251953, "epoch": 0.00044444444444444447, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 7.936507936507936e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 7 }, { "completion_length": 162.07144165039062, "epoch": 0.0005079365079365079, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 9.070294784580499e-10, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 8 }, { "completion_length": 181.00001525878906, "epoch": 0.0005714285714285715, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.020408163265306e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 9 }, { "completion_length": 166.35714721679688, "epoch": 0.0006349206349206349, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.1337868480725624e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 10 }, { "completion_length": 155.07144165039062, "epoch": 0.0006984126984126984, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.2471655328798184e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 11 }, { "completion_length": 193.85714721679688, "epoch": 0.0007619047619047619, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.3605442176870747e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 12 }, { "completion_length": 155.2857208251953, "epoch": 0.0008253968253968254, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.473922902494331e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 13 }, { "completion_length": 150.21429443359375, "epoch": 0.0008888888888888889, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.5873015873015873e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 14 }, { "completion_length": 168.35714721679688, "epoch": 0.0009523809523809524, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.7006802721088433e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 15 }, { "completion_length": 170.35714721679688, "epoch": 0.0010158730158730158, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.8140589569160998e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 16 }, { "completion_length": 127.50000762939453, "epoch": 0.0010793650793650793, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.927437641723356e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 17 }, { "completion_length": 171.21429443359375, "epoch": 0.001142857142857143, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.040816326530612e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 18 }, { "completion_length": 140.1428680419922, "epoch": 0.0012063492063492064, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.1541950113378684e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 19 }, { "completion_length": 176.92857360839844, "epoch": 0.0012698412698412698, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.2675736961451247e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 20 }, { "completion_length": 160.85714721679688, "epoch": 0.0013333333333333333, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.380952380952381e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 21 }, { "completion_length": 198.35714721679688, "epoch": 0.0013968253968253967, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.494331065759637e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 22 }, { "completion_length": 136.85714721679688, "epoch": 0.0014603174603174604, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.6077097505668936e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 23 }, { "completion_length": 207.85714721679688, "epoch": 0.0015238095238095239, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.7210884353741494e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 24 }, { "completion_length": 183.07144165039062, "epoch": 0.0015873015873015873, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.8344671201814057e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 25 }, { "completion_length": 152.85714721679688, "epoch": 0.0016507936507936507, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 2.947845804988662e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 26 }, { "completion_length": 188.35714721679688, "epoch": 0.0017142857142857142, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.0612244897959183e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 27 }, { "completion_length": 150.2857208251953, "epoch": 0.0017777777777777779, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.1746031746031745e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 28 }, { "completion_length": 207.71429443359375, "epoch": 0.0018412698412698413, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.287981859410431e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 29 }, { "completion_length": 154.6428680419922, "epoch": 0.0019047619047619048, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.4013605442176867e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 30 }, { "completion_length": 200.35714721679688, "epoch": 0.0019682539682539684, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.5147392290249434e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 31 }, { "completion_length": 155.21429443359375, "epoch": 0.0020317460317460317, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.6281179138321996e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 32 }, { "completion_length": 153.07144165039062, "epoch": 0.0020952380952380953, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.7414965986394555e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 33 }, { "completion_length": 144.71429443359375, "epoch": 0.0021587301587301586, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.854875283446712e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 34 }, { "completion_length": 178.07144165039062, "epoch": 0.0022222222222222222, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 3.968253968253968e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 35 }, { "completion_length": 168.1428680419922, "epoch": 0.002285714285714286, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.081632653061224e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 36 }, { "completion_length": 130.92857360839844, "epoch": 0.002349206349206349, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.195011337868481e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 37 }, { "completion_length": 124.14286041259766, "epoch": 0.0024126984126984128, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.308390022675737e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 38 }, { "completion_length": 150.2857208251953, "epoch": 0.002476190476190476, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.421768707482993e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 39 }, { "completion_length": 178.57144165039062, "epoch": 0.0025396825396825397, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.5351473922902494e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 40 }, { "completion_length": 154.6428680419922, "epoch": 0.0026031746031746033, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.648526077097505e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 41 }, { "completion_length": 173.07144165039062, "epoch": 0.0026666666666666666, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.761904761904762e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 42 }, { "completion_length": 147.35714721679688, "epoch": 0.0027301587301587302, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 4.875283446712018e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 43 }, { "completion_length": 117.00000762939453, "epoch": 0.0027936507936507935, "grad_norm": 1.1911486387252808, "kl": 0.0, "learning_rate": 4.988662131519274e-09, "loss": -0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 44 }, { "completion_length": 146.71429443359375, "epoch": 0.002857142857142857, "grad_norm": 0.0004188590683043003, "kl": 0.0005703701754100621, "learning_rate": 5.10204081632653e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 45 }, { "completion_length": 162.57144165039062, "epoch": 0.002920634920634921, "grad_norm": 0.000375465169781819, "kl": 0.0005612034583464265, "learning_rate": 5.215419501133787e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 46 }, { "completion_length": 143.92857360839844, "epoch": 0.002984126984126984, "grad_norm": 0.0003840386343654245, "kl": 0.0005227452493272722, "learning_rate": 5.3287981859410426e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 47 }, { "completion_length": 185.00001525878906, "epoch": 0.0030476190476190477, "grad_norm": 0.0003919118898920715, "kl": 0.00048108570626936853, "learning_rate": 5.442176870748299e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 48 }, { "completion_length": 170.2857208251953, "epoch": 0.003111111111111111, "grad_norm": 0.0004359459853731096, "kl": 0.0005242484039627016, "learning_rate": 5.555555555555555e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 49 }, { "completion_length": 173.6428680419922, "epoch": 0.0031746031746031746, "grad_norm": 0.0003118282766081393, "kl": 0.0004579567175824195, "learning_rate": 5.668934240362811e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 50 }, { "completion_length": 157.85714721679688, "epoch": 0.0032380952380952383, "grad_norm": 0.00035151568590663373, "kl": 0.0005075942608527839, "learning_rate": 5.782312925170068e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 51 }, { "completion_length": 182.71429443359375, "epoch": 0.0033015873015873015, "grad_norm": 0.0002719403419177979, "kl": 0.0004456699243746698, "learning_rate": 5.895691609977324e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 52 }, { "completion_length": 161.92857360839844, "epoch": 0.003365079365079365, "grad_norm": 0.00033401267137378454, "kl": 0.0004922153893858194, "learning_rate": 6.00907029478458e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 53 }, { "completion_length": 165.92857360839844, "epoch": 0.0034285714285714284, "grad_norm": 0.0003011442313436419, "kl": 0.0004092372546438128, "learning_rate": 6.1224489795918365e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 54 }, { "completion_length": 147.0, "epoch": 0.003492063492063492, "grad_norm": 0.00041572513873688877, "kl": 0.0005389079451560974, "learning_rate": 6.235827664399093e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 55 }, { "completion_length": 186.00001525878906, "epoch": 0.0035555555555555557, "grad_norm": 0.0002574873506091535, "kl": 0.0003872880188282579, "learning_rate": 6.349206349206349e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 56 }, { "completion_length": 175.6428680419922, "epoch": 0.003619047619047619, "grad_norm": 0.00027999444864690304, "kl": 0.0004972383612766862, "learning_rate": 6.4625850340136045e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 57 }, { "completion_length": 176.71429443359375, "epoch": 0.0036825396825396826, "grad_norm": 0.00032281648600474, "kl": 0.00046778706018812954, "learning_rate": 6.575963718820862e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 58 }, { "completion_length": 151.1428680419922, "epoch": 0.003746031746031746, "grad_norm": 0.00039389508310705423, "kl": 0.0005644945777021348, "learning_rate": 6.689342403628118e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 59 }, { "completion_length": 158.92857360839844, "epoch": 0.0038095238095238095, "grad_norm": 0.0003014710091520101, "kl": 0.00047320802696049213, "learning_rate": 6.802721088435373e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 60 }, { "completion_length": 154.57144165039062, "epoch": 0.003873015873015873, "grad_norm": 0.00034780098940245807, "kl": 0.0005547214532271028, "learning_rate": 6.9160997732426305e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 61 }, { "completion_length": 137.1428680419922, "epoch": 0.003936507936507937, "grad_norm": 0.00039655473665334284, "kl": 0.0004794529522769153, "learning_rate": 7.029478458049887e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 62 }, { "completion_length": 163.71429443359375, "epoch": 0.004, "grad_norm": 0.00028655692585743964, "kl": 0.0004893668228760362, "learning_rate": 7.142857142857142e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 63 }, { "completion_length": 164.42857360839844, "epoch": 0.004063492063492063, "grad_norm": 0.0002907978487201035, "kl": 0.00047503021778538823, "learning_rate": 7.256235827664399e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 64 }, { "completion_length": 151.21429443359375, "epoch": 0.004126984126984127, "grad_norm": 0.0003190568240825087, "kl": 0.0004918621852993965, "learning_rate": 7.369614512471655e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 65 }, { "completion_length": 165.21429443359375, "epoch": 0.004190476190476191, "grad_norm": 0.0004616399819497019, "kl": 0.0004925985704176128, "learning_rate": 7.482993197278911e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 66 }, { "completion_length": 171.21429443359375, "epoch": 0.004253968253968254, "grad_norm": 0.00035463363747112453, "kl": 0.000492390594445169, "learning_rate": 7.596371882086168e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 67 }, { "completion_length": 176.92857360839844, "epoch": 0.004317460317460317, "grad_norm": 0.00026289423112757504, "kl": 0.00047168543096631765, "learning_rate": 7.709750566893424e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 68 }, { "completion_length": 191.42857360839844, "epoch": 0.004380952380952381, "grad_norm": 0.0003067901125177741, "kl": 0.00042588479118421674, "learning_rate": 7.82312925170068e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 69 }, { "completion_length": 174.85714721679688, "epoch": 0.0044444444444444444, "grad_norm": 0.000381304940674454, "kl": 0.0005213138647377491, "learning_rate": 7.936507936507936e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 70 }, { "completion_length": 154.2857208251953, "epoch": 0.004507936507936508, "grad_norm": 0.00026067093131132424, "kl": 0.0004498617781791836, "learning_rate": 8.049886621315193e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 71 }, { "completion_length": 156.07144165039062, "epoch": 0.004571428571428572, "grad_norm": 0.00024632696295157075, "kl": 0.000373041199054569, "learning_rate": 8.163265306122449e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 72 }, { "completion_length": 146.85714721679688, "epoch": 0.004634920634920635, "grad_norm": 0.00032663202728144825, "kl": 0.0004556398489512503, "learning_rate": 8.276643990929704e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 73 }, { "completion_length": 159.0, "epoch": 0.004698412698412698, "grad_norm": 0.000283473520539701, "kl": 0.0005590537912212312, "learning_rate": 8.390022675736961e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 74 }, { "completion_length": 164.07144165039062, "epoch": 0.004761904761904762, "grad_norm": 0.000244103284785524, "kl": 0.0004152520268689841, "learning_rate": 8.503401360544218e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 75 }, { "completion_length": 126.5714340209961, "epoch": 0.0048253968253968256, "grad_norm": 0.00038855039747431874, "kl": 0.0004488737613428384, "learning_rate": 8.616780045351474e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 76 }, { "completion_length": 162.92857360839844, "epoch": 0.004888888888888889, "grad_norm": 0.0002965359017252922, "kl": 0.000483156880363822, "learning_rate": 8.73015873015873e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 77 }, { "completion_length": 175.42857360839844, "epoch": 0.004952380952380952, "grad_norm": 0.0002903894637711346, "kl": 0.0004703286394942552, "learning_rate": 8.843537414965986e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 78 }, { "completion_length": 145.35714721679688, "epoch": 0.005015873015873016, "grad_norm": 0.0002862187393475324, "kl": 0.0004770116647705436, "learning_rate": 8.956916099773242e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 79 }, { "completion_length": 181.7857208251953, "epoch": 0.005079365079365079, "grad_norm": 0.000299908104352653, "kl": 0.0004618211241904646, "learning_rate": 9.070294784580499e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 80 }, { "completion_length": 146.07144165039062, "epoch": 0.005142857142857143, "grad_norm": 0.00034886624780483544, "kl": 0.0004746797785628587, "learning_rate": 9.183673469387756e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 81 }, { "completion_length": 176.57144165039062, "epoch": 0.005206349206349207, "grad_norm": 0.0002547974872868508, "kl": 0.0004016870225314051, "learning_rate": 9.29705215419501e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 82 }, { "completion_length": 137.35714721679688, "epoch": 0.00526984126984127, "grad_norm": 0.00030528780189342797, "kl": 0.0003933386760763824, "learning_rate": 9.410430839002267e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 83 }, { "completion_length": 154.21429443359375, "epoch": 0.005333333333333333, "grad_norm": 0.0003317453374620527, "kl": 0.0005737770115956664, "learning_rate": 9.523809523809524e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 84 }, { "completion_length": 170.5, "epoch": 0.005396825396825397, "grad_norm": 0.0003097050357609987, "kl": 0.00043328796164132655, "learning_rate": 9.63718820861678e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 85 }, { "completion_length": 189.1428680419922, "epoch": 0.0054603174603174605, "grad_norm": 0.000276767008472234, "kl": 0.0004129233711864799, "learning_rate": 9.750566893424037e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 86 }, { "completion_length": 167.2857208251953, "epoch": 0.005523809523809524, "grad_norm": 0.000300854240776971, "kl": 0.00046471142559312284, "learning_rate": 9.863945578231292e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 87 }, { "completion_length": 165.92857360839844, "epoch": 0.005587301587301587, "grad_norm": 0.00043800880666822195, "kl": 0.000495001848321408, "learning_rate": 9.977324263038547e-09, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 88 }, { "completion_length": 156.07144165039062, "epoch": 0.005650793650793651, "grad_norm": 0.00034795317333191633, "kl": 0.0003974696446675807, "learning_rate": 1.0090702947845805e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 89 }, { "completion_length": 163.42857360839844, "epoch": 0.005714285714285714, "grad_norm": 0.0003089983656536788, "kl": 0.0005139251588843763, "learning_rate": 1.020408163265306e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 90 }, { "completion_length": 174.1428680419922, "epoch": 0.0057777777777777775, "grad_norm": 0.0003760692197829485, "kl": 0.0004321702290326357, "learning_rate": 1.0317460317460317e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 91 }, { "completion_length": 161.7857208251953, "epoch": 0.005841269841269842, "grad_norm": 0.0003337353409733623, "kl": 0.0005114053492434323, "learning_rate": 1.0430839002267574e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 92 }, { "completion_length": 147.21429443359375, "epoch": 0.005904761904761905, "grad_norm": 0.0002990224165841937, "kl": 0.000464920187368989, "learning_rate": 1.0544217687074828e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 93 }, { "completion_length": 160.92857360839844, "epoch": 0.005968253968253968, "grad_norm": 0.0004138964868616313, "kl": 0.0005478612147271633, "learning_rate": 1.0657596371882085e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 94 }, { "completion_length": 159.0, "epoch": 0.006031746031746032, "grad_norm": 0.0003251611487939954, "kl": 0.0005211420357227325, "learning_rate": 1.0770975056689342e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 95 }, { "completion_length": 147.42857360839844, "epoch": 0.006095238095238095, "grad_norm": 0.0003454900870565325, "kl": 0.00047703771269880235, "learning_rate": 1.0884353741496598e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 96 }, { "completion_length": 191.42857360839844, "epoch": 0.006158730158730159, "grad_norm": 0.0004019344924017787, "kl": 0.0005397621425800025, "learning_rate": 1.0997732426303855e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 97 }, { "completion_length": 143.0, "epoch": 0.006222222222222222, "grad_norm": 0.00037265857099555433, "kl": 0.0005272139678709209, "learning_rate": 1.111111111111111e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 98 }, { "completion_length": 181.42857360839844, "epoch": 0.006285714285714286, "grad_norm": 0.0008766741375438869, "kl": 0.0006115200230851769, "learning_rate": 1.1224489795918366e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 99 }, { "completion_length": 184.21429443359375, "epoch": 0.006349206349206349, "grad_norm": 0.0003244511317461729, "kl": 0.00040413025999441743, "learning_rate": 1.1337868480725623e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 100 }, { "completion_length": 180.2857208251953, "epoch": 0.006412698412698412, "grad_norm": 0.00037246235297061503, "kl": 0.0004852400452364236, "learning_rate": 1.145124716553288e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 101 }, { "completion_length": 163.21429443359375, "epoch": 0.0064761904761904765, "grad_norm": 0.0003488774527795613, "kl": 0.0005253301351331174, "learning_rate": 1.1564625850340135e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 102 }, { "completion_length": 169.5, "epoch": 0.00653968253968254, "grad_norm": 0.0003216349286958575, "kl": 0.00048304631491191685, "learning_rate": 1.1678004535147392e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 103 }, { "completion_length": 176.07144165039062, "epoch": 0.006603174603174603, "grad_norm": 0.0003818368713837117, "kl": 0.0005183910252526402, "learning_rate": 1.1791383219954648e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 104 }, { "completion_length": 191.57144165039062, "epoch": 0.006666666666666667, "grad_norm": 0.00025017448933795094, "kl": 0.0004500866343732923, "learning_rate": 1.1904761904761903e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 105 }, { "completion_length": 174.1428680419922, "epoch": 0.00673015873015873, "grad_norm": 0.0002756420290097594, "kl": 0.0004261780995875597, "learning_rate": 1.201814058956916e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 106 }, { "completion_length": 150.2857208251953, "epoch": 0.0067936507936507936, "grad_norm": 0.0003480798623058945, "kl": 0.0004418139869812876, "learning_rate": 1.2131519274376418e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 107 }, { "completion_length": 171.35714721679688, "epoch": 0.006857142857142857, "grad_norm": 0.0003937874280381948, "kl": 0.00038531393511220813, "learning_rate": 1.2244897959183673e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 108 }, { "completion_length": 157.07144165039062, "epoch": 0.006920634920634921, "grad_norm": 0.0004276853578630835, "kl": 0.0004672615905292332, "learning_rate": 1.2358276643990928e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 109 }, { "completion_length": 173.07144165039062, "epoch": 0.006984126984126984, "grad_norm": 0.0002654498675838113, "kl": 0.0004513193271122873, "learning_rate": 1.2471655328798186e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 110 }, { "completion_length": 146.42857360839844, "epoch": 0.007047619047619047, "grad_norm": 0.00035877784830518067, "kl": 0.0004150154418312013, "learning_rate": 1.2585034013605441e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 111 }, { "completion_length": 152.0, "epoch": 0.0071111111111111115, "grad_norm": 0.0004734488029498607, "kl": 0.000532975303940475, "learning_rate": 1.2698412698412698e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 112 }, { "completion_length": 171.21429443359375, "epoch": 0.007174603174603175, "grad_norm": 0.0003575083683244884, "kl": 0.00046132999705150723, "learning_rate": 1.2811791383219955e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 113 }, { "completion_length": 152.35714721679688, "epoch": 0.007238095238095238, "grad_norm": 0.000326003588270396, "kl": 0.0005311286658979952, "learning_rate": 1.2925170068027209e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 114 }, { "completion_length": 168.0, "epoch": 0.007301587301587302, "grad_norm": 0.0003058369620703161, "kl": 0.0004838834865950048, "learning_rate": 1.3038548752834466e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 115 }, { "completion_length": 184.00001525878906, "epoch": 0.007365079365079365, "grad_norm": 0.0003094715066254139, "kl": 0.0004514115280471742, "learning_rate": 1.3151927437641723e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 116 }, { "completion_length": 165.42857360839844, "epoch": 0.0074285714285714285, "grad_norm": 0.00047393381828442216, "kl": 0.0006036462145857513, "learning_rate": 1.3265306122448979e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 117 }, { "completion_length": 154.92857360839844, "epoch": 0.007492063492063492, "grad_norm": 0.0003238656499888748, "kl": 0.0004508035781327635, "learning_rate": 1.3378684807256236e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 118 }, { "completion_length": 163.2857208251953, "epoch": 0.007555555555555556, "grad_norm": 0.000271871336735785, "kl": 0.0004831435508094728, "learning_rate": 1.3492063492063493e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 119 }, { "completion_length": 195.57144165039062, "epoch": 0.007619047619047619, "grad_norm": 0.00026322927442379296, "kl": 0.0003818417608272284, "learning_rate": 1.3605442176870747e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 120 }, { "completion_length": 131.5, "epoch": 0.007682539682539682, "grad_norm": 0.00046885423944331706, "kl": 0.00044861400965601206, "learning_rate": 1.3718820861678004e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 121 }, { "completion_length": 180.57144165039062, "epoch": 0.007746031746031746, "grad_norm": 0.0003049038932658732, "kl": 0.0005344325909391046, "learning_rate": 1.3832199546485261e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 122 }, { "completion_length": 162.71429443359375, "epoch": 0.00780952380952381, "grad_norm": 0.00025985619868151844, "kl": 0.0004639705002773553, "learning_rate": 1.3945578231292516e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 123 }, { "completion_length": 135.57144165039062, "epoch": 0.007873015873015874, "grad_norm": 0.0003998467873316258, "kl": 0.0005270108813419938, "learning_rate": 1.4058956916099773e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 124 }, { "completion_length": 164.2857208251953, "epoch": 0.007936507936507936, "grad_norm": 0.00025326418108306825, "kl": 0.000374292372725904, "learning_rate": 1.4172335600907029e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 125 }, { "completion_length": 149.92857360839844, "epoch": 0.008, "grad_norm": 0.00046581553760915995, "kl": 0.0005651875399053097, "learning_rate": 1.4285714285714284e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 126 }, { "completion_length": 146.85714721679688, "epoch": 0.008063492063492064, "grad_norm": 0.0004681957943830639, "kl": 0.0005172239034436643, "learning_rate": 1.4399092970521541e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 127 }, { "completion_length": 229.35714721679688, "epoch": 0.008126984126984127, "grad_norm": 0.00027203740319237113, "kl": 0.0004332599346525967, "learning_rate": 1.4512471655328799e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 128 }, { "completion_length": 186.92857360839844, "epoch": 0.00819047619047619, "grad_norm": 0.0002801816153805703, "kl": 0.0004255258827470243, "learning_rate": 1.4625850340136054e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 129 }, { "completion_length": 160.5, "epoch": 0.008253968253968255, "grad_norm": 0.0002932633797172457, "kl": 0.0004439924377948046, "learning_rate": 1.473922902494331e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 130 }, { "completion_length": 192.21429443359375, "epoch": 0.008317460317460317, "grad_norm": 0.00027159435558132827, "kl": 0.00041336295544169843, "learning_rate": 1.4852607709750567e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 131 }, { "completion_length": 123.28572082519531, "epoch": 0.008380952380952381, "grad_norm": 0.0003718223888427019, "kl": 0.0004282594018150121, "learning_rate": 1.4965986394557822e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 132 }, { "completion_length": 210.6428680419922, "epoch": 0.008444444444444444, "grad_norm": 0.0002517009270377457, "kl": 0.00045416891225613654, "learning_rate": 1.5079365079365077e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 133 }, { "completion_length": 180.71429443359375, "epoch": 0.008507936507936508, "grad_norm": 0.0003652676532510668, "kl": 0.0005395925836637616, "learning_rate": 1.5192743764172336e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 134 }, { "completion_length": 155.42857360839844, "epoch": 0.008571428571428572, "grad_norm": 0.00035290789674036205, "kl": 0.00047099008224904537, "learning_rate": 1.5306122448979592e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 135 }, { "completion_length": 186.35714721679688, "epoch": 0.008634920634920634, "grad_norm": 0.00030813002376817167, "kl": 0.0004630918556358665, "learning_rate": 1.5419501133786847e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 136 }, { "completion_length": 163.0, "epoch": 0.008698412698412698, "grad_norm": 0.000290414085611701, "kl": 0.00039771918090991676, "learning_rate": 1.5532879818594106e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 137 }, { "completion_length": 163.21429443359375, "epoch": 0.008761904761904762, "grad_norm": 0.0003658764762803912, "kl": 0.0005081392591819167, "learning_rate": 1.564625850340136e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 138 }, { "completion_length": 174.7857208251953, "epoch": 0.008825396825396825, "grad_norm": 0.00032362641650252044, "kl": 0.0004983445978723466, "learning_rate": 1.5759637188208613e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 139 }, { "completion_length": 184.35714721679688, "epoch": 0.008888888888888889, "grad_norm": 0.00021902941807638854, "kl": 0.00043870718218386173, "learning_rate": 1.5873015873015872e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 140 }, { "completion_length": 214.85714721679688, "epoch": 0.008952380952380953, "grad_norm": 0.00025112953153438866, "kl": 0.0004793668631464243, "learning_rate": 1.5986394557823128e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 141 }, { "completion_length": 150.6428680419922, "epoch": 0.009015873015873015, "grad_norm": 0.0003026957274414599, "kl": 0.0004545800620689988, "learning_rate": 1.6099773242630386e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 142 }, { "completion_length": 174.57144165039062, "epoch": 0.00907936507936508, "grad_norm": 0.00023380519996862859, "kl": 0.000392747315345332, "learning_rate": 1.6213151927437642e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 143 }, { "completion_length": 168.71429443359375, "epoch": 0.009142857142857144, "grad_norm": 0.00040193749009631574, "kl": 0.0005375386099331081, "learning_rate": 1.6326530612244897e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 144 }, { "completion_length": 150.7857208251953, "epoch": 0.009206349206349206, "grad_norm": 0.00026833126321434975, "kl": 0.00040733805508352816, "learning_rate": 1.6439909297052153e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 145 }, { "completion_length": 150.71429443359375, "epoch": 0.00926984126984127, "grad_norm": 0.0003209297719877213, "kl": 0.0005619518342427909, "learning_rate": 1.6553287981859408e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 146 }, { "completion_length": 161.6428680419922, "epoch": 0.009333333333333334, "grad_norm": 0.00030526204500347376, "kl": 0.0004644196596927941, "learning_rate": 1.6666666666666667e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 147 }, { "completion_length": 151.7857208251953, "epoch": 0.009396825396825396, "grad_norm": 0.0003012619854416698, "kl": 0.0005008225562050939, "learning_rate": 1.6780045351473922e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 148 }, { "completion_length": 173.00001525878906, "epoch": 0.00946031746031746, "grad_norm": 0.0002887748705688864, "kl": 0.000426783983130008, "learning_rate": 1.6893424036281178e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 149 }, { "completion_length": 179.57144165039062, "epoch": 0.009523809523809525, "grad_norm": 0.0002805156400427222, "kl": 0.0003886800550390035, "learning_rate": 1.7006802721088437e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 150 }, { "completion_length": 168.71429443359375, "epoch": 0.009587301587301587, "grad_norm": 0.0003102289338130504, "kl": 0.0005339498165994883, "learning_rate": 1.712018140589569e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 151 }, { "completion_length": 129.7857208251953, "epoch": 0.009650793650793651, "grad_norm": 0.00031151168514043093, "kl": 0.0005261789774522185, "learning_rate": 1.7233560090702948e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 152 }, { "completion_length": 165.1428680419922, "epoch": 0.009714285714285713, "grad_norm": 0.0002586367481853813, "kl": 0.00042236116132698953, "learning_rate": 1.7346938775510203e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 153 }, { "completion_length": 152.21429443359375, "epoch": 0.009777777777777778, "grad_norm": 0.0003398378030396998, "kl": 0.0004475814348552376, "learning_rate": 1.746031746031746e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 154 }, { "completion_length": 169.71429443359375, "epoch": 0.009841269841269842, "grad_norm": 0.00029677277780137956, "kl": 0.00043044009362347424, "learning_rate": 1.7573696145124717e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 155 }, { "completion_length": 198.71429443359375, "epoch": 0.009904761904761904, "grad_norm": 0.0003610896528698504, "kl": 0.0005128082120791078, "learning_rate": 1.7687074829931973e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 156 }, { "completion_length": 146.85714721679688, "epoch": 0.009968253968253968, "grad_norm": 0.0005461079999804497, "kl": 0.0005476299556903541, "learning_rate": 1.7800453514739228e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 157 }, { "completion_length": 174.2857208251953, "epoch": 0.010031746031746032, "grad_norm": 0.000289255433017388, "kl": 0.00043016247218474746, "learning_rate": 1.7913832199546484e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 158 }, { "completion_length": 174.1428680419922, "epoch": 0.010095238095238095, "grad_norm": 0.00023656590201426297, "kl": 0.00039958019624464214, "learning_rate": 1.802721088435374e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 159 }, { "completion_length": 194.00001525878906, "epoch": 0.010158730158730159, "grad_norm": 0.00024978205328807235, "kl": 0.00043887406354770064, "learning_rate": 1.8140589569160998e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 160 }, { "completion_length": 146.35714721679688, "epoch": 0.010222222222222223, "grad_norm": 0.0008551095379516482, "kl": 0.0005569651257246733, "learning_rate": 1.8253968253968253e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 161 }, { "completion_length": 191.35714721679688, "epoch": 0.010285714285714285, "grad_norm": 0.0002845528651960194, "kl": 0.00040823721792548895, "learning_rate": 1.8367346938775512e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 162 }, { "completion_length": 144.1428680419922, "epoch": 0.01034920634920635, "grad_norm": 0.0003100792528130114, "kl": 0.000457381916930899, "learning_rate": 1.8480725623582764e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 163 }, { "completion_length": 194.7857208251953, "epoch": 0.010412698412698413, "grad_norm": 0.00024655708693899214, "kl": 0.0003785221779253334, "learning_rate": 1.859410430839002e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 164 }, { "completion_length": 215.85714721679688, "epoch": 0.010476190476190476, "grad_norm": 0.0002502563002053648, "kl": 0.00043424972682259977, "learning_rate": 1.870748299319728e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 165 }, { "completion_length": 176.57144165039062, "epoch": 0.01053968253968254, "grad_norm": 0.0003543515340425074, "kl": 0.00048599671572446823, "learning_rate": 1.8820861678004534e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 166 }, { "completion_length": 162.2857208251953, "epoch": 0.010603174603174604, "grad_norm": 0.0003418020496610552, "kl": 0.0004858726460952312, "learning_rate": 1.8934240362811793e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 167 }, { "completion_length": 178.2857208251953, "epoch": 0.010666666666666666, "grad_norm": 0.00021818120148964226, "kl": 0.0003644227108452469, "learning_rate": 1.9047619047619048e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 168 }, { "completion_length": 154.1428680419922, "epoch": 0.01073015873015873, "grad_norm": 0.0003432252851780504, "kl": 0.0005079217371530831, "learning_rate": 1.9160997732426303e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 169 }, { "completion_length": 179.35714721679688, "epoch": 0.010793650793650795, "grad_norm": 0.00030597104341723025, "kl": 0.00042625333298929036, "learning_rate": 1.927437641723356e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 170 }, { "completion_length": 136.0, "epoch": 0.010857142857142857, "grad_norm": 0.0004179711686447263, "kl": 0.0005339601193554699, "learning_rate": 1.9387755102040814e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 171 }, { "completion_length": 178.2857208251953, "epoch": 0.010920634920634921, "grad_norm": 0.0003201386716682464, "kl": 0.0005171161610633135, "learning_rate": 1.9501133786848073e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 172 }, { "completion_length": 162.7857208251953, "epoch": 0.010984126984126983, "grad_norm": 0.0003826562606263906, "kl": 0.0005341414362192154, "learning_rate": 1.961451247165533e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 173 }, { "completion_length": 143.7857208251953, "epoch": 0.011047619047619047, "grad_norm": 0.0003447282942943275, "kl": 0.0005233116680756211, "learning_rate": 1.9727891156462584e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 174 }, { "completion_length": 164.1428680419922, "epoch": 0.011111111111111112, "grad_norm": 0.00036380955134518445, "kl": 0.0005376891931518912, "learning_rate": 1.984126984126984e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 175 }, { "completion_length": 163.71429443359375, "epoch": 0.011174603174603174, "grad_norm": 0.0003083896590396762, "kl": 0.00046387981274165213, "learning_rate": 1.9954648526077095e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 176 }, { "completion_length": 155.07144165039062, "epoch": 0.011238095238095238, "grad_norm": 0.00028702866984531283, "kl": 0.0004080157959833741, "learning_rate": 2.0068027210884354e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 177 }, { "completion_length": 173.6428680419922, "epoch": 0.011301587301587302, "grad_norm": 0.0002663087798282504, "kl": 0.00048111731302924454, "learning_rate": 2.018140589569161e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 178 }, { "completion_length": 169.85714721679688, "epoch": 0.011365079365079364, "grad_norm": 0.00034509930992498994, "kl": 0.0005182846798561513, "learning_rate": 2.0294784580498868e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 179 }, { "completion_length": 190.00001525878906, "epoch": 0.011428571428571429, "grad_norm": 0.0003729471645783633, "kl": 0.0004779008449986577, "learning_rate": 2.040816326530612e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 180 }, { "completion_length": 183.50001525878906, "epoch": 0.011492063492063493, "grad_norm": 0.00033337154309265316, "kl": 0.0004336909332778305, "learning_rate": 2.0521541950113375e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 181 }, { "completion_length": 153.1428680419922, "epoch": 0.011555555555555555, "grad_norm": 0.00031077078892849386, "kl": 0.000436597503721714, "learning_rate": 2.0634920634920634e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 182 }, { "completion_length": 165.5, "epoch": 0.011619047619047619, "grad_norm": 0.00025738184922374785, "kl": 0.00046158541226759553, "learning_rate": 2.074829931972789e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 183 }, { "completion_length": 156.85714721679688, "epoch": 0.011682539682539683, "grad_norm": 0.0003280139353591949, "kl": 0.0004676936659961939, "learning_rate": 2.086167800453515e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 184 }, { "completion_length": 174.7857208251953, "epoch": 0.011746031746031746, "grad_norm": 0.0003697476349771023, "kl": 0.00047551249735988677, "learning_rate": 2.0975056689342404e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 185 }, { "completion_length": 129.2857208251953, "epoch": 0.01180952380952381, "grad_norm": 0.00040914907003752887, "kl": 0.0005215073470026255, "learning_rate": 2.1088435374149656e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 186 }, { "completion_length": 201.6428680419922, "epoch": 0.011873015873015874, "grad_norm": 0.0002878454397432506, "kl": 0.00042203726479783654, "learning_rate": 2.1201814058956915e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 187 }, { "completion_length": 149.85714721679688, "epoch": 0.011936507936507936, "grad_norm": 0.0002843460242729634, "kl": 0.0004121015081182122, "learning_rate": 2.131519274376417e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 188 }, { "completion_length": 144.5, "epoch": 0.012, "grad_norm": 0.00040822383016347885, "kl": 0.0004084422835148871, "learning_rate": 2.142857142857143e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 189 }, { "completion_length": 171.35714721679688, "epoch": 0.012063492063492064, "grad_norm": 0.0003576772869564593, "kl": 0.00046562618808820844, "learning_rate": 2.1541950113378684e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 190 }, { "completion_length": 183.6428680419922, "epoch": 0.012126984126984127, "grad_norm": 0.00025801590527407825, "kl": 0.0004338891012594104, "learning_rate": 2.165532879818594e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 191 }, { "completion_length": 163.35714721679688, "epoch": 0.01219047619047619, "grad_norm": 0.00030427295132540166, "kl": 0.0003940874303225428, "learning_rate": 2.1768707482993195e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 192 }, { "completion_length": 154.7857208251953, "epoch": 0.012253968253968253, "grad_norm": 0.0003152879071421921, "kl": 0.0003974809660576284, "learning_rate": 2.188208616780045e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 193 }, { "completion_length": 192.6428680419922, "epoch": 0.012317460317460317, "grad_norm": 0.00029331602854654193, "kl": 0.00045033195056021214, "learning_rate": 2.199546485260771e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 194 }, { "completion_length": 184.50001525878906, "epoch": 0.012380952380952381, "grad_norm": 0.00025794445537030697, "kl": 0.0004207179299555719, "learning_rate": 2.2108843537414965e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 195 }, { "completion_length": 154.1428680419922, "epoch": 0.012444444444444444, "grad_norm": 0.0003540542093105614, "kl": 0.0005089603364467621, "learning_rate": 2.222222222222222e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 196 }, { "completion_length": 150.42857360839844, "epoch": 0.012507936507936508, "grad_norm": 0.0003148360992781818, "kl": 0.00043182913213968277, "learning_rate": 2.233560090702948e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 197 }, { "completion_length": 170.85714721679688, "epoch": 0.012571428571428572, "grad_norm": 0.0002938616380561143, "kl": 0.0005093581276014447, "learning_rate": 2.244897959183673e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 198 }, { "completion_length": 156.0, "epoch": 0.012634920634920634, "grad_norm": 0.0002925557200796902, "kl": 0.00046418647980317473, "learning_rate": 2.256235827664399e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 199 }, { "completion_length": 148.5, "epoch": 0.012698412698412698, "grad_norm": 0.00036267039831727743, "kl": 0.0005014583584852517, "learning_rate": 2.2675736961451246e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 200 }, { "completion_length": 126.14286041259766, "epoch": 0.012761904761904763, "grad_norm": 0.00045099962153472006, "kl": 0.0005438958760350943, "learning_rate": 2.27891156462585e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 201 }, { "completion_length": 231.7857208251953, "epoch": 0.012825396825396825, "grad_norm": 0.00024824560387060046, "kl": 0.0004675445088651031, "learning_rate": 2.290249433106576e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 202 }, { "completion_length": 165.42857360839844, "epoch": 0.012888888888888889, "grad_norm": 0.8406664729118347, "kl": 0.0003705799172166735, "learning_rate": 2.3015873015873015e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 203 }, { "completion_length": 165.57144165039062, "epoch": 0.012952380952380953, "grad_norm": 0.00029701998573727906, "kl": 0.0005455704522319138, "learning_rate": 2.312925170068027e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 204 }, { "completion_length": 170.42857360839844, "epoch": 0.013015873015873015, "grad_norm": 0.0003508214431349188, "kl": 0.000519107561558485, "learning_rate": 2.3242630385487526e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 205 }, { "completion_length": 149.6428680419922, "epoch": 0.01307936507936508, "grad_norm": 0.00040043285116553307, "kl": 0.0005015431670472026, "learning_rate": 2.3356009070294785e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 206 }, { "completion_length": 156.57144165039062, "epoch": 0.013142857142857144, "grad_norm": 0.000374131603166461, "kl": 0.0004782963660545647, "learning_rate": 2.346938775510204e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 207 }, { "completion_length": 190.42857360839844, "epoch": 0.013206349206349206, "grad_norm": 0.7951609492301941, "kl": 0.0004708449705503881, "learning_rate": 2.3582766439909296e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 208 }, { "completion_length": 203.71429443359375, "epoch": 0.01326984126984127, "grad_norm": 0.00023062966647557914, "kl": 0.00039424755959771574, "learning_rate": 2.3696145124716555e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 209 }, { "completion_length": 164.35714721679688, "epoch": 0.013333333333333334, "grad_norm": 0.00025010702665895224, "kl": 0.0003993286518380046, "learning_rate": 2.3809523809523807e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 210 }, { "completion_length": 132.42857360839844, "epoch": 0.013396825396825397, "grad_norm": 0.000390716566471383, "kl": 0.0005154847167432308, "learning_rate": 2.3922902494331065e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 211 }, { "completion_length": 189.21429443359375, "epoch": 0.01346031746031746, "grad_norm": 0.00025364678003825247, "kl": 0.00038812236743979156, "learning_rate": 2.403628117913832e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 212 }, { "completion_length": 161.35714721679688, "epoch": 0.013523809523809523, "grad_norm": 0.00025698068202473223, "kl": 0.0004492326988838613, "learning_rate": 2.4149659863945576e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 213 }, { "completion_length": 170.5, "epoch": 0.013587301587301587, "grad_norm": 0.0003529922978486866, "kl": 0.0004970735171809793, "learning_rate": 2.4263038548752835e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 214 }, { "completion_length": 158.6428680419922, "epoch": 0.013650793650793651, "grad_norm": 0.0003584700170904398, "kl": 0.0005064843571744859, "learning_rate": 2.437641723356009e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 215 }, { "completion_length": 179.57144165039062, "epoch": 0.013714285714285714, "grad_norm": 0.0003436294209677726, "kl": 0.00044681114377453923, "learning_rate": 2.4489795918367346e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 216 }, { "completion_length": 161.35714721679688, "epoch": 0.013777777777777778, "grad_norm": 0.00028925479273311794, "kl": 0.00041266478365287185, "learning_rate": 2.46031746031746e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 217 }, { "completion_length": 206.07144165039062, "epoch": 0.013841269841269842, "grad_norm": 0.00023713329574093223, "kl": 0.0003910253872163594, "learning_rate": 2.4716553287981857e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 218 }, { "completion_length": 175.35714721679688, "epoch": 0.013904761904761904, "grad_norm": 0.00030194345163181424, "kl": 0.00048090884229168296, "learning_rate": 2.4829931972789116e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 219 }, { "completion_length": 196.92857360839844, "epoch": 0.013968253968253968, "grad_norm": 0.00025500281481072307, "kl": 0.00043309517786838114, "learning_rate": 2.494331065759637e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 220 }, { "completion_length": 175.1428680419922, "epoch": 0.014031746031746032, "grad_norm": 0.0002689639222808182, "kl": 0.0004253160732332617, "learning_rate": 2.505668934240363e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 221 }, { "completion_length": 160.6428680419922, "epoch": 0.014095238095238095, "grad_norm": 0.0002873915946111083, "kl": 0.0004545991832856089, "learning_rate": 2.5170068027210882e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 222 }, { "completion_length": 205.7857208251953, "epoch": 0.014158730158730159, "grad_norm": 0.0002858532243408263, "kl": 0.000436904258094728, "learning_rate": 2.5283446712018137e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 223 }, { "completion_length": 173.35714721679688, "epoch": 0.014222222222222223, "grad_norm": 0.00026960932882502675, "kl": 0.0004891754942946136, "learning_rate": 2.5396825396825396e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 224 }, { "completion_length": 131.1428680419922, "epoch": 0.014285714285714285, "grad_norm": 0.0005787534755654633, "kl": 0.0004862506175413728, "learning_rate": 2.5510204081632652e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 225 }, { "completion_length": 177.71429443359375, "epoch": 0.01434920634920635, "grad_norm": 0.00026912690373137593, "kl": 0.000439151976024732, "learning_rate": 2.562358276643991e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 226 }, { "completion_length": 181.35714721679688, "epoch": 0.014412698412698413, "grad_norm": 0.00029428763082250953, "kl": 0.00042451269109733403, "learning_rate": 2.5736961451247163e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 227 }, { "completion_length": 191.35714721679688, "epoch": 0.014476190476190476, "grad_norm": 0.00025099964113906026, "kl": 0.00045424196287058294, "learning_rate": 2.5850340136054418e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 228 }, { "completion_length": 159.7857208251953, "epoch": 0.01453968253968254, "grad_norm": 0.00028793359524570405, "kl": 0.00041417640750296414, "learning_rate": 2.5963718820861677e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 229 }, { "completion_length": 164.07144165039062, "epoch": 0.014603174603174604, "grad_norm": 0.00027048878837376833, "kl": 0.00045422534458339214, "learning_rate": 2.6077097505668932e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 230 }, { "completion_length": 202.42857360839844, "epoch": 0.014666666666666666, "grad_norm": 0.00025375050609000027, "kl": 0.00044217848335392773, "learning_rate": 2.619047619047619e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 231 }, { "completion_length": 183.21429443359375, "epoch": 0.01473015873015873, "grad_norm": 0.00024100783048197627, "kl": 0.0003827049513347447, "learning_rate": 2.6303854875283446e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 232 }, { "completion_length": 167.85714721679688, "epoch": 0.014793650793650793, "grad_norm": 0.00031154195312410593, "kl": 0.0004181096446700394, "learning_rate": 2.64172335600907e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 233 }, { "completion_length": 169.2857208251953, "epoch": 0.014857142857142857, "grad_norm": 1.3984670639038086, "kl": 0.00043062452459707856, "learning_rate": 2.6530612244897957e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 234 }, { "completion_length": 159.1428680419922, "epoch": 0.014920634920634921, "grad_norm": 0.00031392095843330026, "kl": 0.0005131359212100506, "learning_rate": 2.6643990929705213e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 235 }, { "completion_length": 164.1428680419922, "epoch": 0.014984126984126983, "grad_norm": 0.0002601539599709213, "kl": 0.0004530359001364559, "learning_rate": 2.675736961451247e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 236 }, { "completion_length": 216.50001525878906, "epoch": 0.015047619047619048, "grad_norm": 0.00045852301991544664, "kl": 0.000468422076664865, "learning_rate": 2.6870748299319727e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 237 }, { "completion_length": 172.2857208251953, "epoch": 0.015111111111111112, "grad_norm": 0.00029106889269314706, "kl": 0.0004549097502604127, "learning_rate": 2.6984126984126986e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 238 }, { "completion_length": 154.21429443359375, "epoch": 0.015174603174603174, "grad_norm": 0.00030101914308033884, "kl": 0.000552864046767354, "learning_rate": 2.7097505668934238e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 239 }, { "completion_length": 170.2857208251953, "epoch": 0.015238095238095238, "grad_norm": 0.00046348769683390856, "kl": 0.0005055373767390847, "learning_rate": 2.7210884353741493e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 240 }, { "completion_length": 140.5, "epoch": 0.015301587301587302, "grad_norm": 0.00041661414434202015, "kl": 0.00044790469110012054, "learning_rate": 2.7324263038548752e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 241 }, { "completion_length": 169.2857208251953, "epoch": 0.015365079365079365, "grad_norm": 0.0002847449795808643, "kl": 0.0004465628298930824, "learning_rate": 2.7437641723356008e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 242 }, { "completion_length": 187.1428680419922, "epoch": 0.015428571428571429, "grad_norm": 0.0003219660429749638, "kl": 0.0004964784020558, "learning_rate": 2.7551020408163266e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 243 }, { "completion_length": 137.42857360839844, "epoch": 0.015492063492063493, "grad_norm": 0.00033525543403811753, "kl": 0.0005269063985906541, "learning_rate": 2.7664399092970522e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 244 }, { "completion_length": 127.35714721679688, "epoch": 0.015555555555555555, "grad_norm": 0.000494319130666554, "kl": 0.0005732981953769922, "learning_rate": 2.7777777777777774e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 245 }, { "completion_length": 168.07144165039062, "epoch": 0.01561904761904762, "grad_norm": 0.0002874657220672816, "kl": 0.0005341569776646793, "learning_rate": 2.7891156462585033e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 246 }, { "completion_length": 125.28572082519531, "epoch": 0.015682539682539683, "grad_norm": 0.0004289138887543231, "kl": 0.0005437976215034723, "learning_rate": 2.8004535147392288e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 247 }, { "completion_length": 176.2857208251953, "epoch": 0.015746031746031747, "grad_norm": 0.00028071910492144525, "kl": 0.00047719490248709917, "learning_rate": 2.8117913832199547e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 248 }, { "completion_length": 189.6428680419922, "epoch": 0.015809523809523808, "grad_norm": 0.0003271283349022269, "kl": 0.0005099303089082241, "learning_rate": 2.8231292517006802e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 249 }, { "completion_length": 170.5, "epoch": 0.015873015873015872, "grad_norm": 0.00030326086562126875, "kl": 0.00046061992179602385, "learning_rate": 2.8344671201814058e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 250 }, { "completion_length": 161.5, "epoch": 0.015936507936507936, "grad_norm": 0.00032322094193659723, "kl": 0.0005512756179086864, "learning_rate": 2.8458049886621313e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 251 }, { "completion_length": 160.7857208251953, "epoch": 0.016, "grad_norm": 0.00029293319676071405, "kl": 0.0005116189713589847, "learning_rate": 2.857142857142857e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 252 }, { "completion_length": 174.1428680419922, "epoch": 0.016063492063492064, "grad_norm": 0.00037796018295921385, "kl": 0.0005323332152329385, "learning_rate": 2.8684807256235827e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 253 }, { "completion_length": 178.50001525878906, "epoch": 0.01612698412698413, "grad_norm": 0.0002925560111179948, "kl": 0.0005496356170624495, "learning_rate": 2.8798185941043083e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 254 }, { "completion_length": 163.1428680419922, "epoch": 0.01619047619047619, "grad_norm": 0.00031535379821434617, "kl": 0.0004752975655719638, "learning_rate": 2.891156462585034e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 255 }, { "completion_length": 160.07144165039062, "epoch": 0.016253968253968253, "grad_norm": 0.0003380791749805212, "kl": 0.0004507832054514438, "learning_rate": 2.9024943310657597e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 256 }, { "completion_length": 202.92857360839844, "epoch": 0.016317460317460317, "grad_norm": 0.0002537252730689943, "kl": 0.00043643725803121924, "learning_rate": 2.913832199546485e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 257 }, { "completion_length": 164.07144165039062, "epoch": 0.01638095238095238, "grad_norm": 0.0002525111776776612, "kl": 0.00040046434151008725, "learning_rate": 2.9251700680272108e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 258 }, { "completion_length": 145.7857208251953, "epoch": 0.016444444444444446, "grad_norm": 1.380826473236084, "kl": 0.0005328123806975782, "learning_rate": 2.9365079365079363e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 259 }, { "completion_length": 205.92857360839844, "epoch": 0.01650793650793651, "grad_norm": 0.00021727643616031855, "kl": 0.00044899090426042676, "learning_rate": 2.947845804988662e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 260 }, { "completion_length": 158.85714721679688, "epoch": 0.01657142857142857, "grad_norm": 0.000327348243445158, "kl": 0.000496258435305208, "learning_rate": 2.9591836734693878e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 261 }, { "completion_length": 130.92857360839844, "epoch": 0.016634920634920634, "grad_norm": 0.0004029552510473877, "kl": 0.0005440866807475686, "learning_rate": 2.9705215419501133e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 262 }, { "completion_length": 164.0, "epoch": 0.0166984126984127, "grad_norm": 1.0608301162719727, "kl": 0.000533950689714402, "learning_rate": 2.981859410430839e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 263 }, { "completion_length": 147.1428680419922, "epoch": 0.016761904761904763, "grad_norm": 0.00033161137253046036, "kl": 0.0005769039271399379, "learning_rate": 2.9931972789115644e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 264 }, { "completion_length": 133.85714721679688, "epoch": 0.016825396825396827, "grad_norm": 0.0003835654933936894, "kl": 0.0005464914720505476, "learning_rate": 3.00453514739229e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 265 }, { "completion_length": 149.7857208251953, "epoch": 0.016888888888888887, "grad_norm": 0.0004216801899019629, "kl": 0.0004325960762798786, "learning_rate": 3.0158730158730155e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 266 }, { "completion_length": 144.07144165039062, "epoch": 0.01695238095238095, "grad_norm": 0.00032034338801167905, "kl": 0.0005638286820612848, "learning_rate": 3.027210884353742e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 267 }, { "completion_length": 180.92857360839844, "epoch": 0.017015873015873016, "grad_norm": 0.00032722813193686306, "kl": 0.0004580366949085146, "learning_rate": 3.038548752834467e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 268 }, { "completion_length": 146.0, "epoch": 0.01707936507936508, "grad_norm": 0.0004299174761399627, "kl": 0.00053329614456743, "learning_rate": 3.049886621315193e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 269 }, { "completion_length": 224.6428680419922, "epoch": 0.017142857142857144, "grad_norm": 0.00028789977659471333, "kl": 0.0004923987435176969, "learning_rate": 3.0612244897959183e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 270 }, { "completion_length": 162.2857208251953, "epoch": 0.017206349206349208, "grad_norm": 0.0002855361672118306, "kl": 0.0005059582181274891, "learning_rate": 3.072562358276644e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 271 }, { "completion_length": 179.92857360839844, "epoch": 0.01726984126984127, "grad_norm": 0.0002887144801206887, "kl": 0.0004600047250278294, "learning_rate": 3.0839002267573694e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 272 }, { "completion_length": 138.07144165039062, "epoch": 0.017333333333333333, "grad_norm": 0.00032984657445922494, "kl": 0.0005026196013204753, "learning_rate": 3.095238095238095e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 273 }, { "completion_length": 152.2857208251953, "epoch": 0.017396825396825397, "grad_norm": 0.00030989604420028627, "kl": 0.0004721737059298903, "learning_rate": 3.106575963718821e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 274 }, { "completion_length": 186.1428680419922, "epoch": 0.01746031746031746, "grad_norm": 0.0003493184340186417, "kl": 0.000495827232953161, "learning_rate": 3.117913832199546e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 275 }, { "completion_length": 151.21429443359375, "epoch": 0.017523809523809525, "grad_norm": 0.0002827192656695843, "kl": 0.00045531857176683843, "learning_rate": 3.129251700680272e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 276 }, { "completion_length": 138.21429443359375, "epoch": 0.01758730158730159, "grad_norm": 0.00040357795660384, "kl": 0.0004382599436212331, "learning_rate": 3.140589569160998e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 277 }, { "completion_length": 157.21429443359375, "epoch": 0.01765079365079365, "grad_norm": 0.0003595234884414822, "kl": 0.0005448484444059432, "learning_rate": 3.151927437641723e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 278 }, { "completion_length": 166.71429443359375, "epoch": 0.017714285714285714, "grad_norm": 0.00036622831248678267, "kl": 0.00046233911416493356, "learning_rate": 3.163265306122449e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 279 }, { "completion_length": 125.42857360839844, "epoch": 0.017777777777777778, "grad_norm": 0.00036125589394941926, "kl": 0.0005001423414796591, "learning_rate": 3.1746031746031744e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 280 }, { "completion_length": 135.21429443359375, "epoch": 0.017841269841269842, "grad_norm": 0.00026458740467205644, "kl": 0.00036813781480304897, "learning_rate": 3.1859410430839e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 281 }, { "completion_length": 157.5, "epoch": 0.017904761904761906, "grad_norm": 0.00032012449810281396, "kl": 0.00046788042527623475, "learning_rate": 3.1972789115646255e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 282 }, { "completion_length": 135.85714721679688, "epoch": 0.017968253968253967, "grad_norm": 0.0002951958740595728, "kl": 0.0005239397287368774, "learning_rate": 3.208616780045351e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 283 }, { "completion_length": 158.85714721679688, "epoch": 0.01803174603174603, "grad_norm": 0.0003293640911579132, "kl": 0.0004996260977350175, "learning_rate": 3.219954648526077e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 284 }, { "completion_length": 184.50001525878906, "epoch": 0.018095238095238095, "grad_norm": 0.0003389480116311461, "kl": 0.00043877848656848073, "learning_rate": 3.231292517006803e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 285 }, { "completion_length": 164.1428680419922, "epoch": 0.01815873015873016, "grad_norm": 0.0002563204034231603, "kl": 0.0004755652626045048, "learning_rate": 3.2426303854875284e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 286 }, { "completion_length": 156.85714721679688, "epoch": 0.018222222222222223, "grad_norm": 0.00030624333885498345, "kl": 0.00044308119686320424, "learning_rate": 3.253968253968254e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 287 }, { "completion_length": 171.21429443359375, "epoch": 0.018285714285714287, "grad_norm": 1.0581578016281128, "kl": 0.0004465602105483413, "learning_rate": 3.2653061224489795e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 288 }, { "completion_length": 122.21429443359375, "epoch": 0.018349206349206348, "grad_norm": 0.0003810332855209708, "kl": 0.0005501036066561937, "learning_rate": 3.276643990929705e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 289 }, { "completion_length": 186.07144165039062, "epoch": 0.018412698412698412, "grad_norm": 0.00028593282331712544, "kl": 0.0005127409822307527, "learning_rate": 3.2879818594104306e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 290 }, { "completion_length": 151.42857360839844, "epoch": 0.018476190476190476, "grad_norm": 0.0006672388408333063, "kl": 0.0005014845519326627, "learning_rate": 3.299319727891156e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 291 }, { "completion_length": 161.07144165039062, "epoch": 0.01853968253968254, "grad_norm": 0.0003548184467945248, "kl": 0.0004745282349176705, "learning_rate": 3.3106575963718817e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 292 }, { "completion_length": 158.35714721679688, "epoch": 0.018603174603174604, "grad_norm": 0.0003449599607847631, "kl": 0.00042601217865012586, "learning_rate": 3.321995464852607e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 293 }, { "completion_length": 209.21429443359375, "epoch": 0.018666666666666668, "grad_norm": 0.0002280767512274906, "kl": 0.0004739736905321479, "learning_rate": 3.3333333333333334e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 294 }, { "completion_length": 157.7857208251953, "epoch": 0.01873015873015873, "grad_norm": 0.0002605701156426221, "kl": 0.0004281929286662489, "learning_rate": 3.344671201814059e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 295 }, { "completion_length": 160.07144165039062, "epoch": 0.018793650793650793, "grad_norm": 0.0003582253120839596, "kl": 0.0004503828240558505, "learning_rate": 3.3560090702947845e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 296 }, { "completion_length": 165.35714721679688, "epoch": 0.018857142857142857, "grad_norm": 0.0005129929049871862, "kl": 0.0005266349762678146, "learning_rate": 3.36734693877551e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 297 }, { "completion_length": 174.07144165039062, "epoch": 0.01892063492063492, "grad_norm": 0.0004302331362850964, "kl": 0.0004894385347142816, "learning_rate": 3.3786848072562356e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 298 }, { "completion_length": 188.6428680419922, "epoch": 0.018984126984126985, "grad_norm": 0.0003035777772311121, "kl": 0.00044063496170565486, "learning_rate": 3.390022675736962e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 299 }, { "completion_length": 114.71429443359375, "epoch": 0.01904761904761905, "grad_norm": 0.0005157240084372461, "kl": 0.00045538300764746964, "learning_rate": 3.4013605442176873e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 300 }, { "completion_length": 183.6428680419922, "epoch": 0.01911111111111111, "grad_norm": 0.0002474601205904037, "kl": 0.0004065422108396888, "learning_rate": 3.412698412698412e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 301 }, { "completion_length": 122.92857360839844, "epoch": 0.019174603174603174, "grad_norm": 1.366526484489441, "kl": 0.0005543703446164727, "learning_rate": 3.424036281179138e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 302 }, { "completion_length": 167.1428680419922, "epoch": 0.019238095238095238, "grad_norm": 0.00033972522942349315, "kl": 0.0005043671117164195, "learning_rate": 3.435374149659863e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 303 }, { "completion_length": 182.92857360839844, "epoch": 0.019301587301587302, "grad_norm": 0.0002777881745714694, "kl": 0.0003548871900420636, "learning_rate": 3.4467120181405895e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 304 }, { "completion_length": 145.71429443359375, "epoch": 0.019365079365079366, "grad_norm": 0.0003392102080397308, "kl": 0.0005111695500090718, "learning_rate": 3.458049886621315e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 305 }, { "completion_length": 163.2857208251953, "epoch": 0.019428571428571427, "grad_norm": 0.00037009635707363486, "kl": 0.0004590954922605306, "learning_rate": 3.4693877551020406e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 306 }, { "completion_length": 175.42857360839844, "epoch": 0.01949206349206349, "grad_norm": 0.00028633602778427303, "kl": 0.00045623918413184583, "learning_rate": 3.480725623582766e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 307 }, { "completion_length": 176.35714721679688, "epoch": 0.019555555555555555, "grad_norm": 0.0003672490129247308, "kl": 0.00047114447806961834, "learning_rate": 3.492063492063492e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 308 }, { "completion_length": 164.5, "epoch": 0.01961904761904762, "grad_norm": 0.00025408779038116336, "kl": 0.0003465794725343585, "learning_rate": 3.503401360544218e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 309 }, { "completion_length": 159.71429443359375, "epoch": 0.019682539682539683, "grad_norm": 0.00034499517641961575, "kl": 0.0005446829600259662, "learning_rate": 3.5147392290249434e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 310 }, { "completion_length": 140.1428680419922, "epoch": 0.019746031746031747, "grad_norm": 0.0003878338320646435, "kl": 0.0006124518695287406, "learning_rate": 3.526077097505669e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 311 }, { "completion_length": 185.35714721679688, "epoch": 0.019809523809523808, "grad_norm": 0.00029322513728402555, "kl": 0.00047693171654827893, "learning_rate": 3.5374149659863945e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 312 }, { "completion_length": 151.42857360839844, "epoch": 0.019873015873015872, "grad_norm": 0.0003252347814850509, "kl": 0.00038513177423737943, "learning_rate": 3.5487528344671194e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 313 }, { "completion_length": 191.71429443359375, "epoch": 0.019936507936507936, "grad_norm": 0.0003277061623521149, "kl": 0.0004475179885048419, "learning_rate": 3.5600907029478456e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 314 }, { "completion_length": 186.35714721679688, "epoch": 0.02, "grad_norm": 0.00024812103947624564, "kl": 0.00046957493759691715, "learning_rate": 3.571428571428571e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 315 }, { "completion_length": 146.71429443359375, "epoch": 0.020063492063492065, "grad_norm": 0.0002627744688652456, "kl": 0.0004000271437689662, "learning_rate": 3.582766439909297e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 316 }, { "completion_length": 198.35714721679688, "epoch": 0.02012698412698413, "grad_norm": 0.0003471011877991259, "kl": 0.0004353757540229708, "learning_rate": 3.594104308390022e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 317 }, { "completion_length": 177.7857208251953, "epoch": 0.02019047619047619, "grad_norm": 0.00032262428430840373, "kl": 0.0004702220903709531, "learning_rate": 3.605442176870748e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 318 }, { "completion_length": 171.00001525878906, "epoch": 0.020253968253968253, "grad_norm": 0.000324197462759912, "kl": 0.0005547396140173078, "learning_rate": 3.616780045351474e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 319 }, { "completion_length": 159.21429443359375, "epoch": 0.020317460317460317, "grad_norm": 0.00034550955751910806, "kl": 0.0005040362011641264, "learning_rate": 3.6281179138321996e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 320 }, { "completion_length": 165.07144165039062, "epoch": 0.02038095238095238, "grad_norm": 0.0002831669698935002, "kl": 0.0004027468676213175, "learning_rate": 3.639455782312925e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 321 }, { "completion_length": 176.6428680419922, "epoch": 0.020444444444444446, "grad_norm": 0.00029700566665269434, "kl": 0.00047608744353055954, "learning_rate": 3.6507936507936506e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 322 }, { "completion_length": 178.2857208251953, "epoch": 0.020507936507936506, "grad_norm": 0.00039364007534459233, "kl": 0.0005385634140111506, "learning_rate": 3.662131519274376e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 323 }, { "completion_length": 195.57144165039062, "epoch": 0.02057142857142857, "grad_norm": 0.0002574720128905028, "kl": 0.00047318986617028713, "learning_rate": 3.6734693877551024e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 324 }, { "completion_length": 184.1428680419922, "epoch": 0.020634920634920634, "grad_norm": 0.00032994174398481846, "kl": 0.0005082415300421417, "learning_rate": 3.684807256235827e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 325 }, { "completion_length": 182.50001525878906, "epoch": 0.0206984126984127, "grad_norm": 0.00023991765920072794, "kl": 0.00042106499313376844, "learning_rate": 3.696145124716553e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 326 }, { "completion_length": 201.21429443359375, "epoch": 0.020761904761904763, "grad_norm": 0.0002034949284279719, "kl": 0.0003866736660711467, "learning_rate": 3.7074829931972784e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 327 }, { "completion_length": 204.57144165039062, "epoch": 0.020825396825396827, "grad_norm": 0.0002997429692186415, "kl": 0.0004803392803296447, "learning_rate": 3.718820861678004e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 328 }, { "completion_length": 180.2857208251953, "epoch": 0.020888888888888887, "grad_norm": 0.00031647729338146746, "kl": 0.0005053950590081513, "learning_rate": 3.73015873015873e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 329 }, { "completion_length": 213.50001525878906, "epoch": 0.02095238095238095, "grad_norm": 0.0002645960485097021, "kl": 0.000397910043830052, "learning_rate": 3.741496598639456e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 330 }, { "completion_length": 126.50000762939453, "epoch": 0.021015873015873016, "grad_norm": 0.0004186799924355, "kl": 0.0004887233371846378, "learning_rate": 3.752834467120181e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 331 }, { "completion_length": 175.42857360839844, "epoch": 0.02107936507936508, "grad_norm": 0.0003222817904315889, "kl": 0.0005749163683503866, "learning_rate": 3.764172335600907e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 332 }, { "completion_length": 151.5, "epoch": 0.021142857142857144, "grad_norm": 0.0003860134456772357, "kl": 0.0005147760966792703, "learning_rate": 3.775510204081633e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 333 }, { "completion_length": 154.85714721679688, "epoch": 0.021206349206349208, "grad_norm": 0.00037352784420363605, "kl": 0.0004876039456576109, "learning_rate": 3.7868480725623585e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 334 }, { "completion_length": 163.1428680419922, "epoch": 0.02126984126984127, "grad_norm": 0.0002991608635056764, "kl": 0.0004292850790079683, "learning_rate": 3.798185941043084e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 335 }, { "completion_length": 180.00001525878906, "epoch": 0.021333333333333333, "grad_norm": 0.00040245242416858673, "kl": 0.0005934166256338358, "learning_rate": 3.8095238095238096e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 336 }, { "completion_length": 165.2857208251953, "epoch": 0.021396825396825397, "grad_norm": 0.00024359184317290783, "kl": 0.00042942812433466315, "learning_rate": 3.8208616780045345e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 337 }, { "completion_length": 157.85714721679688, "epoch": 0.02146031746031746, "grad_norm": 0.7690045833587646, "kl": 0.00048119810526259243, "learning_rate": 3.832199546485261e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 338 }, { "completion_length": 154.92857360839844, "epoch": 0.021523809523809525, "grad_norm": 0.000285096000880003, "kl": 0.0004016577440779656, "learning_rate": 3.843537414965986e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 339 }, { "completion_length": 182.7857208251953, "epoch": 0.02158730158730159, "grad_norm": 0.00037581476499326527, "kl": 0.0005157201085239649, "learning_rate": 3.854875283446712e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 340 }, { "completion_length": 143.71429443359375, "epoch": 0.02165079365079365, "grad_norm": 0.00033306339173577726, "kl": 0.0004731188528239727, "learning_rate": 3.866213151927437e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 341 }, { "completion_length": 161.42857360839844, "epoch": 0.021714285714285714, "grad_norm": 0.0003657906490843743, "kl": 0.00046987333917059004, "learning_rate": 3.877551020408163e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 342 }, { "completion_length": 161.7857208251953, "epoch": 0.021777777777777778, "grad_norm": 0.00032412068685516715, "kl": 0.0005000715609639883, "learning_rate": 3.888888888888889e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 343 }, { "completion_length": 169.6428680419922, "epoch": 0.021841269841269842, "grad_norm": 0.00030247538234107196, "kl": 0.00047184183495119214, "learning_rate": 3.9002267573696146e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 344 }, { "completion_length": 192.57144165039062, "epoch": 0.021904761904761906, "grad_norm": 0.0002831196179613471, "kl": 0.0004548761935438961, "learning_rate": 3.91156462585034e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 345 }, { "completion_length": 179.1428680419922, "epoch": 0.021968253968253967, "grad_norm": 0.0002993935195263475, "kl": 0.0004706979088950902, "learning_rate": 3.922902494331066e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 346 }, { "completion_length": 133.2857208251953, "epoch": 0.02203174603174603, "grad_norm": 0.0003891250817105174, "kl": 0.000578677689190954, "learning_rate": 3.934240362811791e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 347 }, { "completion_length": 178.42857360839844, "epoch": 0.022095238095238095, "grad_norm": 0.0002842790854629129, "kl": 0.0005143819726072252, "learning_rate": 3.945578231292517e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 348 }, { "completion_length": 157.71429443359375, "epoch": 0.02215873015873016, "grad_norm": 0.0002757193287834525, "kl": 0.0004166267754044384, "learning_rate": 3.9569160997732424e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 349 }, { "completion_length": 154.5, "epoch": 0.022222222222222223, "grad_norm": 0.0002738694893196225, "kl": 0.0005351696745492518, "learning_rate": 3.968253968253968e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 350 }, { "completion_length": 171.00001525878906, "epoch": 0.022285714285714287, "grad_norm": 0.00023661665909457952, "kl": 0.0003513368428684771, "learning_rate": 3.9795918367346934e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 351 }, { "completion_length": 141.85714721679688, "epoch": 0.022349206349206348, "grad_norm": 0.00032952846959233284, "kl": 0.00047031190479174256, "learning_rate": 3.990929705215419e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 352 }, { "completion_length": 155.1428680419922, "epoch": 0.022412698412698412, "grad_norm": 0.00033024573349393904, "kl": 0.0004523331590462476, "learning_rate": 4.002267573696145e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 353 }, { "completion_length": 185.7857208251953, "epoch": 0.022476190476190476, "grad_norm": 0.00024834906798787415, "kl": 0.00041707599302753806, "learning_rate": 4.013605442176871e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 354 }, { "completion_length": 153.57144165039062, "epoch": 0.02253968253968254, "grad_norm": 0.0003689314762596041, "kl": 0.0006120526231825352, "learning_rate": 4.024943310657596e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 355 }, { "completion_length": 141.07144165039062, "epoch": 0.022603174603174604, "grad_norm": 0.0003649714053608477, "kl": 0.0005564761813730001, "learning_rate": 4.036281179138322e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 356 }, { "completion_length": 141.7857208251953, "epoch": 0.02266666666666667, "grad_norm": 0.000542373803909868, "kl": 0.0005714527796953917, "learning_rate": 4.0476190476190474e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 357 }, { "completion_length": 137.1428680419922, "epoch": 0.02273015873015873, "grad_norm": 0.000340409082127735, "kl": 0.0004963593673892319, "learning_rate": 4.0589569160997736e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 358 }, { "completion_length": 162.21429443359375, "epoch": 0.022793650793650793, "grad_norm": 0.00020907101861666888, "kl": 0.0003753438650164753, "learning_rate": 4.070294784580499e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 359 }, { "completion_length": 164.42857360839844, "epoch": 0.022857142857142857, "grad_norm": 0.00026801080093719065, "kl": 0.00042427555308677256, "learning_rate": 4.081632653061224e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 360 }, { "completion_length": 122.00000762939453, "epoch": 0.02292063492063492, "grad_norm": 0.0003837206750176847, "kl": 0.0004891835269518197, "learning_rate": 4.0929705215419496e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 361 }, { "completion_length": 168.2857208251953, "epoch": 0.022984126984126985, "grad_norm": 0.00025256621302105486, "kl": 0.00044126511784270406, "learning_rate": 4.104308390022675e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 362 }, { "completion_length": 184.07144165039062, "epoch": 0.023047619047619046, "grad_norm": 0.00025146722327917814, "kl": 0.000406278035370633, "learning_rate": 4.115646258503401e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 363 }, { "completion_length": 156.35714721679688, "epoch": 0.02311111111111111, "grad_norm": 0.000376725074602291, "kl": 0.0005835859919898212, "learning_rate": 4.126984126984127e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 364 }, { "completion_length": 161.7857208251953, "epoch": 0.023174603174603174, "grad_norm": 0.00031100717023946345, "kl": 0.0005410161684267223, "learning_rate": 4.1383219954648524e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 365 }, { "completion_length": 172.71429443359375, "epoch": 0.023238095238095238, "grad_norm": 0.00023187325859908015, "kl": 0.00044962819083593786, "learning_rate": 4.149659863945578e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 366 }, { "completion_length": 178.07144165039062, "epoch": 0.023301587301587302, "grad_norm": 0.00023778397007845342, "kl": 0.00047702371375635266, "learning_rate": 4.1609977324263035e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 367 }, { "completion_length": 167.7857208251953, "epoch": 0.023365079365079366, "grad_norm": 0.0003222592349629849, "kl": 0.00039693250437267125, "learning_rate": 4.17233560090703e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 368 }, { "completion_length": 166.6428680419922, "epoch": 0.023428571428571427, "grad_norm": 0.0002785831457003951, "kl": 0.0004982121172361076, "learning_rate": 4.183673469387755e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 369 }, { "completion_length": 209.85714721679688, "epoch": 0.02349206349206349, "grad_norm": 0.0002449220046401024, "kl": 0.00037887899088673294, "learning_rate": 4.195011337868481e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 370 }, { "completion_length": 164.2857208251953, "epoch": 0.023555555555555555, "grad_norm": 0.0002653459960129112, "kl": 0.00037448492366820574, "learning_rate": 4.206349206349206e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 371 }, { "completion_length": 166.0, "epoch": 0.02361904761904762, "grad_norm": 0.0002967669570352882, "kl": 0.0005236875731498003, "learning_rate": 4.217687074829931e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 372 }, { "completion_length": 160.35714721679688, "epoch": 0.023682539682539683, "grad_norm": 0.0002524905721656978, "kl": 0.00042562096496112645, "learning_rate": 4.2290249433106574e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 373 }, { "completion_length": 178.2857208251953, "epoch": 0.023746031746031748, "grad_norm": 0.00026133324718102813, "kl": 0.000468613114207983, "learning_rate": 4.240362811791383e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 374 }, { "completion_length": 187.85714721679688, "epoch": 0.023809523809523808, "grad_norm": 0.0002294281148351729, "kl": 0.00040926560177467763, "learning_rate": 4.2517006802721085e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 375 }, { "completion_length": 130.35714721679688, "epoch": 0.023873015873015872, "grad_norm": 1.960017204284668, "kl": 0.0005166707560420036, "learning_rate": 4.263038548752834e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 376 }, { "completion_length": 200.07144165039062, "epoch": 0.023936507936507936, "grad_norm": 0.00034270016476511955, "kl": 0.0004246254393365234, "learning_rate": 4.2743764172335596e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 377 }, { "completion_length": 182.6428680419922, "epoch": 0.024, "grad_norm": 0.0003336982917971909, "kl": 0.0004872173012699932, "learning_rate": 4.285714285714286e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 378 }, { "completion_length": 172.7857208251953, "epoch": 0.024063492063492065, "grad_norm": 0.0003608589177019894, "kl": 0.0004944293759763241, "learning_rate": 4.2970521541950113e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 379 }, { "completion_length": 156.71429443359375, "epoch": 0.02412698412698413, "grad_norm": 0.0003448152856435627, "kl": 0.000515113933943212, "learning_rate": 4.308390022675737e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 380 }, { "completion_length": 149.42857360839844, "epoch": 0.02419047619047619, "grad_norm": 0.00028668605955317616, "kl": 0.0004736771807074547, "learning_rate": 4.3197278911564624e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 381 }, { "completion_length": 169.6428680419922, "epoch": 0.024253968253968253, "grad_norm": 0.00030463276198133826, "kl": 0.00048328505363315344, "learning_rate": 4.331065759637188e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 382 }, { "completion_length": 149.92857360839844, "epoch": 0.024317460317460318, "grad_norm": 0.0004325175250414759, "kl": 0.0005554411327466369, "learning_rate": 4.342403628117914e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 383 }, { "completion_length": 127.5714340209961, "epoch": 0.02438095238095238, "grad_norm": 0.0003892966196872294, "kl": 0.0005402012611739337, "learning_rate": 4.353741496598639e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 384 }, { "completion_length": 140.5, "epoch": 0.024444444444444446, "grad_norm": 0.0004259454144630581, "kl": 0.0004249156918376684, "learning_rate": 4.3650793650793646e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 385 }, { "completion_length": 170.57144165039062, "epoch": 0.024507936507936506, "grad_norm": 0.0002476413210388273, "kl": 0.0003304881101939827, "learning_rate": 4.37641723356009e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 386 }, { "completion_length": 197.00001525878906, "epoch": 0.02457142857142857, "grad_norm": 0.0003085912321694195, "kl": 0.00037759164115414023, "learning_rate": 4.387755102040816e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 387 }, { "completion_length": 201.1428680419922, "epoch": 0.024634920634920635, "grad_norm": 0.00023466830316465348, "kl": 0.0004366265202406794, "learning_rate": 4.399092970521542e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 388 }, { "completion_length": 185.00001525878906, "epoch": 0.0246984126984127, "grad_norm": 0.0003661189111880958, "kl": 0.0005290692788548768, "learning_rate": 4.4104308390022675e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 389 }, { "completion_length": 156.07144165039062, "epoch": 0.024761904761904763, "grad_norm": 0.0002856934443116188, "kl": 0.0004797357541974634, "learning_rate": 4.421768707482993e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 390 }, { "completion_length": 162.35714721679688, "epoch": 0.024825396825396827, "grad_norm": 0.0003193554875906557, "kl": 0.0004332117096055299, "learning_rate": 4.4331065759637186e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 391 }, { "completion_length": 154.07144165039062, "epoch": 0.024888888888888887, "grad_norm": 0.0002967866603285074, "kl": 0.0005322654615156353, "learning_rate": 4.444444444444444e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 392 }, { "completion_length": 145.57144165039062, "epoch": 0.02495238095238095, "grad_norm": 0.000321428757160902, "kl": 0.0004554277693387121, "learning_rate": 4.45578231292517e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 393 }, { "completion_length": 158.42857360839844, "epoch": 0.025015873015873016, "grad_norm": 0.0003282772086095065, "kl": 0.0003939384187106043, "learning_rate": 4.467120181405896e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 394 }, { "completion_length": 165.71429443359375, "epoch": 0.02507936507936508, "grad_norm": 0.00038334171404130757, "kl": 0.000536358158569783, "learning_rate": 4.4784580498866214e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 395 }, { "completion_length": 175.2857208251953, "epoch": 0.025142857142857144, "grad_norm": 0.0002991660439874977, "kl": 0.00037745109875686467, "learning_rate": 4.489795918367346e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 396 }, { "completion_length": 189.07144165039062, "epoch": 0.025206349206349208, "grad_norm": 0.0002629028749652207, "kl": 0.0004707856569439173, "learning_rate": 4.501133786848072e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 397 }, { "completion_length": 149.5, "epoch": 0.02526984126984127, "grad_norm": 0.0002944920561276376, "kl": 0.00043871570960618556, "learning_rate": 4.512471655328798e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 398 }, { "completion_length": 198.42857360839844, "epoch": 0.025333333333333333, "grad_norm": 0.0003489935479592532, "kl": 0.00043927389197051525, "learning_rate": 4.5238095238095236e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 399 }, { "completion_length": 138.42857360839844, "epoch": 0.025396825396825397, "grad_norm": 0.0003584784280974418, "kl": 0.0005852216854691505, "learning_rate": 4.535147392290249e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 400 }, { "completion_length": 135.5, "epoch": 0.02546031746031746, "grad_norm": 0.00037279987009242177, "kl": 0.00039171508979052305, "learning_rate": 4.5464852607709747e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 401 }, { "completion_length": 159.2857208251953, "epoch": 0.025523809523809525, "grad_norm": 0.00038493488682433963, "kl": 0.0005167638300918043, "learning_rate": 4.5578231292517e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 402 }, { "completion_length": 198.21429443359375, "epoch": 0.025587301587301586, "grad_norm": 0.00029604043811559677, "kl": 0.000379821693059057, "learning_rate": 4.5691609977324264e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 403 }, { "completion_length": 150.21429443359375, "epoch": 0.02565079365079365, "grad_norm": 0.00042326500988565385, "kl": 0.0006181938806548715, "learning_rate": 4.580498866213152e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 404 }, { "completion_length": 157.92857360839844, "epoch": 0.025714285714285714, "grad_norm": 0.00033543503377586603, "kl": 0.00044146348955109715, "learning_rate": 4.5918367346938775e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 405 }, { "completion_length": 151.42857360839844, "epoch": 0.025777777777777778, "grad_norm": 0.0002830907760653645, "kl": 0.000397197378333658, "learning_rate": 4.603174603174603e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 406 }, { "completion_length": 165.0, "epoch": 0.025841269841269842, "grad_norm": 0.00027291651349514723, "kl": 0.0004499318602029234, "learning_rate": 4.614512471655328e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 407 }, { "completion_length": 125.5714340209961, "epoch": 0.025904761904761906, "grad_norm": 0.00044027972035109997, "kl": 0.00045089510967954993, "learning_rate": 4.625850340136054e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 408 }, { "completion_length": 127.0714340209961, "epoch": 0.025968253968253967, "grad_norm": 0.00035924732219427824, "kl": 0.0004286420007701963, "learning_rate": 4.63718820861678e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 409 }, { "completion_length": 128.7857208251953, "epoch": 0.02603174603174603, "grad_norm": 0.0003114960272796452, "kl": 0.0004502788942772895, "learning_rate": 4.648526077097505e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 410 }, { "completion_length": 143.85714721679688, "epoch": 0.026095238095238095, "grad_norm": 0.00038274124381132424, "kl": 0.0005085538723506033, "learning_rate": 4.659863945578231e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 411 }, { "completion_length": 149.2857208251953, "epoch": 0.02615873015873016, "grad_norm": 0.0003264278348069638, "kl": 0.0004862133355345577, "learning_rate": 4.671201814058957e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 412 }, { "completion_length": 180.35714721679688, "epoch": 0.026222222222222223, "grad_norm": 0.00026758210151456296, "kl": 0.00048367283307015896, "learning_rate": 4.6825396825396825e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 413 }, { "completion_length": 137.6428680419922, "epoch": 0.026285714285714287, "grad_norm": 0.0003349836333654821, "kl": 0.00045632594265043736, "learning_rate": 4.693877551020408e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 414 }, { "completion_length": 191.00001525878906, "epoch": 0.026349206349206348, "grad_norm": 0.00024299474898725748, "kl": 0.0003927976649720222, "learning_rate": 4.7052154195011336e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 415 }, { "completion_length": 176.85714721679688, "epoch": 0.026412698412698412, "grad_norm": 0.0002546523464843631, "kl": 0.000385827588615939, "learning_rate": 4.716553287981859e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 416 }, { "completion_length": 140.92857360839844, "epoch": 0.026476190476190476, "grad_norm": 0.0003655036853160709, "kl": 0.00047676340909674764, "learning_rate": 4.7278911564625854e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 417 }, { "completion_length": 177.85714721679688, "epoch": 0.02653968253968254, "grad_norm": 0.0003053571854252368, "kl": 0.0004668280598707497, "learning_rate": 4.739229024943311e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 418 }, { "completion_length": 162.57144165039062, "epoch": 0.026603174603174604, "grad_norm": 0.0004023555957246572, "kl": 0.00047785439528524876, "learning_rate": 4.750566893424036e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 419 }, { "completion_length": 156.0, "epoch": 0.02666666666666667, "grad_norm": 0.0002552871301304549, "kl": 0.00047040151548571885, "learning_rate": 4.7619047619047613e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 420 }, { "completion_length": 183.2857208251953, "epoch": 0.02673015873015873, "grad_norm": 0.0003585070080589503, "kl": 0.00047137276851572096, "learning_rate": 4.773242630385487e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 421 }, { "completion_length": 175.85714721679688, "epoch": 0.026793650793650793, "grad_norm": 0.0002907197631429881, "kl": 0.00042739775381051004, "learning_rate": 4.784580498866213e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 422 }, { "completion_length": 172.21429443359375, "epoch": 0.026857142857142857, "grad_norm": 0.00025322104920633137, "kl": 0.0003296777722425759, "learning_rate": 4.7959183673469386e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 423 }, { "completion_length": 156.35714721679688, "epoch": 0.02692063492063492, "grad_norm": 0.0003621754003688693, "kl": 0.0004890032578259706, "learning_rate": 4.807256235827664e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 424 }, { "completion_length": 172.1428680419922, "epoch": 0.026984126984126985, "grad_norm": 0.0002780189679469913, "kl": 0.0004659126861952245, "learning_rate": 4.81859410430839e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 425 }, { "completion_length": 159.42857360839844, "epoch": 0.027047619047619046, "grad_norm": 0.0002657343284226954, "kl": 0.0004381696053314954, "learning_rate": 4.829931972789115e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 426 }, { "completion_length": 123.85714721679688, "epoch": 0.02711111111111111, "grad_norm": 0.0004347521753516048, "kl": 0.0005362016963772476, "learning_rate": 4.8412698412698415e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 427 }, { "completion_length": 184.07144165039062, "epoch": 0.027174603174603174, "grad_norm": 0.000299336010357365, "kl": 0.00045466632582247257, "learning_rate": 4.852607709750567e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 428 }, { "completion_length": 145.6428680419922, "epoch": 0.02723809523809524, "grad_norm": 0.00025792690576054156, "kl": 0.00041170851909555495, "learning_rate": 4.8639455782312926e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 429 }, { "completion_length": 144.07144165039062, "epoch": 0.027301587301587302, "grad_norm": 0.0002897210360970348, "kl": 0.000509115110617131, "learning_rate": 4.875283446712018e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 430 }, { "completion_length": 201.00001525878906, "epoch": 0.027365079365079367, "grad_norm": 0.00023188242630567402, "kl": 0.0004060700011905283, "learning_rate": 4.886621315192743e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 431 }, { "completion_length": 178.85714721679688, "epoch": 0.027428571428571427, "grad_norm": 0.0002623998443596065, "kl": 0.0005240823375061154, "learning_rate": 4.897959183673469e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 432 }, { "completion_length": 149.35714721679688, "epoch": 0.02749206349206349, "grad_norm": 0.0003756712831091136, "kl": 0.0005048091989010572, "learning_rate": 4.909297052154195e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 433 }, { "completion_length": 171.71429443359375, "epoch": 0.027555555555555555, "grad_norm": 0.00037623412208631635, "kl": 0.0004015033773612231, "learning_rate": 4.92063492063492e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 434 }, { "completion_length": 158.7857208251953, "epoch": 0.02761904761904762, "grad_norm": 0.00042544628377072513, "kl": 0.0005942813586443663, "learning_rate": 4.931972789115646e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 435 }, { "completion_length": 141.7857208251953, "epoch": 0.027682539682539684, "grad_norm": 0.00032846617978066206, "kl": 0.0004980577505193651, "learning_rate": 4.9433106575963714e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 436 }, { "completion_length": 170.21429443359375, "epoch": 0.027746031746031748, "grad_norm": 0.0003295023925602436, "kl": 0.0004348353832028806, "learning_rate": 4.9546485260770976e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 437 }, { "completion_length": 165.85714721679688, "epoch": 0.02780952380952381, "grad_norm": 0.00029364676447585225, "kl": 0.0004941530642099679, "learning_rate": 4.965986394557823e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 438 }, { "completion_length": 176.21429443359375, "epoch": 0.027873015873015872, "grad_norm": 0.0004039835184812546, "kl": 0.0005575334071181715, "learning_rate": 4.977324263038549e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 439 }, { "completion_length": 206.6428680419922, "epoch": 0.027936507936507936, "grad_norm": 0.0003433352685533464, "kl": 0.00036074157105758786, "learning_rate": 4.988662131519274e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 440 }, { "completion_length": 178.57144165039062, "epoch": 0.028, "grad_norm": 0.0002791023871395737, "kl": 0.0004344887565821409, "learning_rate": 5e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 441 }, { "completion_length": 172.2857208251953, "epoch": 0.028063492063492065, "grad_norm": 0.00024626596132293344, "kl": 0.0004070063296239823, "learning_rate": 5.011337868480726e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 442 }, { "completion_length": 177.2857208251953, "epoch": 0.028126984126984125, "grad_norm": 0.0002807955606840551, "kl": 0.00040940416511148214, "learning_rate": 5.022675736961451e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 443 }, { "completion_length": 160.7857208251953, "epoch": 0.02819047619047619, "grad_norm": 0.0002579401188995689, "kl": 0.0004138486983720213, "learning_rate": 5.0340136054421764e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 444 }, { "completion_length": 138.85714721679688, "epoch": 0.028253968253968254, "grad_norm": 0.0003014919930137694, "kl": 0.0004850799741689116, "learning_rate": 5.045351473922902e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 445 }, { "completion_length": 146.0, "epoch": 0.028317460317460318, "grad_norm": 0.0003259092918597162, "kl": 0.00045071577187627554, "learning_rate": 5.0566893424036275e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 446 }, { "completion_length": 183.35714721679688, "epoch": 0.02838095238095238, "grad_norm": 0.0002874053025152534, "kl": 0.00040817036642692983, "learning_rate": 5.068027210884354e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 447 }, { "completion_length": 124.71429443359375, "epoch": 0.028444444444444446, "grad_norm": 0.00036565307527780533, "kl": 0.0005714930011890829, "learning_rate": 5.079365079365079e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 448 }, { "completion_length": 183.07144165039062, "epoch": 0.028507936507936506, "grad_norm": 0.00022728652402292937, "kl": 0.0004657350364141166, "learning_rate": 5.090702947845805e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 449 }, { "completion_length": 140.5, "epoch": 0.02857142857142857, "grad_norm": 0.0004939475329592824, "kl": 0.0006515054265037179, "learning_rate": 5.1020408163265303e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 450 }, { "completion_length": 142.71429443359375, "epoch": 0.028634920634920635, "grad_norm": 0.0004220785922370851, "kl": 0.0005747973918914795, "learning_rate": 5.113378684807256e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 451 }, { "completion_length": 196.85714721679688, "epoch": 0.0286984126984127, "grad_norm": 0.00026186142349615693, "kl": 0.00046916521387174726, "learning_rate": 5.124716553287982e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 452 }, { "completion_length": 163.07144165039062, "epoch": 0.028761904761904763, "grad_norm": 0.0006485763587988913, "kl": 0.0006932450924068689, "learning_rate": 5.1360544217687076e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 453 }, { "completion_length": 154.07144165039062, "epoch": 0.028825396825396827, "grad_norm": 0.00032182736322283745, "kl": 0.0005077670211903751, "learning_rate": 5.1473922902494325e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 454 }, { "completion_length": 175.35714721679688, "epoch": 0.028888888888888888, "grad_norm": 0.000375679403077811, "kl": 0.0005796492332592607, "learning_rate": 5.158730158730158e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 455 }, { "completion_length": 161.07144165039062, "epoch": 0.02895238095238095, "grad_norm": 0.0003445896727498621, "kl": 0.0005040392279624939, "learning_rate": 5.1700680272108836e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 456 }, { "completion_length": 175.92857360839844, "epoch": 0.029015873015873016, "grad_norm": 0.000230925070354715, "kl": 0.0003983239585068077, "learning_rate": 5.18140589569161e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 457 }, { "completion_length": 157.71429443359375, "epoch": 0.02907936507936508, "grad_norm": 0.0002744725497905165, "kl": 0.0004921012441627681, "learning_rate": 5.1927437641723354e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 458 }, { "completion_length": 173.07144165039062, "epoch": 0.029142857142857144, "grad_norm": 0.00034864197368733585, "kl": 0.0004693300579674542, "learning_rate": 5.204081632653061e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 459 }, { "completion_length": 147.57144165039062, "epoch": 0.029206349206349208, "grad_norm": 0.0003375157539267093, "kl": 0.0004736004339065403, "learning_rate": 5.2154195011337865e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 460 }, { "completion_length": 197.21429443359375, "epoch": 0.02926984126984127, "grad_norm": 0.0003300806856714189, "kl": 0.0005077788373455405, "learning_rate": 5.226757369614512e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 461 }, { "completion_length": 170.7857208251953, "epoch": 0.029333333333333333, "grad_norm": 0.0003456448612269014, "kl": 0.00044953665928915143, "learning_rate": 5.238095238095238e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 462 }, { "completion_length": 153.0, "epoch": 0.029396825396825397, "grad_norm": 0.0003458567662164569, "kl": 0.0005305336089804769, "learning_rate": 5.249433106575964e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 463 }, { "completion_length": 156.57144165039062, "epoch": 0.02946031746031746, "grad_norm": 0.0004041789215989411, "kl": 0.00052591972053051, "learning_rate": 5.260770975056689e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 464 }, { "completion_length": 175.00001525878906, "epoch": 0.029523809523809525, "grad_norm": 0.0002991860965266824, "kl": 0.00045712044811807573, "learning_rate": 5.272108843537415e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 465 }, { "completion_length": 164.1428680419922, "epoch": 0.029587301587301586, "grad_norm": 0.000375627278117463, "kl": 0.0004990737652406096, "learning_rate": 5.28344671201814e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 466 }, { "completion_length": 181.07144165039062, "epoch": 0.02965079365079365, "grad_norm": 0.00032703884062357247, "kl": 0.0005117087857797742, "learning_rate": 5.294784580498866e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 467 }, { "completion_length": 182.35714721679688, "epoch": 0.029714285714285714, "grad_norm": 0.00032449880382046103, "kl": 0.0005913753411732614, "learning_rate": 5.3061224489795915e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 468 }, { "completion_length": 168.2857208251953, "epoch": 0.029777777777777778, "grad_norm": 0.000280747510259971, "kl": 0.000455720437457785, "learning_rate": 5.317460317460317e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 469 }, { "completion_length": 187.07144165039062, "epoch": 0.029841269841269842, "grad_norm": 0.00028333664522506297, "kl": 0.00041174227953888476, "learning_rate": 5.3287981859410426e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 470 }, { "completion_length": 142.35714721679688, "epoch": 0.029904761904761906, "grad_norm": 0.00033055819221772254, "kl": 0.0004940662765875459, "learning_rate": 5.340136054421768e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 471 }, { "completion_length": 189.71429443359375, "epoch": 0.029968253968253967, "grad_norm": 0.0004988704458810389, "kl": 0.0006091359537094831, "learning_rate": 5.351473922902494e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 472 }, { "completion_length": 179.71429443359375, "epoch": 0.03003174603174603, "grad_norm": 0.0003298553347121924, "kl": 0.00044276154949329793, "learning_rate": 5.36281179138322e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 473 }, { "completion_length": 185.00001525878906, "epoch": 0.030095238095238095, "grad_norm": 0.00028441607719287276, "kl": 0.00045479516847990453, "learning_rate": 5.3741496598639454e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 474 }, { "completion_length": 155.07144165039062, "epoch": 0.03015873015873016, "grad_norm": 0.00033611455000936985, "kl": 0.00041680585127323866, "learning_rate": 5.385487528344671e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 475 }, { "completion_length": 128.5, "epoch": 0.030222222222222223, "grad_norm": 0.0003718034422490746, "kl": 0.0005177491111680865, "learning_rate": 5.396825396825397e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 476 }, { "completion_length": 215.50001525878906, "epoch": 0.030285714285714287, "grad_norm": 0.0002543698938097805, "kl": 0.0004338358121458441, "learning_rate": 5.408163265306123e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 477 }, { "completion_length": 170.7857208251953, "epoch": 0.030349206349206348, "grad_norm": 0.0003003122110385448, "kl": 0.0004470793064683676, "learning_rate": 5.4195011337868476e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 478 }, { "completion_length": 154.85714721679688, "epoch": 0.030412698412698412, "grad_norm": 0.00030720356153324246, "kl": 0.0004962216480635107, "learning_rate": 5.430839002267573e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 479 }, { "completion_length": 173.57144165039062, "epoch": 0.030476190476190476, "grad_norm": 0.0003024718025699258, "kl": 0.000536870036739856, "learning_rate": 5.442176870748299e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 480 }, { "completion_length": 141.1428680419922, "epoch": 0.03053968253968254, "grad_norm": 0.00029996802913956344, "kl": 0.0004747954662889242, "learning_rate": 5.453514739229024e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 481 }, { "completion_length": 155.92857360839844, "epoch": 0.030603174603174604, "grad_norm": 0.000285953312413767, "kl": 0.00038176789530552924, "learning_rate": 5.4648526077097504e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 482 }, { "completion_length": 143.35714721679688, "epoch": 0.030666666666666665, "grad_norm": 0.00026144870207645, "kl": 0.0004152388428337872, "learning_rate": 5.476190476190476e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 483 }, { "completion_length": 179.00001525878906, "epoch": 0.03073015873015873, "grad_norm": 0.0002603097236715257, "kl": 0.0004422666097525507, "learning_rate": 5.4875283446712015e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 484 }, { "completion_length": 152.35714721679688, "epoch": 0.030793650793650793, "grad_norm": 1.3125473260879517, "kl": 0.0005048373714089394, "learning_rate": 5.498866213151927e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 485 }, { "completion_length": 158.21429443359375, "epoch": 0.030857142857142857, "grad_norm": 0.000242768379393965, "kl": 0.00045892855268903077, "learning_rate": 5.510204081632653e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 486 }, { "completion_length": 163.71429443359375, "epoch": 0.03092063492063492, "grad_norm": 0.00032390630804002285, "kl": 0.0005203423206694424, "learning_rate": 5.521541950113379e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 487 }, { "completion_length": 151.0, "epoch": 0.030984126984126985, "grad_norm": 0.0003691498714033514, "kl": 0.0005340186762623489, "learning_rate": 5.5328798185941044e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 488 }, { "completion_length": 162.42857360839844, "epoch": 0.031047619047619046, "grad_norm": 0.0002781849179882556, "kl": 0.00043981417547911406, "learning_rate": 5.54421768707483e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 489 }, { "completion_length": 127.35714721679688, "epoch": 0.03111111111111111, "grad_norm": 1.1494009494781494, "kl": 0.0005467257578857243, "learning_rate": 5.555555555555555e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 490 }, { "completion_length": 145.71429443359375, "epoch": 0.031174603174603174, "grad_norm": 0.00030267451074905694, "kl": 0.0004817371955141425, "learning_rate": 5.566893424036281e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 491 }, { "completion_length": 167.42857360839844, "epoch": 0.03123809523809524, "grad_norm": 0.0003645888646133244, "kl": 0.00039677193854004145, "learning_rate": 5.5782312925170065e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 492 }, { "completion_length": 167.71429443359375, "epoch": 0.0313015873015873, "grad_norm": 0.00043277491931803524, "kl": 0.000569009454920888, "learning_rate": 5.589569160997732e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 493 }, { "completion_length": 188.85714721679688, "epoch": 0.03136507936507937, "grad_norm": 0.0002931810449808836, "kl": 0.0005059947725385427, "learning_rate": 5.6009070294784576e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 494 }, { "completion_length": 194.1428680419922, "epoch": 0.03142857142857143, "grad_norm": 0.00024929954088293016, "kl": 0.00047772019752301276, "learning_rate": 5.612244897959183e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 495 }, { "completion_length": 191.21429443359375, "epoch": 0.031492063492063495, "grad_norm": 0.00027077182312496006, "kl": 0.0004490582214202732, "learning_rate": 5.6235827664399094e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 496 }, { "completion_length": 150.71429443359375, "epoch": 0.03155555555555556, "grad_norm": 0.0003870462824124843, "kl": 0.0005947197205387056, "learning_rate": 5.634920634920635e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 497 }, { "completion_length": 175.00001525878906, "epoch": 0.031619047619047616, "grad_norm": 0.0003573565336409956, "kl": 0.0005089113255962729, "learning_rate": 5.6462585034013605e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 498 }, { "completion_length": 184.21429443359375, "epoch": 0.03168253968253968, "grad_norm": 0.00028165351250208914, "kl": 0.0004282791633158922, "learning_rate": 5.657596371882086e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 499 }, { "completion_length": 163.6428680419922, "epoch": 0.031746031746031744, "grad_norm": 0.0003018523857463151, "kl": 0.00047178141539916396, "learning_rate": 5.6689342403628116e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 500 }, { "completion_length": 120.00000762939453, "epoch": 0.03180952380952381, "grad_norm": 0.0004133331240154803, "kl": 0.0004091434821020812, "learning_rate": 5.680272108843538e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 501 }, { "completion_length": 204.1428680419922, "epoch": 0.03187301587301587, "grad_norm": 0.0002884137793444097, "kl": 0.00047090466250665486, "learning_rate": 5.6916099773242627e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 502 }, { "completion_length": 174.6428680419922, "epoch": 0.03193650793650794, "grad_norm": 0.0002958849654532969, "kl": 0.00037843448808416724, "learning_rate": 5.702947845804988e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 503 }, { "completion_length": 146.85714721679688, "epoch": 0.032, "grad_norm": 0.00027957820566371083, "kl": 0.0004399892932269722, "learning_rate": 5.714285714285714e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 504 }, { "completion_length": 199.21429443359375, "epoch": 0.032063492063492065, "grad_norm": 0.0002702191995922476, "kl": 0.0004141528916079551, "learning_rate": 5.725623582766439e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 505 }, { "completion_length": 172.2857208251953, "epoch": 0.03212698412698413, "grad_norm": 0.0003415723331272602, "kl": 0.0005264764185994864, "learning_rate": 5.7369614512471655e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 506 }, { "completion_length": 181.50001525878906, "epoch": 0.03219047619047619, "grad_norm": 0.00033314022584818304, "kl": 0.0005107701872475445, "learning_rate": 5.748299319727891e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 507 }, { "completion_length": 152.85714721679688, "epoch": 0.03225396825396826, "grad_norm": 0.00029408562113530934, "kl": 0.000533277343492955, "learning_rate": 5.7596371882086166e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 508 }, { "completion_length": 149.0, "epoch": 0.032317460317460314, "grad_norm": 0.0003191417781636119, "kl": 0.0005054742214269936, "learning_rate": 5.770975056689342e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 509 }, { "completion_length": 183.35714721679688, "epoch": 0.03238095238095238, "grad_norm": 0.0003354449290782213, "kl": 0.0004479390336200595, "learning_rate": 5.782312925170068e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 510 }, { "completion_length": 163.42857360839844, "epoch": 0.03244444444444444, "grad_norm": 0.0003963175695389509, "kl": 0.00045809231232851744, "learning_rate": 5.793650793650794e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 511 }, { "completion_length": 165.07144165039062, "epoch": 0.032507936507936507, "grad_norm": 0.00031914428109303117, "kl": 0.0005028158775530756, "learning_rate": 5.8049886621315194e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 512 }, { "completion_length": 180.2857208251953, "epoch": 0.03257142857142857, "grad_norm": 0.00028620229568332434, "kl": 0.0004480291099753231, "learning_rate": 5.816326530612244e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 513 }, { "completion_length": 150.35714721679688, "epoch": 0.032634920634920635, "grad_norm": 1.540122151374817, "kl": 0.0004205758450552821, "learning_rate": 5.82766439909297e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 514 }, { "completion_length": 176.42857360839844, "epoch": 0.0326984126984127, "grad_norm": 0.00034596186014823616, "kl": 0.0004495619214139879, "learning_rate": 5.8390022675736954e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 515 }, { "completion_length": 170.71429443359375, "epoch": 0.03276190476190476, "grad_norm": 0.00025405234191566706, "kl": 0.00045299652265384793, "learning_rate": 5.8503401360544216e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 516 }, { "completion_length": 186.71429443359375, "epoch": 0.03282539682539683, "grad_norm": 0.00028125380049459636, "kl": 0.0004484660748858005, "learning_rate": 5.861678004535147e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 517 }, { "completion_length": 177.6428680419922, "epoch": 0.03288888888888889, "grad_norm": 0.00024084305914584547, "kl": 0.0003991686680819839, "learning_rate": 5.873015873015873e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 518 }, { "completion_length": 169.6428680419922, "epoch": 0.032952380952380955, "grad_norm": 0.0003079390444327146, "kl": 0.0004968374851159751, "learning_rate": 5.884353741496598e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 519 }, { "completion_length": 114.5714340209961, "epoch": 0.03301587301587302, "grad_norm": 0.00033329869620501995, "kl": 0.0004071069124620408, "learning_rate": 5.895691609977324e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 520 }, { "completion_length": 196.85714721679688, "epoch": 0.033079365079365076, "grad_norm": 0.0002803174720611423, "kl": 0.0005323312361724675, "learning_rate": 5.90702947845805e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 521 }, { "completion_length": 206.00001525878906, "epoch": 0.03314285714285714, "grad_norm": 0.0002548306074459106, "kl": 0.0004453138681128621, "learning_rate": 5.9183673469387755e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 522 }, { "completion_length": 175.07144165039062, "epoch": 0.033206349206349205, "grad_norm": 0.00033240599441342056, "kl": 0.0004627948219422251, "learning_rate": 5.929705215419501e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 523 }, { "completion_length": 153.71429443359375, "epoch": 0.03326984126984127, "grad_norm": 0.00031116788159124553, "kl": 0.000470956030767411, "learning_rate": 5.9410430839002266e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 524 }, { "completion_length": 129.1428680419922, "epoch": 0.03333333333333333, "grad_norm": 0.0003697672800626606, "kl": 0.0005754906451329589, "learning_rate": 5.9523809523809515e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 525 }, { "completion_length": 158.2857208251953, "epoch": 0.0333968253968254, "grad_norm": 0.0002851876779459417, "kl": 0.00040427857311442494, "learning_rate": 5.963718820861678e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 526 }, { "completion_length": 195.35714721679688, "epoch": 0.03346031746031746, "grad_norm": 0.000275452621281147, "kl": 0.0004139634838793427, "learning_rate": 5.975056689342403e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 527 }, { "completion_length": 180.92857360839844, "epoch": 0.033523809523809525, "grad_norm": 0.00022313006047625095, "kl": 0.00038933841278776526, "learning_rate": 5.986394557823129e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 528 }, { "completion_length": 167.92857360839844, "epoch": 0.03358730158730159, "grad_norm": 0.0002975994430016726, "kl": 0.00044421543134376407, "learning_rate": 5.997732426303854e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 529 }, { "completion_length": 151.07144165039062, "epoch": 0.03365079365079365, "grad_norm": 0.00021583442867267877, "kl": 0.0003770853509195149, "learning_rate": 6.00907029478458e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 530 }, { "completion_length": 154.57144165039062, "epoch": 0.03371428571428572, "grad_norm": 0.0002953166840597987, "kl": 0.00038579487591050565, "learning_rate": 6.020408163265305e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 531 }, { "completion_length": 167.35714721679688, "epoch": 0.033777777777777775, "grad_norm": 0.00030338839860633016, "kl": 0.00042375203338451684, "learning_rate": 6.031746031746031e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 532 }, { "completion_length": 169.5, "epoch": 0.03384126984126984, "grad_norm": 0.0003093494742643088, "kl": 0.0004616442893166095, "learning_rate": 6.043083900226758e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 533 }, { "completion_length": 167.07144165039062, "epoch": 0.0339047619047619, "grad_norm": 0.00044609594624489546, "kl": 0.0005028775194659829, "learning_rate": 6.054421768707483e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 534 }, { "completion_length": 178.35714721679688, "epoch": 0.03396825396825397, "grad_norm": 0.0004685649764724076, "kl": 0.0006127352244220674, "learning_rate": 6.065759637188209e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 535 }, { "completion_length": 191.35714721679688, "epoch": 0.03403174603174603, "grad_norm": 0.0003916895075235516, "kl": 0.00044062515371479094, "learning_rate": 6.077097505668934e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 536 }, { "completion_length": 138.2857208251953, "epoch": 0.034095238095238095, "grad_norm": 0.0003910493687726557, "kl": 0.0005321231437847018, "learning_rate": 6.088435374149659e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 537 }, { "completion_length": 161.92857360839844, "epoch": 0.03415873015873016, "grad_norm": 0.0002706764207687229, "kl": 0.00046702788677066565, "learning_rate": 6.099773242630386e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 538 }, { "completion_length": 153.85714721679688, "epoch": 0.03422222222222222, "grad_norm": 0.0003301760007161647, "kl": 0.00042863510316237807, "learning_rate": 6.111111111111111e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 539 }, { "completion_length": 124.85714721679688, "epoch": 0.03428571428571429, "grad_norm": 0.0003938869631383568, "kl": 0.0004934939206577837, "learning_rate": 6.122448979591837e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 540 }, { "completion_length": 142.5, "epoch": 0.03434920634920635, "grad_norm": 0.0004971408052369952, "kl": 0.0006214937893673778, "learning_rate": 6.133786848072562e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 541 }, { "completion_length": 181.71429443359375, "epoch": 0.034412698412698416, "grad_norm": 0.00023906398564577103, "kl": 0.00045323383528739214, "learning_rate": 6.145124716553288e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 542 }, { "completion_length": 186.2857208251953, "epoch": 0.03447619047619047, "grad_norm": 0.0002782460942398757, "kl": 0.0004508292768150568, "learning_rate": 6.156462585034013e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 543 }, { "completion_length": 141.07144165039062, "epoch": 0.03453968253968254, "grad_norm": 0.0004527030687313527, "kl": 0.000611641095019877, "learning_rate": 6.167800453514739e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 544 }, { "completion_length": 165.07144165039062, "epoch": 0.0346031746031746, "grad_norm": 0.0002905838773585856, "kl": 0.0004018982290290296, "learning_rate": 6.179138321995464e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 545 }, { "completion_length": 161.21429443359375, "epoch": 0.034666666666666665, "grad_norm": 0.0004729283682536334, "kl": 0.0005024958518333733, "learning_rate": 6.19047619047619e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 546 }, { "completion_length": 157.2857208251953, "epoch": 0.03473015873015873, "grad_norm": 0.00026235292898491025, "kl": 0.00038897694321349263, "learning_rate": 6.201814058956915e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 547 }, { "completion_length": 135.35714721679688, "epoch": 0.03479365079365079, "grad_norm": 0.0004153110203333199, "kl": 0.00043457961874082685, "learning_rate": 6.213151927437642e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 548 }, { "completion_length": 143.5, "epoch": 0.03485714285714286, "grad_norm": 0.00035334352287463844, "kl": 0.000528902281075716, "learning_rate": 6.224489795918367e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 549 }, { "completion_length": 133.21429443359375, "epoch": 0.03492063492063492, "grad_norm": 0.0002869433956220746, "kl": 0.0004500088980421424, "learning_rate": 6.235827664399092e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 550 }, { "completion_length": 165.1428680419922, "epoch": 0.034984126984126986, "grad_norm": 0.00028642735560424626, "kl": 0.00046201719669625163, "learning_rate": 6.247165532879818e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 551 }, { "completion_length": 142.0, "epoch": 0.03504761904761905, "grad_norm": 0.00025315035600215197, "kl": 0.00041382809286005795, "learning_rate": 6.258503401360545e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 552 }, { "completion_length": 183.42857360839844, "epoch": 0.035111111111111114, "grad_norm": 0.0002757155743893236, "kl": 0.00048912811325863, "learning_rate": 6.26984126984127e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 553 }, { "completion_length": 160.0, "epoch": 0.03517460317460318, "grad_norm": 0.0002472441701684147, "kl": 0.0004275291576050222, "learning_rate": 6.281179138321996e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 554 }, { "completion_length": 139.71429443359375, "epoch": 0.035238095238095235, "grad_norm": 0.0003998306638095528, "kl": 0.0004638073733076453, "learning_rate": 6.292517006802721e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 555 }, { "completion_length": 192.50001525878906, "epoch": 0.0353015873015873, "grad_norm": 0.00029162364080548286, "kl": 0.00042134945397265255, "learning_rate": 6.303854875283445e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 556 }, { "completion_length": 132.57144165039062, "epoch": 0.03536507936507936, "grad_norm": 0.0003683768445625901, "kl": 0.0005267352098599076, "learning_rate": 6.315192743764172e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 557 }, { "completion_length": 197.85714721679688, "epoch": 0.03542857142857143, "grad_norm": 0.0002743699587881565, "kl": 0.00045978877460584044, "learning_rate": 6.326530612244898e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 558 }, { "completion_length": 161.0, "epoch": 0.03549206349206349, "grad_norm": 0.0003511784307193011, "kl": 0.0005106369499117136, "learning_rate": 6.337868480725623e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 559 }, { "completion_length": 155.57144165039062, "epoch": 0.035555555555555556, "grad_norm": 0.00026911337045021355, "kl": 0.0005098217516206205, "learning_rate": 6.349206349206349e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 560 }, { "completion_length": 146.71429443359375, "epoch": 0.03561904761904762, "grad_norm": 1.23078453540802, "kl": 0.00046704523265361786, "learning_rate": 6.360544217687074e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 561 }, { "completion_length": 145.21429443359375, "epoch": 0.035682539682539684, "grad_norm": 0.00031602170201949775, "kl": 0.00042398725054226816, "learning_rate": 6.3718820861678e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 562 }, { "completion_length": 183.21429443359375, "epoch": 0.03574603174603175, "grad_norm": 0.0003021064621862024, "kl": 0.00047302464372478426, "learning_rate": 6.383219954648527e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 563 }, { "completion_length": 144.7857208251953, "epoch": 0.03580952380952381, "grad_norm": 0.00035884976387023926, "kl": 0.0005107144243083894, "learning_rate": 6.394557823129251e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 564 }, { "completion_length": 137.42857360839844, "epoch": 0.035873015873015876, "grad_norm": 0.00032990603358484805, "kl": 0.0004942728555761278, "learning_rate": 6.405895691609978e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 565 }, { "completion_length": 176.71429443359375, "epoch": 0.03593650793650793, "grad_norm": 0.0004881109343841672, "kl": 0.0004992592730559409, "learning_rate": 6.417233560090702e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 566 }, { "completion_length": 156.7857208251953, "epoch": 0.036, "grad_norm": 0.0004207684251014143, "kl": 0.00046410749200731516, "learning_rate": 6.428571428571428e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 567 }, { "completion_length": 154.42857360839844, "epoch": 0.03606349206349206, "grad_norm": 0.0003295918577350676, "kl": 0.0005114417872391641, "learning_rate": 6.439909297052155e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 568 }, { "completion_length": 191.85714721679688, "epoch": 0.036126984126984125, "grad_norm": 0.0002526530297473073, "kl": 0.00040937861194834113, "learning_rate": 6.451247165532879e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 569 }, { "completion_length": 150.92857360839844, "epoch": 0.03619047619047619, "grad_norm": 0.00029020902002230287, "kl": 0.00047633107169531286, "learning_rate": 6.462585034013606e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 570 }, { "completion_length": 158.7857208251953, "epoch": 0.036253968253968254, "grad_norm": 0.0002608360955491662, "kl": 0.00046036706771701574, "learning_rate": 6.47392290249433e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 571 }, { "completion_length": 149.85714721679688, "epoch": 0.03631746031746032, "grad_norm": 0.00037975245504640043, "kl": 0.0005556148244068027, "learning_rate": 6.485260770975057e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 572 }, { "completion_length": 171.7857208251953, "epoch": 0.03638095238095238, "grad_norm": 0.00038842999492771924, "kl": 0.0005141167202964425, "learning_rate": 6.496598639455782e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 573 }, { "completion_length": 152.92857360839844, "epoch": 0.036444444444444446, "grad_norm": 0.0002965793537441641, "kl": 0.0005101238493807614, "learning_rate": 6.507936507936508e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 574 }, { "completion_length": 191.2857208251953, "epoch": 0.03650793650793651, "grad_norm": 0.00023935711942613125, "kl": 0.0004209555627312511, "learning_rate": 6.519274376417233e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 575 }, { "completion_length": 158.7857208251953, "epoch": 0.036571428571428574, "grad_norm": 0.00034588095149956644, "kl": 0.0005517664831131697, "learning_rate": 6.530612244897959e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 576 }, { "completion_length": 172.2857208251953, "epoch": 0.03663492063492064, "grad_norm": 0.000246095733018592, "kl": 0.00033330402220599353, "learning_rate": 6.541950113378684e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 577 }, { "completion_length": 169.57144165039062, "epoch": 0.036698412698412695, "grad_norm": 0.00038016666076146066, "kl": 0.0005035903886891901, "learning_rate": 6.55328798185941e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 578 }, { "completion_length": 152.92857360839844, "epoch": 0.03676190476190476, "grad_norm": 0.00036908232141286135, "kl": 0.0004790840612258762, "learning_rate": 6.564625850340136e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 579 }, { "completion_length": 141.07144165039062, "epoch": 0.036825396825396824, "grad_norm": 0.0003569030959624797, "kl": 0.00048594112740829587, "learning_rate": 6.575963718820861e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 580 }, { "completion_length": 167.07144165039062, "epoch": 0.03688888888888889, "grad_norm": 0.0003425359900575131, "kl": 0.00048091867938637733, "learning_rate": 6.587301587301587e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 581 }, { "completion_length": 152.5, "epoch": 0.03695238095238095, "grad_norm": 0.0002986962499562651, "kl": 0.00044590802281163633, "learning_rate": 6.598639455782312e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 582 }, { "completion_length": 177.21429443359375, "epoch": 0.037015873015873016, "grad_norm": 0.00032837496837601066, "kl": 0.0004888351541012526, "learning_rate": 6.609977324263039e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 583 }, { "completion_length": 149.21429443359375, "epoch": 0.03707936507936508, "grad_norm": 0.00030869728652760386, "kl": 0.0004813919367734343, "learning_rate": 6.621315192743763e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 584 }, { "completion_length": 168.2857208251953, "epoch": 0.037142857142857144, "grad_norm": 0.0003559202596079558, "kl": 0.00048009806778281927, "learning_rate": 6.63265306122449e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 585 }, { "completion_length": 143.57144165039062, "epoch": 0.03720634920634921, "grad_norm": 0.00037761477869935334, "kl": 0.0005029271706007421, "learning_rate": 6.643990929705214e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 586 }, { "completion_length": 183.71429443359375, "epoch": 0.03726984126984127, "grad_norm": 0.00026597341638989747, "kl": 0.0003819619014393538, "learning_rate": 6.655328798185941e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 587 }, { "completion_length": 133.57144165039062, "epoch": 0.037333333333333336, "grad_norm": 0.0003987883683294058, "kl": 0.0005971071659587324, "learning_rate": 6.666666666666667e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 588 }, { "completion_length": 176.92857360839844, "epoch": 0.037396825396825394, "grad_norm": 0.0004143371188547462, "kl": 0.0005546574248000979, "learning_rate": 6.678004535147392e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 589 }, { "completion_length": 131.07144165039062, "epoch": 0.03746031746031746, "grad_norm": 0.00040387161425314844, "kl": 0.0005467676091939211, "learning_rate": 6.689342403628118e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 590 }, { "completion_length": 168.42857360839844, "epoch": 0.03752380952380952, "grad_norm": 0.00027295202016830444, "kl": 0.00042918953113257885, "learning_rate": 6.700680272108842e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 591 }, { "completion_length": 207.57144165039062, "epoch": 0.037587301587301586, "grad_norm": 0.00026789199910126626, "kl": 0.00041323521872982383, "learning_rate": 6.712018140589569e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 592 }, { "completion_length": 159.5, "epoch": 0.03765079365079365, "grad_norm": 0.00036314592580311, "kl": 0.00048814702313393354, "learning_rate": 6.723356009070295e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 593 }, { "completion_length": 180.57144165039062, "epoch": 0.037714285714285714, "grad_norm": 0.00045527549809776247, "kl": 0.000617222860455513, "learning_rate": 6.73469387755102e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 594 }, { "completion_length": 175.00001525878906, "epoch": 0.03777777777777778, "grad_norm": 0.0003274494665674865, "kl": 0.0005309070693328977, "learning_rate": 6.746031746031746e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 595 }, { "completion_length": 166.57144165039062, "epoch": 0.03784126984126984, "grad_norm": 0.000520036555826664, "kl": 0.0005100475973449647, "learning_rate": 6.757369614512471e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 596 }, { "completion_length": 163.5, "epoch": 0.037904761904761906, "grad_norm": 0.00035718720755539834, "kl": 0.0004678899422287941, "learning_rate": 6.768707482993197e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 597 }, { "completion_length": 147.6428680419922, "epoch": 0.03796825396825397, "grad_norm": 0.00038903323002159595, "kl": 0.0004896068712696433, "learning_rate": 6.780045351473924e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 598 }, { "completion_length": 193.85714721679688, "epoch": 0.038031746031746035, "grad_norm": 0.00032590667251497507, "kl": 0.000458583643194288, "learning_rate": 6.791383219954648e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 599 }, { "completion_length": 169.92857360839844, "epoch": 0.0380952380952381, "grad_norm": 0.0002776223118416965, "kl": 0.0004698606499005109, "learning_rate": 6.802721088435375e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 600 }, { "completion_length": 177.7857208251953, "epoch": 0.038158730158730156, "grad_norm": 0.0002740903291851282, "kl": 0.00044823600910604, "learning_rate": 6.814058956916099e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 601 }, { "completion_length": 164.5, "epoch": 0.03822222222222222, "grad_norm": 0.000235788116697222, "kl": 0.00042158528231084347, "learning_rate": 6.825396825396824e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 602 }, { "completion_length": 177.1428680419922, "epoch": 0.038285714285714284, "grad_norm": 0.0002966788597404957, "kl": 0.00048654060810804367, "learning_rate": 6.836734693877551e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 603 }, { "completion_length": 163.6428680419922, "epoch": 0.03834920634920635, "grad_norm": 0.00038574106292799115, "kl": 0.0004940618528053164, "learning_rate": 6.848072562358276e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 604 }, { "completion_length": 164.0, "epoch": 0.03841269841269841, "grad_norm": 0.0004590371681842953, "kl": 0.0005455230711959302, "learning_rate": 6.859410430839002e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 605 }, { "completion_length": 196.71429443359375, "epoch": 0.038476190476190476, "grad_norm": 0.00021380334510467947, "kl": 0.00041777529986575246, "learning_rate": 6.870748299319727e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 606 }, { "completion_length": 157.6428680419922, "epoch": 0.03853968253968254, "grad_norm": 0.0003389402700122446, "kl": 0.0004882724897470325, "learning_rate": 6.882086167800453e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 607 }, { "completion_length": 169.07144165039062, "epoch": 0.038603174603174605, "grad_norm": 0.00031894168932922184, "kl": 0.0004396302974782884, "learning_rate": 6.893424036281179e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 608 }, { "completion_length": 141.0, "epoch": 0.03866666666666667, "grad_norm": 0.000328832829836756, "kl": 0.000553119636606425, "learning_rate": 6.904761904761905e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 609 }, { "completion_length": 145.1428680419922, "epoch": 0.03873015873015873, "grad_norm": 0.0004083213862031698, "kl": 0.0005531333154067397, "learning_rate": 6.91609977324263e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 610 }, { "completion_length": 149.71429443359375, "epoch": 0.0387936507936508, "grad_norm": 0.00038492996827699244, "kl": 0.000518868095241487, "learning_rate": 6.927437641723356e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 611 }, { "completion_length": 207.35714721679688, "epoch": 0.038857142857142854, "grad_norm": 0.0004162195837125182, "kl": 0.0006915413541719317, "learning_rate": 6.938775510204081e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 612 }, { "completion_length": 146.5, "epoch": 0.03892063492063492, "grad_norm": 0.0004708743654191494, "kl": 0.0006289957091212273, "learning_rate": 6.950113378684808e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 613 }, { "completion_length": 182.2857208251953, "epoch": 0.03898412698412698, "grad_norm": 0.0002714608854148537, "kl": 0.00045665301149711013, "learning_rate": 6.961451247165532e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 614 }, { "completion_length": 126.71429443359375, "epoch": 0.039047619047619046, "grad_norm": 0.00030525567126460373, "kl": 0.0004240290727466345, "learning_rate": 6.972789115646258e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 615 }, { "completion_length": 154.85714721679688, "epoch": 0.03911111111111111, "grad_norm": 0.0003042029857169837, "kl": 0.00042240103357471526, "learning_rate": 6.984126984126983e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 616 }, { "completion_length": 157.6428680419922, "epoch": 0.039174603174603174, "grad_norm": 0.00036586340866051614, "kl": 0.0005248539964668453, "learning_rate": 6.995464852607709e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 617 }, { "completion_length": 153.71429443359375, "epoch": 0.03923809523809524, "grad_norm": 0.00036087181069888175, "kl": 0.0005038831732235849, "learning_rate": 7.006802721088436e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 618 }, { "completion_length": 161.2857208251953, "epoch": 0.0393015873015873, "grad_norm": 0.00026239079306833446, "kl": 0.0004378134908620268, "learning_rate": 7.01814058956916e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 619 }, { "completion_length": 203.00001525878906, "epoch": 0.03936507936507937, "grad_norm": 0.00041513858013786376, "kl": 0.0005598083371296525, "learning_rate": 7.029478458049887e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 620 }, { "completion_length": 177.57144165039062, "epoch": 0.03942857142857143, "grad_norm": 0.0002867025905288756, "kl": 0.0005382279632613063, "learning_rate": 7.040816326530611e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 621 }, { "completion_length": 169.57144165039062, "epoch": 0.039492063492063495, "grad_norm": 0.0002432217006571591, "kl": 0.0004046893445774913, "learning_rate": 7.052154195011338e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 622 }, { "completion_length": 151.42857360839844, "epoch": 0.03955555555555555, "grad_norm": 0.00037866615457460284, "kl": 0.0005510973278433084, "learning_rate": 7.063492063492064e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 623 }, { "completion_length": 126.00000762939453, "epoch": 0.039619047619047616, "grad_norm": 0.0003132856509182602, "kl": 0.0004969660076312721, "learning_rate": 7.074829931972789e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 624 }, { "completion_length": 156.71429443359375, "epoch": 0.03968253968253968, "grad_norm": 0.00037090963451191783, "kl": 0.00046228975406847894, "learning_rate": 7.086167800453515e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 625 }, { "completion_length": 158.6428680419922, "epoch": 0.039746031746031744, "grad_norm": 0.0003077488509006798, "kl": 0.0005176070844754577, "learning_rate": 7.097505668934239e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 626 }, { "completion_length": 140.0, "epoch": 0.03980952380952381, "grad_norm": 0.0003524177009239793, "kl": 0.00046753938659094274, "learning_rate": 7.108843537414966e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 627 }, { "completion_length": 180.42857360839844, "epoch": 0.03987301587301587, "grad_norm": 0.00023540720576420426, "kl": 0.0004827974480576813, "learning_rate": 7.120181405895691e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 628 }, { "completion_length": 144.42857360839844, "epoch": 0.03993650793650794, "grad_norm": 0.0002787128323689103, "kl": 0.0004471143474802375, "learning_rate": 7.131519274376417e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 629 }, { "completion_length": 155.2857208251953, "epoch": 0.04, "grad_norm": 0.0004762799944728613, "kl": 0.000495628803037107, "learning_rate": 7.142857142857142e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 630 }, { "completion_length": 147.57144165039062, "epoch": 0.040063492063492065, "grad_norm": 0.00031865714117884636, "kl": 0.0005172042874619365, "learning_rate": 7.154195011337868e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 631 }, { "completion_length": 201.71429443359375, "epoch": 0.04012698412698413, "grad_norm": 0.0003249246219638735, "kl": 0.0005608602659776807, "learning_rate": 7.165532879818593e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 632 }, { "completion_length": 175.7857208251953, "epoch": 0.04019047619047619, "grad_norm": 0.00029213950620032847, "kl": 0.00045647649676539004, "learning_rate": 7.17687074829932e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 633 }, { "completion_length": 162.07144165039062, "epoch": 0.04025396825396826, "grad_norm": 0.0003747105074580759, "kl": 0.000562866625841707, "learning_rate": 7.188208616780045e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 634 }, { "completion_length": 212.1428680419922, "epoch": 0.040317460317460314, "grad_norm": 0.0002247294323751703, "kl": 0.0004289858916308731, "learning_rate": 7.199546485260771e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 635 }, { "completion_length": 140.71429443359375, "epoch": 0.04038095238095238, "grad_norm": 0.00036778682260774076, "kl": 0.0005580056458711624, "learning_rate": 7.210884353741496e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 636 }, { "completion_length": 199.6428680419922, "epoch": 0.04044444444444444, "grad_norm": 0.0003271405876148492, "kl": 0.000556654529646039, "learning_rate": 7.222222222222221e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 637 }, { "completion_length": 231.50001525878906, "epoch": 0.04050793650793651, "grad_norm": 0.0002861293905880302, "kl": 0.0004034190787933767, "learning_rate": 7.233560090702948e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 638 }, { "completion_length": 171.00001525878906, "epoch": 0.04057142857142857, "grad_norm": 0.0002855015336535871, "kl": 0.00041054419125430286, "learning_rate": 7.244897959183672e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 639 }, { "completion_length": 156.07144165039062, "epoch": 0.040634920634920635, "grad_norm": 0.00034280779073014855, "kl": 0.00042461545672267675, "learning_rate": 7.256235827664399e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 640 }, { "completion_length": 146.57144165039062, "epoch": 0.0406984126984127, "grad_norm": 0.0003545335785020143, "kl": 0.0004902083892375231, "learning_rate": 7.267573696145123e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 641 }, { "completion_length": 177.35714721679688, "epoch": 0.04076190476190476, "grad_norm": 0.00026789342518895864, "kl": 0.00042527428013272583, "learning_rate": 7.27891156462585e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 642 }, { "completion_length": 168.0, "epoch": 0.04082539682539683, "grad_norm": 0.0003248519788030535, "kl": 0.00047811202239245176, "learning_rate": 7.290249433106576e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 643 }, { "completion_length": 143.42857360839844, "epoch": 0.04088888888888889, "grad_norm": 0.00035879036295227706, "kl": 0.00045567392953671515, "learning_rate": 7.301587301587301e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 644 }, { "completion_length": 176.57144165039062, "epoch": 0.040952380952380955, "grad_norm": 0.0003011752269230783, "kl": 0.0003674391773529351, "learning_rate": 7.312925170068027e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 645 }, { "completion_length": 172.2857208251953, "epoch": 0.04101587301587301, "grad_norm": 0.00030507319024764, "kl": 0.0005256789736449718, "learning_rate": 7.324263038548752e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 646 }, { "completion_length": 147.6428680419922, "epoch": 0.04107936507936508, "grad_norm": 0.00035440479405224323, "kl": 0.000436708825873211, "learning_rate": 7.335600907029478e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 647 }, { "completion_length": 195.42857360839844, "epoch": 0.04114285714285714, "grad_norm": 0.0002522802969906479, "kl": 0.000448363134637475, "learning_rate": 7.346938775510205e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 648 }, { "completion_length": 156.21429443359375, "epoch": 0.041206349206349205, "grad_norm": 0.000400492106564343, "kl": 0.0006000270368531346, "learning_rate": 7.358276643990929e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 649 }, { "completion_length": 173.2857208251953, "epoch": 0.04126984126984127, "grad_norm": 0.00029164322768338025, "kl": 0.0004454687295947224, "learning_rate": 7.369614512471655e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 650 }, { "completion_length": 138.5, "epoch": 0.04133333333333333, "grad_norm": 0.00029017217457294464, "kl": 0.0005121901049278677, "learning_rate": 7.38095238095238e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 651 }, { "completion_length": 160.35714721679688, "epoch": 0.0413968253968254, "grad_norm": 0.00035126699367538095, "kl": 0.0005957342218607664, "learning_rate": 7.392290249433106e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 652 }, { "completion_length": 149.21429443359375, "epoch": 0.04146031746031746, "grad_norm": 0.00039977292180992663, "kl": 0.00033148829243145883, "learning_rate": 7.403628117913833e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 653 }, { "completion_length": 134.92857360839844, "epoch": 0.041523809523809525, "grad_norm": 0.00033516762778162956, "kl": 0.0005043300334364176, "learning_rate": 7.414965986394557e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 654 }, { "completion_length": 187.85714721679688, "epoch": 0.04158730158730159, "grad_norm": 0.0002400178782409057, "kl": 0.00041780361789278686, "learning_rate": 7.426303854875284e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 655 }, { "completion_length": 108.78572082519531, "epoch": 0.041650793650793654, "grad_norm": 0.0004370259994175285, "kl": 0.0005120371351949871, "learning_rate": 7.437641723356008e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 656 }, { "completion_length": 163.07144165039062, "epoch": 0.04171428571428572, "grad_norm": 0.00040605501271784306, "kl": 0.0005290511762723327, "learning_rate": 7.448979591836735e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 657 }, { "completion_length": 167.71429443359375, "epoch": 0.041777777777777775, "grad_norm": 0.8616147041320801, "kl": 0.00045801966916769743, "learning_rate": 7.46031746031746e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 658 }, { "completion_length": 168.2857208251953, "epoch": 0.04184126984126984, "grad_norm": 0.0004722838057205081, "kl": 0.0005261739133857191, "learning_rate": 7.471655328798186e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 659 }, { "completion_length": 178.42857360839844, "epoch": 0.0419047619047619, "grad_norm": 0.0002889558963943273, "kl": 0.00044856368913315237, "learning_rate": 7.482993197278911e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 660 }, { "completion_length": 179.7857208251953, "epoch": 0.04196825396825397, "grad_norm": 0.00025220532552339137, "kl": 0.00038688204949721694, "learning_rate": 7.494331065759636e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 661 }, { "completion_length": 193.2857208251953, "epoch": 0.04203174603174603, "grad_norm": 0.00023451198649127036, "kl": 0.0004058807098772377, "learning_rate": 7.505668934240362e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 662 }, { "completion_length": 226.57144165039062, "epoch": 0.042095238095238095, "grad_norm": 0.00023967886227183044, "kl": 0.0002794440952129662, "learning_rate": 7.517006802721088e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 663 }, { "completion_length": 177.57144165039062, "epoch": 0.04215873015873016, "grad_norm": 0.0006730767199769616, "kl": 0.0004702845180872828, "learning_rate": 7.528344671201814e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 664 }, { "completion_length": 154.57144165039062, "epoch": 0.042222222222222223, "grad_norm": 0.0004459645424503833, "kl": 0.000537359097506851, "learning_rate": 7.539682539682539e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 665 }, { "completion_length": 192.57144165039062, "epoch": 0.04228571428571429, "grad_norm": 0.00024173094425350428, "kl": 0.0004800860187970102, "learning_rate": 7.551020408163266e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 666 }, { "completion_length": 178.57144165039062, "epoch": 0.04234920634920635, "grad_norm": 0.0003898288414347917, "kl": 0.0005095093511044979, "learning_rate": 7.56235827664399e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 667 }, { "completion_length": 137.92857360839844, "epoch": 0.042412698412698416, "grad_norm": 0.00040088724927045405, "kl": 0.000517215405125171, "learning_rate": 7.573696145124717e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 668 }, { "completion_length": 138.2857208251953, "epoch": 0.04247619047619047, "grad_norm": 0.00040648854337632656, "kl": 0.00048381081433035433, "learning_rate": 7.585034013605441e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 669 }, { "completion_length": 139.5, "epoch": 0.04253968253968254, "grad_norm": 0.0003363695286680013, "kl": 0.000530991586856544, "learning_rate": 7.596371882086168e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 670 }, { "completion_length": 197.57144165039062, "epoch": 0.0426031746031746, "grad_norm": 0.0002610397059470415, "kl": 0.00044165865983814, "learning_rate": 7.607709750566894e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 671 }, { "completion_length": 161.0, "epoch": 0.042666666666666665, "grad_norm": 0.00026042485842481256, "kl": 0.0004376231227070093, "learning_rate": 7.619047619047619e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 672 }, { "completion_length": 190.2857208251953, "epoch": 0.04273015873015873, "grad_norm": 0.0002631857933010906, "kl": 0.00044948747381567955, "learning_rate": 7.630385487528345e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 673 }, { "completion_length": 146.85714721679688, "epoch": 0.04279365079365079, "grad_norm": 0.0003269410808570683, "kl": 0.0005166586488485336, "learning_rate": 7.641723356009069e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 674 }, { "completion_length": 176.7857208251953, "epoch": 0.04285714285714286, "grad_norm": 0.00030047461041249335, "kl": 0.00047095262561924756, "learning_rate": 7.653061224489796e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 675 }, { "completion_length": 185.92857360839844, "epoch": 0.04292063492063492, "grad_norm": 0.00026606785831972957, "kl": 0.00046954560093581676, "learning_rate": 7.664399092970521e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 676 }, { "completion_length": 145.71429443359375, "epoch": 0.042984126984126986, "grad_norm": 0.00027970605879090726, "kl": 0.0003575127338990569, "learning_rate": 7.675736961451247e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 677 }, { "completion_length": 186.00001525878906, "epoch": 0.04304761904761905, "grad_norm": 0.0005210867384448647, "kl": 0.000495014654006809, "learning_rate": 7.687074829931972e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 678 }, { "completion_length": 156.85714721679688, "epoch": 0.043111111111111114, "grad_norm": 0.00028043982456438243, "kl": 0.00046297957305796444, "learning_rate": 7.698412698412698e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 679 }, { "completion_length": 149.1428680419922, "epoch": 0.04317460317460318, "grad_norm": 0.0002892528136726469, "kl": 0.0004604935529641807, "learning_rate": 7.709750566893424e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 680 }, { "completion_length": 196.07144165039062, "epoch": 0.043238095238095235, "grad_norm": 0.00025284013827331364, "kl": 0.0004818530287593603, "learning_rate": 7.72108843537415e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 681 }, { "completion_length": 164.1428680419922, "epoch": 0.0433015873015873, "grad_norm": 0.00032297609141096473, "kl": 0.0004401099868118763, "learning_rate": 7.732426303854875e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 682 }, { "completion_length": 142.71429443359375, "epoch": 0.04336507936507936, "grad_norm": 0.0004644125874619931, "kl": 0.00046143028885126114, "learning_rate": 7.743764172335602e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 683 }, { "completion_length": 154.0, "epoch": 0.04342857142857143, "grad_norm": 0.00043387318146415055, "kl": 0.00042038585525006056, "learning_rate": 7.755102040816326e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 684 }, { "completion_length": 180.1428680419922, "epoch": 0.04349206349206349, "grad_norm": 0.0003100679605267942, "kl": 0.00044998122029937804, "learning_rate": 7.766439909297051e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 685 }, { "completion_length": 152.7857208251953, "epoch": 0.043555555555555556, "grad_norm": 0.00041085880366154015, "kl": 0.0004741332959383726, "learning_rate": 7.777777777777778e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 686 }, { "completion_length": 154.7857208251953, "epoch": 0.04361904761904762, "grad_norm": 0.00041850030538626015, "kl": 0.00048503620200790465, "learning_rate": 7.789115646258502e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 687 }, { "completion_length": 160.1428680419922, "epoch": 0.043682539682539684, "grad_norm": 0.00029996162629686296, "kl": 0.0004213780630379915, "learning_rate": 7.800453514739229e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 688 }, { "completion_length": 121.5714340209961, "epoch": 0.04374603174603175, "grad_norm": 1.3787420988082886, "kl": 0.0005515703815035522, "learning_rate": 7.811791383219953e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 689 }, { "completion_length": 160.35714721679688, "epoch": 0.04380952380952381, "grad_norm": 0.00031891136313788593, "kl": 0.00045403646072372794, "learning_rate": 7.82312925170068e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 690 }, { "completion_length": 181.71429443359375, "epoch": 0.043873015873015876, "grad_norm": 0.00036737057962454855, "kl": 0.00048478602548129857, "learning_rate": 7.834467120181406e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 691 }, { "completion_length": 179.7857208251953, "epoch": 0.04393650793650793, "grad_norm": 0.0002430388703942299, "kl": 0.0003470110532362014, "learning_rate": 7.845804988662131e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 692 }, { "completion_length": 181.7857208251953, "epoch": 0.044, "grad_norm": 0.00026364490622654557, "kl": 0.0004983079852536321, "learning_rate": 7.857142857142857e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 693 }, { "completion_length": 151.42857360839844, "epoch": 0.04406349206349206, "grad_norm": 0.000366314867278561, "kl": 0.0005664968048222363, "learning_rate": 7.868480725623583e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 694 }, { "completion_length": 178.2857208251953, "epoch": 0.044126984126984126, "grad_norm": 0.0002916346420533955, "kl": 0.0004057307669427246, "learning_rate": 7.879818594104308e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 695 }, { "completion_length": 171.50001525878906, "epoch": 0.04419047619047619, "grad_norm": 0.00022945585078559816, "kl": 0.00042500291601754725, "learning_rate": 7.891156462585034e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 696 }, { "completion_length": 164.85714721679688, "epoch": 0.044253968253968254, "grad_norm": 0.0003732450713869184, "kl": 0.00045894429786130786, "learning_rate": 7.902494331065759e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 697 }, { "completion_length": 186.35714721679688, "epoch": 0.04431746031746032, "grad_norm": 0.0003020348376594484, "kl": 0.0005486385780386627, "learning_rate": 7.913832199546485e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 698 }, { "completion_length": 147.2857208251953, "epoch": 0.04438095238095238, "grad_norm": 0.0006308395531959832, "kl": 0.0005832784227095544, "learning_rate": 7.92517006802721e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 699 }, { "completion_length": 148.1428680419922, "epoch": 0.044444444444444446, "grad_norm": 0.00038190840859897435, "kl": 0.0004582336696330458, "learning_rate": 7.936507936507936e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 700 }, { "completion_length": 166.1428680419922, "epoch": 0.04450793650793651, "grad_norm": 0.00025944257504306734, "kl": 0.00043859341531060636, "learning_rate": 7.947845804988663e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 701 }, { "completion_length": 220.71429443359375, "epoch": 0.044571428571428574, "grad_norm": 0.00032743634073995054, "kl": 0.0004660494450945407, "learning_rate": 7.959183673469387e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 702 }, { "completion_length": 159.1428680419922, "epoch": 0.04463492063492063, "grad_norm": 0.00024761827080510557, "kl": 0.00042741402285173535, "learning_rate": 7.970521541950114e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 703 }, { "completion_length": 185.07144165039062, "epoch": 0.044698412698412696, "grad_norm": 0.0002826945565175265, "kl": 0.0005163219757378101, "learning_rate": 7.981859410430838e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 704 }, { "completion_length": 165.42857360839844, "epoch": 0.04476190476190476, "grad_norm": 0.0002991966321133077, "kl": 0.000502759765367955, "learning_rate": 7.993197278911565e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 705 }, { "completion_length": 162.6428680419922, "epoch": 0.044825396825396824, "grad_norm": 0.00043323778663761914, "kl": 0.00047441350761801004, "learning_rate": 8.00453514739229e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 706 }, { "completion_length": 166.6428680419922, "epoch": 0.04488888888888889, "grad_norm": 0.0004001030174549669, "kl": 0.000560066313482821, "learning_rate": 8.015873015873016e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 707 }, { "completion_length": 154.85714721679688, "epoch": 0.04495238095238095, "grad_norm": 0.0004411877307575196, "kl": 0.0004675445088651031, "learning_rate": 8.027210884353741e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 708 }, { "completion_length": 126.71429443359375, "epoch": 0.045015873015873016, "grad_norm": 0.0005606704507954419, "kl": 0.0005614072433672845, "learning_rate": 8.038548752834466e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 709 }, { "completion_length": 132.1428680419922, "epoch": 0.04507936507936508, "grad_norm": 0.0003990813565906137, "kl": 0.0005078090471215546, "learning_rate": 8.049886621315193e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 710 }, { "completion_length": 186.57144165039062, "epoch": 0.045142857142857144, "grad_norm": 0.00030749381403438747, "kl": 0.0004374005366116762, "learning_rate": 8.061224489795918e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 711 }, { "completion_length": 181.1428680419922, "epoch": 0.04520634920634921, "grad_norm": 0.0002980481367558241, "kl": 0.00043089117389172316, "learning_rate": 8.072562358276644e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 712 }, { "completion_length": 202.50001525878906, "epoch": 0.04526984126984127, "grad_norm": 0.00025726130115799606, "kl": 0.0004371191607788205, "learning_rate": 8.083900226757369e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 713 }, { "completion_length": 165.1428680419922, "epoch": 0.04533333333333334, "grad_norm": 0.00047252592048607767, "kl": 0.000510542478878051, "learning_rate": 8.095238095238095e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 714 }, { "completion_length": 177.1428680419922, "epoch": 0.045396825396825394, "grad_norm": 0.00035850636777468026, "kl": 0.0004938794299960136, "learning_rate": 8.10657596371882e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 715 }, { "completion_length": 142.2857208251953, "epoch": 0.04546031746031746, "grad_norm": 0.0004357217694632709, "kl": 0.0004214462242089212, "learning_rate": 8.117913832199547e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 716 }, { "completion_length": 139.35714721679688, "epoch": 0.04552380952380952, "grad_norm": 1.0319348573684692, "kl": 0.0004960554069839418, "learning_rate": 8.129251700680271e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 717 }, { "completion_length": 192.07144165039062, "epoch": 0.045587301587301586, "grad_norm": 0.0003590281121432781, "kl": 0.00043887514038942754, "learning_rate": 8.140589569160998e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 718 }, { "completion_length": 180.50001525878906, "epoch": 0.04565079365079365, "grad_norm": 0.00030452487408183515, "kl": 0.0004992868052795529, "learning_rate": 8.151927437641722e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 719 }, { "completion_length": 182.07144165039062, "epoch": 0.045714285714285714, "grad_norm": 0.00036919835838489234, "kl": 0.0004910407587885857, "learning_rate": 8.163265306122448e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 720 }, { "completion_length": 124.64286041259766, "epoch": 0.04577777777777778, "grad_norm": 0.0005407798453234136, "kl": 0.0004948325804434717, "learning_rate": 8.174603174603175e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 721 }, { "completion_length": 198.2857208251953, "epoch": 0.04584126984126984, "grad_norm": 0.0003749140596482903, "kl": 0.0004896975005976856, "learning_rate": 8.185941043083899e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 722 }, { "completion_length": 163.35714721679688, "epoch": 0.04590476190476191, "grad_norm": 0.0003370757040102035, "kl": 0.0003705437993630767, "learning_rate": 8.197278911564626e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 723 }, { "completion_length": 142.57144165039062, "epoch": 0.04596825396825397, "grad_norm": 0.0006015241960994899, "kl": 0.0006672184099443257, "learning_rate": 8.20861678004535e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 724 }, { "completion_length": 143.6428680419922, "epoch": 0.046031746031746035, "grad_norm": 0.0003449259384069592, "kl": 0.0005565652973018587, "learning_rate": 8.219954648526077e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 725 }, { "completion_length": 173.6428680419922, "epoch": 0.04609523809523809, "grad_norm": 0.0004610929754562676, "kl": 0.0005771591095253825, "learning_rate": 8.231292517006803e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 726 }, { "completion_length": 177.42857360839844, "epoch": 0.046158730158730156, "grad_norm": 0.0003897745627909899, "kl": 0.0003925015917047858, "learning_rate": 8.242630385487528e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 727 }, { "completion_length": 168.21429443359375, "epoch": 0.04622222222222222, "grad_norm": 0.00041753705590963364, "kl": 0.00043591720168478787, "learning_rate": 8.253968253968254e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 728 }, { "completion_length": 140.92857360839844, "epoch": 0.046285714285714284, "grad_norm": 0.0004752151435241103, "kl": 0.00046717430814169347, "learning_rate": 8.265306122448979e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 729 }, { "completion_length": 172.92857360839844, "epoch": 0.04634920634920635, "grad_norm": 0.0006023257737979293, "kl": 0.0005562627920880914, "learning_rate": 8.276643990929705e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 730 }, { "completion_length": 173.35714721679688, "epoch": 0.04641269841269841, "grad_norm": 0.0004722251615021378, "kl": 0.0005794874741695821, "learning_rate": 8.287981859410432e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 731 }, { "completion_length": 144.71429443359375, "epoch": 0.046476190476190476, "grad_norm": 0.0005368462880142033, "kl": 0.000598371319938451, "learning_rate": 8.299319727891156e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 732 }, { "completion_length": 133.1428680419922, "epoch": 0.04653968253968254, "grad_norm": 0.0005964472657069564, "kl": 0.0006097882869653404, "learning_rate": 8.310657596371881e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 733 }, { "completion_length": 154.85714721679688, "epoch": 0.046603174603174605, "grad_norm": 0.0003602864744607359, "kl": 0.000510333979036659, "learning_rate": 8.321995464852607e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 734 }, { "completion_length": 134.6428680419922, "epoch": 0.04666666666666667, "grad_norm": 0.0005934684304520488, "kl": 0.0005680934409610927, "learning_rate": 8.333333333333333e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 735 }, { "completion_length": 130.21429443359375, "epoch": 0.04673015873015873, "grad_norm": 0.0005180926527827978, "kl": 0.0005863915430381894, "learning_rate": 8.34467120181406e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 736 }, { "completion_length": 184.1428680419922, "epoch": 0.0467936507936508, "grad_norm": 0.00030335565679706633, "kl": 0.00043843252933584154, "learning_rate": 8.356009070294784e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 737 }, { "completion_length": 151.0, "epoch": 0.046857142857142854, "grad_norm": 1.2118016481399536, "kl": 0.0006334042409434915, "learning_rate": 8.36734693877551e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 738 }, { "completion_length": 176.6428680419922, "epoch": 0.04692063492063492, "grad_norm": 0.00045602460158988833, "kl": 0.00042947311885654926, "learning_rate": 8.378684807256235e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 739 }, { "completion_length": 184.71429443359375, "epoch": 0.04698412698412698, "grad_norm": 0.00027758823125623167, "kl": 0.00042899357504211366, "learning_rate": 8.390022675736962e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 740 }, { "completion_length": 178.21429443359375, "epoch": 0.047047619047619046, "grad_norm": 0.0004094679025001824, "kl": 0.0004948026617057621, "learning_rate": 8.401360544217687e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 741 }, { "completion_length": 166.5, "epoch": 0.04711111111111111, "grad_norm": 0.0005281185731291771, "kl": 0.0005550537025555968, "learning_rate": 8.412698412698413e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 742 }, { "completion_length": 188.21429443359375, "epoch": 0.047174603174603175, "grad_norm": 0.0004611105832736939, "kl": 0.00045282545033842325, "learning_rate": 8.424036281179138e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 743 }, { "completion_length": 151.71429443359375, "epoch": 0.04723809523809524, "grad_norm": 0.00046244231634773314, "kl": 0.00045852898620069027, "learning_rate": 8.435374149659862e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 744 }, { "completion_length": 183.7857208251953, "epoch": 0.0473015873015873, "grad_norm": 0.7728205919265747, "kl": 0.0004942284431308508, "learning_rate": 8.446712018140589e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 745 }, { "completion_length": 143.5, "epoch": 0.04736507936507937, "grad_norm": 0.0004496110195759684, "kl": 0.0004474685119930655, "learning_rate": 8.458049886621315e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 746 }, { "completion_length": 166.6428680419922, "epoch": 0.04742857142857143, "grad_norm": 0.0005085499724373221, "kl": 0.0005264314822852612, "learning_rate": 8.46938775510204e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 747 }, { "completion_length": 158.71429443359375, "epoch": 0.047492063492063495, "grad_norm": 0.0004747704660985619, "kl": 0.0004979207878932357, "learning_rate": 8.480725623582766e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 748 }, { "completion_length": 120.21429443359375, "epoch": 0.04755555555555555, "grad_norm": 0.0005783126107417047, "kl": 0.0006810923223383725, "learning_rate": 8.492063492063491e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 749 }, { "completion_length": 145.2857208251953, "epoch": 0.047619047619047616, "grad_norm": 0.0004104089748580009, "kl": 0.0005243417108431458, "learning_rate": 8.503401360544217e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 750 }, { "completion_length": 174.92857360839844, "epoch": 0.04768253968253968, "grad_norm": 0.0002471294719725847, "kl": 0.0003702333488035947, "learning_rate": 8.514739229024944e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 751 }, { "completion_length": 167.35714721679688, "epoch": 0.047746031746031745, "grad_norm": 0.0003736658545676619, "kl": 0.0004827269003726542, "learning_rate": 8.526077097505668e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 752 }, { "completion_length": 153.42857360839844, "epoch": 0.04780952380952381, "grad_norm": 0.00046593131264671683, "kl": 0.0006203118246048689, "learning_rate": 8.537414965986395e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 753 }, { "completion_length": 184.6428680419922, "epoch": 0.04787301587301587, "grad_norm": 0.0004268585180398077, "kl": 0.00045593842514790595, "learning_rate": 8.548752834467119e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 754 }, { "completion_length": 198.50001525878906, "epoch": 0.04793650793650794, "grad_norm": 0.0004168241866864264, "kl": 0.00048145194887183607, "learning_rate": 8.560090702947845e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 755 }, { "completion_length": 118.00000762939453, "epoch": 0.048, "grad_norm": 0.0008422178798355162, "kl": 0.0007476680330000818, "learning_rate": 8.571428571428572e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 756 }, { "completion_length": 171.1428680419922, "epoch": 0.048063492063492065, "grad_norm": 0.0005969658959656954, "kl": 0.00048174813855439425, "learning_rate": 8.582766439909296e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 757 }, { "completion_length": 158.71429443359375, "epoch": 0.04812698412698413, "grad_norm": 0.0003309306048322469, "kl": 0.0004270566860213876, "learning_rate": 8.594104308390023e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 758 }, { "completion_length": 210.7857208251953, "epoch": 0.04819047619047619, "grad_norm": 0.00031155828037299216, "kl": 0.00042050195042975247, "learning_rate": 8.605442176870747e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 759 }, { "completion_length": 143.35714721679688, "epoch": 0.04825396825396826, "grad_norm": 0.00031802800367586315, "kl": 0.0004168502346146852, "learning_rate": 8.616780045351474e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 760 }, { "completion_length": 182.7857208251953, "epoch": 0.048317460317460315, "grad_norm": 0.00047322065802291036, "kl": 0.00047532859025523067, "learning_rate": 8.628117913832199e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 761 }, { "completion_length": 171.6428680419922, "epoch": 0.04838095238095238, "grad_norm": 0.00028339101118035614, "kl": 0.0004091806767974049, "learning_rate": 8.639455782312925e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 762 }, { "completion_length": 129.57144165039062, "epoch": 0.04844444444444444, "grad_norm": 0.0006736554787494242, "kl": 0.0005965172313153744, "learning_rate": 8.65079365079365e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 763 }, { "completion_length": 122.5714340209961, "epoch": 0.04850793650793651, "grad_norm": 0.0005884260754100978, "kl": 0.000557421357370913, "learning_rate": 8.662131519274376e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 764 }, { "completion_length": 138.57144165039062, "epoch": 0.04857142857142857, "grad_norm": 0.00044824558426626027, "kl": 0.0005831734742969275, "learning_rate": 8.673469387755102e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 765 }, { "completion_length": 159.6428680419922, "epoch": 0.048634920634920635, "grad_norm": 0.0005008736625313759, "kl": 0.000478370493510738, "learning_rate": 8.684807256235828e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 766 }, { "completion_length": 146.2857208251953, "epoch": 0.0486984126984127, "grad_norm": 0.0006511374958790839, "kl": 0.0005591691005975008, "learning_rate": 8.696145124716553e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 767 }, { "completion_length": 171.85714721679688, "epoch": 0.04876190476190476, "grad_norm": 0.0006783216958865523, "kl": 0.0005175237311050296, "learning_rate": 8.707482993197278e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 768 }, { "completion_length": 164.92857360839844, "epoch": 0.04882539682539683, "grad_norm": 0.00041335602873004973, "kl": 0.0005755054298788309, "learning_rate": 8.718820861678004e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 769 }, { "completion_length": 207.85714721679688, "epoch": 0.04888888888888889, "grad_norm": 0.000252581259701401, "kl": 0.0004551017191261053, "learning_rate": 8.730158730158729e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 770 }, { "completion_length": 117.78572082519531, "epoch": 0.048952380952380956, "grad_norm": 0.00043209135765209794, "kl": 0.0004973990726284683, "learning_rate": 8.741496598639456e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 771 }, { "completion_length": 210.6428680419922, "epoch": 0.04901587301587301, "grad_norm": 0.0003337077796459198, "kl": 0.00045003005652688444, "learning_rate": 8.75283446712018e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 772 }, { "completion_length": 160.1428680419922, "epoch": 0.04907936507936508, "grad_norm": 0.0003882354067172855, "kl": 0.00046442344319075346, "learning_rate": 8.764172335600907e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 773 }, { "completion_length": 134.6428680419922, "epoch": 0.04914285714285714, "grad_norm": 0.0005893823690712452, "kl": 0.0004775213892571628, "learning_rate": 8.775510204081631e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 774 }, { "completion_length": 176.6428680419922, "epoch": 0.049206349206349205, "grad_norm": 0.00045992506784386933, "kl": 0.0005571196670643985, "learning_rate": 8.786848072562358e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 775 }, { "completion_length": 160.0, "epoch": 0.04926984126984127, "grad_norm": 0.000309668859699741, "kl": 0.00047275153337977827, "learning_rate": 8.798185941043084e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 776 }, { "completion_length": 174.00001525878906, "epoch": 0.04933333333333333, "grad_norm": 0.00033987488131970167, "kl": 0.0005259673926047981, "learning_rate": 8.80952380952381e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 777 }, { "completion_length": 172.7857208251953, "epoch": 0.0493968253968254, "grad_norm": 0.00048785589751787484, "kl": 0.0004651439667213708, "learning_rate": 8.820861678004535e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 778 }, { "completion_length": 147.21429443359375, "epoch": 0.04946031746031746, "grad_norm": 0.0002939036057796329, "kl": 0.0005012221517972648, "learning_rate": 8.832199546485259e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 779 }, { "completion_length": 136.35714721679688, "epoch": 0.049523809523809526, "grad_norm": 0.0006535153370350599, "kl": 0.0005146883195266128, "learning_rate": 8.843537414965986e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 780 }, { "completion_length": 236.9285888671875, "epoch": 0.04958730158730159, "grad_norm": 0.0009365629521198571, "kl": 0.0005930022452957928, "learning_rate": 8.854875283446712e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 781 }, { "completion_length": 155.2857208251953, "epoch": 0.049650793650793654, "grad_norm": 0.0005021584802307189, "kl": 0.0005213783006183803, "learning_rate": 8.866213151927437e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 782 }, { "completion_length": 159.85714721679688, "epoch": 0.04971428571428571, "grad_norm": 0.0004814634448848665, "kl": 0.0005300765624269843, "learning_rate": 8.877551020408163e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 783 }, { "completion_length": 120.28572082519531, "epoch": 0.049777777777777775, "grad_norm": 0.00047068807180039585, "kl": 0.0006066432106308639, "learning_rate": 8.888888888888888e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 784 }, { "completion_length": 146.2857208251953, "epoch": 0.04984126984126984, "grad_norm": 0.00041881308425217867, "kl": 0.0005397471832111478, "learning_rate": 8.900226757369614e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 785 }, { "completion_length": 153.6428680419922, "epoch": 0.0499047619047619, "grad_norm": 0.0005904261488467455, "kl": 0.0006331898621283472, "learning_rate": 8.91156462585034e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 786 }, { "completion_length": 161.57144165039062, "epoch": 0.04996825396825397, "grad_norm": 0.0006351718329824507, "kl": 0.0005261586629785597, "learning_rate": 8.922902494331065e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 787 }, { "completion_length": 178.71429443359375, "epoch": 0.05003174603174603, "grad_norm": 0.0003314698697067797, "kl": 0.0004941461374983191, "learning_rate": 8.934240362811792e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 788 }, { "completion_length": 171.2857208251953, "epoch": 0.050095238095238095, "grad_norm": 0.0003464310721028596, "kl": 0.0005061448900960386, "learning_rate": 8.945578231292516e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 789 }, { "completion_length": 176.7857208251953, "epoch": 0.05015873015873016, "grad_norm": 0.00039975720574148, "kl": 0.0005075091612525284, "learning_rate": 8.956916099773243e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 790 }, { "completion_length": 177.42857360839844, "epoch": 0.050222222222222224, "grad_norm": 0.0004276346880942583, "kl": 0.00046228294377215207, "learning_rate": 8.968253968253968e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 791 }, { "completion_length": 148.42857360839844, "epoch": 0.05028571428571429, "grad_norm": 0.0003655824111774564, "kl": 0.0004680465499404818, "learning_rate": 8.979591836734693e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 792 }, { "completion_length": 165.0, "epoch": 0.05034920634920635, "grad_norm": 0.0003815041563939303, "kl": 0.0005113600636832416, "learning_rate": 8.99092970521542e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 793 }, { "completion_length": 161.5, "epoch": 0.050412698412698416, "grad_norm": 0.0008145200554281473, "kl": 0.0006549566751345992, "learning_rate": 9.002267573696144e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 794 }, { "completion_length": 201.42857360839844, "epoch": 0.05047619047619047, "grad_norm": 0.0007039998308755457, "kl": 0.0005553095252253115, "learning_rate": 9.01360544217687e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 795 }, { "completion_length": 166.57144165039062, "epoch": 0.05053968253968254, "grad_norm": 0.0007012063288129866, "kl": 0.0006003215676173568, "learning_rate": 9.024943310657596e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 796 }, { "completion_length": 164.2857208251953, "epoch": 0.0506031746031746, "grad_norm": 0.0003740359388757497, "kl": 0.0005177292041480541, "learning_rate": 9.036281179138322e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 797 }, { "completion_length": 142.42857360839844, "epoch": 0.050666666666666665, "grad_norm": 0.000617173791397363, "kl": 0.0006600077031180263, "learning_rate": 9.047619047619047e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 798 }, { "completion_length": 170.42857360839844, "epoch": 0.05073015873015873, "grad_norm": 0.00043818348785862327, "kl": 0.000534733640961349, "learning_rate": 9.058956916099774e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 799 }, { "completion_length": 196.00001525878906, "epoch": 0.050793650793650794, "grad_norm": 0.00043023668695241213, "kl": 0.0005944440490566194, "learning_rate": 9.070294784580498e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 800 }, { "completion_length": 169.0, "epoch": 0.05085714285714286, "grad_norm": 0.0002682250633370131, "kl": 0.0003894192341249436, "learning_rate": 9.081632653061225e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 801 }, { "completion_length": 180.85714721679688, "epoch": 0.05092063492063492, "grad_norm": 0.0005062249256297946, "kl": 0.0005007227882742882, "learning_rate": 9.092970521541949e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 802 }, { "completion_length": 144.5, "epoch": 0.050984126984126986, "grad_norm": 0.00042913135257549584, "kl": 0.0004969800356775522, "learning_rate": 9.104308390022675e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 803 }, { "completion_length": 143.0, "epoch": 0.05104761904761905, "grad_norm": 0.0004291623190511018, "kl": 0.0005345930112525821, "learning_rate": 9.1156462585034e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 804 }, { "completion_length": 165.35714721679688, "epoch": 0.051111111111111114, "grad_norm": 0.00043280929094180465, "kl": 0.000566109549254179, "learning_rate": 9.126984126984126e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 805 }, { "completion_length": 131.5, "epoch": 0.05117460317460317, "grad_norm": 0.0005641483585350215, "kl": 0.0005717447493225336, "learning_rate": 9.138321995464853e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 806 }, { "completion_length": 141.7857208251953, "epoch": 0.051238095238095235, "grad_norm": 0.0005626431084237993, "kl": 0.0005338816554285586, "learning_rate": 9.149659863945577e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 807 }, { "completion_length": 156.71429443359375, "epoch": 0.0513015873015873, "grad_norm": 0.0005171789089217782, "kl": 0.0005038863164372742, "learning_rate": 9.160997732426304e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 808 }, { "completion_length": 192.35714721679688, "epoch": 0.051365079365079364, "grad_norm": 0.0005436483770608902, "kl": 0.0004642928543034941, "learning_rate": 9.172335600907028e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 809 }, { "completion_length": 125.00000762939453, "epoch": 0.05142857142857143, "grad_norm": 0.0005591521039605141, "kl": 0.000541854533366859, "learning_rate": 9.183673469387755e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 810 }, { "completion_length": 173.6428680419922, "epoch": 0.05149206349206349, "grad_norm": 0.0005121083231642842, "kl": 0.0005202208994887769, "learning_rate": 9.19501133786848e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 811 }, { "completion_length": 223.7857208251953, "epoch": 0.051555555555555556, "grad_norm": 0.00032300109160132706, "kl": 0.0005443364498205483, "learning_rate": 9.206349206349206e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 812 }, { "completion_length": 162.85714721679688, "epoch": 0.05161904761904762, "grad_norm": 0.0006131681147962809, "kl": 0.0005437443032860756, "learning_rate": 9.217687074829932e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 813 }, { "completion_length": 184.1428680419922, "epoch": 0.051682539682539684, "grad_norm": 0.00038957330980338156, "kl": 0.0005266477819532156, "learning_rate": 9.229024943310656e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 814 }, { "completion_length": 185.42857360839844, "epoch": 0.05174603174603175, "grad_norm": 0.000270423770416528, "kl": 0.00047622728743590415, "learning_rate": 9.240362811791383e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 815 }, { "completion_length": 182.00001525878906, "epoch": 0.05180952380952381, "grad_norm": 0.0005499226972460747, "kl": 0.0006298626540228724, "learning_rate": 9.251700680272108e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 816 }, { "completion_length": 185.7857208251953, "epoch": 0.051873015873015876, "grad_norm": 0.0004942824016325176, "kl": 0.00045717053581029177, "learning_rate": 9.263038548752834e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 817 }, { "completion_length": 165.92857360839844, "epoch": 0.051936507936507934, "grad_norm": 0.00040105232619680464, "kl": 0.0005678718443959951, "learning_rate": 9.27437641723356e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 818 }, { "completion_length": 168.07144165039062, "epoch": 0.052, "grad_norm": 0.0005192303215153515, "kl": 0.0006679146317765117, "learning_rate": 9.285714285714286e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 819 }, { "completion_length": 153.0, "epoch": 0.05206349206349206, "grad_norm": 0.00044355858699418604, "kl": 0.0005442381370812654, "learning_rate": 9.29705215419501e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 820 }, { "completion_length": 145.7857208251953, "epoch": 0.052126984126984126, "grad_norm": 0.00039975595427677035, "kl": 0.0005596948321908712, "learning_rate": 9.308390022675737e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 821 }, { "completion_length": 130.07144165039062, "epoch": 0.05219047619047619, "grad_norm": 0.000342684390489012, "kl": 0.00047120809904299676, "learning_rate": 9.319727891156462e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 822 }, { "completion_length": 152.57144165039062, "epoch": 0.052253968253968254, "grad_norm": 0.0005857303622178733, "kl": 0.0006367097375914454, "learning_rate": 9.331065759637188e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 823 }, { "completion_length": 154.42857360839844, "epoch": 0.05231746031746032, "grad_norm": 0.000519468099810183, "kl": 0.0005453192861750722, "learning_rate": 9.342403628117914e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 824 }, { "completion_length": 180.50001525878906, "epoch": 0.05238095238095238, "grad_norm": 0.00032182413269765675, "kl": 0.0004250102210789919, "learning_rate": 9.35374149659864e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 825 }, { "completion_length": 183.07144165039062, "epoch": 0.052444444444444446, "grad_norm": 0.00036183849442750216, "kl": 0.0004618424572981894, "learning_rate": 9.365079365079365e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 826 }, { "completion_length": 150.0, "epoch": 0.05250793650793651, "grad_norm": 0.0005229064263403416, "kl": 0.000461550778709352, "learning_rate": 9.376417233560089e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 827 }, { "completion_length": 153.42857360839844, "epoch": 0.052571428571428575, "grad_norm": 0.0006677102646790445, "kl": 0.0005201970925554633, "learning_rate": 9.387755102040816e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 828 }, { "completion_length": 182.85714721679688, "epoch": 0.05263492063492063, "grad_norm": 0.0003344423894304782, "kl": 0.0005044783465564251, "learning_rate": 9.399092970521542e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 829 }, { "completion_length": 157.7857208251953, "epoch": 0.052698412698412696, "grad_norm": 0.00039799613296054304, "kl": 0.00044727211934514344, "learning_rate": 9.410430839002267e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 830 }, { "completion_length": 169.07144165039062, "epoch": 0.05276190476190476, "grad_norm": 0.00042297522304579616, "kl": 0.0005113209481351078, "learning_rate": 9.421768707482993e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 831 }, { "completion_length": 145.7857208251953, "epoch": 0.052825396825396824, "grad_norm": 0.0005845141131430864, "kl": 0.0005244395579211414, "learning_rate": 9.433106575963718e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 832 }, { "completion_length": 172.21429443359375, "epoch": 0.05288888888888889, "grad_norm": 0.00047155292122624815, "kl": 0.0005150038050487638, "learning_rate": 9.444444444444444e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 833 }, { "completion_length": 139.07144165039062, "epoch": 0.05295238095238095, "grad_norm": 0.0005683982162736356, "kl": 0.0005613989196717739, "learning_rate": 9.455782312925171e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 834 }, { "completion_length": 127.78572082519531, "epoch": 0.053015873015873016, "grad_norm": 0.0009282697574235499, "kl": 0.0006542597548104823, "learning_rate": 9.467120181405895e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 835 }, { "completion_length": 175.00001525878906, "epoch": 0.05307936507936508, "grad_norm": 0.0002511491475161165, "kl": 0.0004079650971107185, "learning_rate": 9.478458049886622e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 836 }, { "completion_length": 207.50001525878906, "epoch": 0.053142857142857144, "grad_norm": 0.0003401729045435786, "kl": 0.00047177853411994874, "learning_rate": 9.489795918367346e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 837 }, { "completion_length": 149.35714721679688, "epoch": 0.05320634920634921, "grad_norm": 0.0003435658582020551, "kl": 0.0004391949041746557, "learning_rate": 9.501133786848072e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 838 }, { "completion_length": 147.42857360839844, "epoch": 0.05326984126984127, "grad_norm": 0.0005911719636060297, "kl": 0.000589685165323317, "learning_rate": 9.512471655328798e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 839 }, { "completion_length": 157.57144165039062, "epoch": 0.05333333333333334, "grad_norm": 0.8033981919288635, "kl": 0.0004919514176435769, "learning_rate": 9.523809523809523e-08, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 840 }, { "completion_length": 150.57144165039062, "epoch": 0.053396825396825394, "grad_norm": 0.00044247930054552853, "kl": 0.000574673933442682, "learning_rate": 9.53514739229025e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 841 }, { "completion_length": 137.1428680419922, "epoch": 0.05346031746031746, "grad_norm": 0.0008212943212129176, "kl": 0.0006150456028990448, "learning_rate": 9.546485260770974e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 842 }, { "completion_length": 196.35714721679688, "epoch": 0.05352380952380952, "grad_norm": 0.0003890233638230711, "kl": 0.0005492194904945791, "learning_rate": 9.5578231292517e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 843 }, { "completion_length": 147.6428680419922, "epoch": 0.053587301587301586, "grad_norm": 0.0004669761401601136, "kl": 0.00045508015318773687, "learning_rate": 9.569160997732426e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 844 }, { "completion_length": 200.07144165039062, "epoch": 0.05365079365079365, "grad_norm": 0.0002532208163756877, "kl": 0.00041894629248417914, "learning_rate": 9.580498866213152e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 845 }, { "completion_length": 158.57144165039062, "epoch": 0.053714285714285714, "grad_norm": 0.0004440140910446644, "kl": 0.0005263880593702197, "learning_rate": 9.591836734693877e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 846 }, { "completion_length": 195.85714721679688, "epoch": 0.05377777777777778, "grad_norm": 0.0002923274878412485, "kl": 0.00047721530427224934, "learning_rate": 9.603174603174603e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 847 }, { "completion_length": 137.07144165039062, "epoch": 0.05384126984126984, "grad_norm": 0.00047914162860251963, "kl": 0.000567432667594403, "learning_rate": 9.614512471655328e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 848 }, { "completion_length": 153.7857208251953, "epoch": 0.05390476190476191, "grad_norm": 0.000548410345800221, "kl": 0.00047879948397167027, "learning_rate": 9.625850340136054e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 849 }, { "completion_length": 137.5, "epoch": 0.05396825396825397, "grad_norm": 0.0005389882135204971, "kl": 0.0005735663580708206, "learning_rate": 9.63718820861678e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 850 }, { "completion_length": 138.07144165039062, "epoch": 0.054031746031746035, "grad_norm": 0.00039480195846408606, "kl": 0.0005153567180968821, "learning_rate": 9.648526077097505e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 851 }, { "completion_length": 157.42857360839844, "epoch": 0.05409523809523809, "grad_norm": 0.00043208981514908373, "kl": 0.0005157968844287097, "learning_rate": 9.65986394557823e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 852 }, { "completion_length": 163.7857208251953, "epoch": 0.054158730158730156, "grad_norm": 0.00039732063305564225, "kl": 0.0004293033271096647, "learning_rate": 9.671201814058956e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 853 }, { "completion_length": 166.21429443359375, "epoch": 0.05422222222222222, "grad_norm": 0.0005903501296415925, "kl": 0.0006082942709326744, "learning_rate": 9.682539682539683e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 854 }, { "completion_length": 174.7857208251953, "epoch": 0.054285714285714284, "grad_norm": 0.0003169176052324474, "kl": 0.0004981447709724307, "learning_rate": 9.693877551020407e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 855 }, { "completion_length": 181.7857208251953, "epoch": 0.05434920634920635, "grad_norm": 0.00033495924435555935, "kl": 0.00045581755694001913, "learning_rate": 9.705215419501134e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 856 }, { "completion_length": 127.85714721679688, "epoch": 0.05441269841269841, "grad_norm": 0.0007031126879155636, "kl": 0.0005959555855952203, "learning_rate": 9.716553287981858e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 857 }, { "completion_length": 145.1428680419922, "epoch": 0.05447619047619048, "grad_norm": 0.0002820639929268509, "kl": 0.00038876550388522446, "learning_rate": 9.727891156462585e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 858 }, { "completion_length": 159.42857360839844, "epoch": 0.05453968253968254, "grad_norm": 0.00037285263533703983, "kl": 0.0005226959474384785, "learning_rate": 9.739229024943311e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 859 }, { "completion_length": 165.71429443359375, "epoch": 0.054603174603174605, "grad_norm": 0.0006453839596360922, "kl": 0.0005858525983057916, "learning_rate": 9.750566893424036e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 860 }, { "completion_length": 167.6428680419922, "epoch": 0.05466666666666667, "grad_norm": 0.0004813374252989888, "kl": 0.0005100056296214461, "learning_rate": 9.761904761904762e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 861 }, { "completion_length": 144.2857208251953, "epoch": 0.05473015873015873, "grad_norm": 0.0005716648884117603, "kl": 0.0005242686020210385, "learning_rate": 9.773242630385486e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 862 }, { "completion_length": 235.2857208251953, "epoch": 0.05479365079365079, "grad_norm": 0.0004580697859637439, "kl": 0.00047176858060993254, "learning_rate": 9.784580498866213e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 863 }, { "completion_length": 151.35714721679688, "epoch": 0.054857142857142854, "grad_norm": 0.0006853957311250269, "kl": 0.0006394624942913651, "learning_rate": 9.795918367346938e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 864 }, { "completion_length": 149.07144165039062, "epoch": 0.05492063492063492, "grad_norm": 0.00041123555274680257, "kl": 0.0005103612202219665, "learning_rate": 9.807256235827664e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 865 }, { "completion_length": 183.00001525878906, "epoch": 0.05498412698412698, "grad_norm": 0.0005142674199305475, "kl": 0.0005555584793910384, "learning_rate": 9.81859410430839e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 866 }, { "completion_length": 137.7857208251953, "epoch": 0.05504761904761905, "grad_norm": 0.0005750372656621039, "kl": 0.0006461308221332729, "learning_rate": 9.829931972789115e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 867 }, { "completion_length": 149.92857360839844, "epoch": 0.05511111111111111, "grad_norm": 0.0005926010198891163, "kl": 0.0006458762218244374, "learning_rate": 9.84126984126984e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 868 }, { "completion_length": 137.6428680419922, "epoch": 0.055174603174603175, "grad_norm": 0.00040394565439783037, "kl": 0.00045396541827358305, "learning_rate": 9.852607709750567e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 869 }, { "completion_length": 172.42857360839844, "epoch": 0.05523809523809524, "grad_norm": 0.00044312869431450963, "kl": 0.0003543568018358201, "learning_rate": 9.863945578231292e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 870 }, { "completion_length": 185.6428680419922, "epoch": 0.0553015873015873, "grad_norm": 0.0004172171466052532, "kl": 0.0004706460749730468, "learning_rate": 9.875283446712019e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 871 }, { "completion_length": 184.35714721679688, "epoch": 0.05536507936507937, "grad_norm": 0.0003957606677431613, "kl": 0.0004185287980362773, "learning_rate": 9.886621315192743e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 872 }, { "completion_length": 157.35714721679688, "epoch": 0.05542857142857143, "grad_norm": 0.0005994882085360587, "kl": 0.0006767712184228003, "learning_rate": 9.897959183673468e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 873 }, { "completion_length": 130.0, "epoch": 0.055492063492063495, "grad_norm": 0.0007726731128059328, "kl": 0.0006644005188718438, "learning_rate": 9.909297052154195e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 874 }, { "completion_length": 135.71429443359375, "epoch": 0.05555555555555555, "grad_norm": 0.0007682704017497599, "kl": 0.0006364217260852456, "learning_rate": 9.92063492063492e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 875 }, { "completion_length": 158.21429443359375, "epoch": 0.05561904761904762, "grad_norm": 0.00056100869551301, "kl": 0.0006491791573353112, "learning_rate": 9.931972789115646e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 876 }, { "completion_length": 180.21429443359375, "epoch": 0.05568253968253968, "grad_norm": 0.0004901498323306441, "kl": 0.0005196251440793276, "learning_rate": 9.94331065759637e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 877 }, { "completion_length": 170.42857360839844, "epoch": 0.055746031746031745, "grad_norm": 0.0005066578160040081, "kl": 0.0005015455535613, "learning_rate": 9.954648526077097e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 878 }, { "completion_length": 157.57144165039062, "epoch": 0.05580952380952381, "grad_norm": 0.0004047029942739755, "kl": 0.000523323891684413, "learning_rate": 9.965986394557823e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 879 }, { "completion_length": 161.42857360839844, "epoch": 0.05587301587301587, "grad_norm": 0.0002774769382085651, "kl": 0.0004085954569745809, "learning_rate": 9.977324263038548e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 880 }, { "completion_length": 199.92857360839844, "epoch": 0.05593650793650794, "grad_norm": 0.0004226342134643346, "kl": 0.0004740757867693901, "learning_rate": 9.988662131519274e-08, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 881 }, { "completion_length": 138.0, "epoch": 0.056, "grad_norm": 0.0005061914562247694, "kl": 0.0005250198300927877, "learning_rate": 1e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 882 }, { "completion_length": 138.07144165039062, "epoch": 0.056063492063492065, "grad_norm": 0.0005656653083860874, "kl": 0.000528413918800652, "learning_rate": 1.0011337868480725e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 883 }, { "completion_length": 156.71429443359375, "epoch": 0.05612698412698413, "grad_norm": 0.0021116205025464296, "kl": 0.0006508860969915986, "learning_rate": 1.0022675736961452e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 884 }, { "completion_length": 186.07144165039062, "epoch": 0.05619047619047619, "grad_norm": 0.00042193385888822377, "kl": 0.00048579982831142843, "learning_rate": 1.0034013605442176e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 885 }, { "completion_length": 169.71429443359375, "epoch": 0.05625396825396825, "grad_norm": 0.0005096945678815246, "kl": 0.000543316244147718, "learning_rate": 1.0045351473922902e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 886 }, { "completion_length": 153.1428680419922, "epoch": 0.056317460317460315, "grad_norm": 0.0003032259992323816, "kl": 0.0003854760143440217, "learning_rate": 1.0056689342403627e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 887 }, { "completion_length": 152.07144165039062, "epoch": 0.05638095238095238, "grad_norm": 0.0005043703713454306, "kl": 0.0005714877624996006, "learning_rate": 1.0068027210884353e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 888 }, { "completion_length": 144.0, "epoch": 0.05644444444444444, "grad_norm": 0.0004691289213951677, "kl": 0.0004825268988497555, "learning_rate": 1.007936507936508e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 889 }, { "completion_length": 152.1428680419922, "epoch": 0.05650793650793651, "grad_norm": 0.000547628675121814, "kl": 0.0005545837339013815, "learning_rate": 1.0090702947845804e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 890 }, { "completion_length": 137.2857208251953, "epoch": 0.05657142857142857, "grad_norm": 0.0008759173797443509, "kl": 0.0006624658708460629, "learning_rate": 1.0102040816326531e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 891 }, { "completion_length": 181.1428680419922, "epoch": 0.056634920634920635, "grad_norm": 0.0004690327332355082, "kl": 0.0004536906781140715, "learning_rate": 1.0113378684807255e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 892 }, { "completion_length": 139.42857360839844, "epoch": 0.0566984126984127, "grad_norm": 0.0006087054498493671, "kl": 0.0006294951890595257, "learning_rate": 1.0124716553287982e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 893 }, { "completion_length": 148.6428680419922, "epoch": 0.05676190476190476, "grad_norm": 0.0006914872792549431, "kl": 0.0006078969454392791, "learning_rate": 1.0136054421768707e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 894 }, { "completion_length": 193.85714721679688, "epoch": 0.05682539682539683, "grad_norm": 0.0004429106484167278, "kl": 0.0005117994151078165, "learning_rate": 1.0147392290249433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 895 }, { "completion_length": 132.21429443359375, "epoch": 0.05688888888888889, "grad_norm": 0.0007726086769253016, "kl": 0.0007436248124577105, "learning_rate": 1.0158730158730159e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 896 }, { "completion_length": 151.85714721679688, "epoch": 0.056952380952380956, "grad_norm": 0.0006281511159613729, "kl": 0.0006044363835826516, "learning_rate": 1.0170068027210883e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 897 }, { "completion_length": 151.42857360839844, "epoch": 0.05701587301587301, "grad_norm": 0.0004404983774293214, "kl": 0.00046357326209545135, "learning_rate": 1.018140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 898 }, { "completion_length": 141.21429443359375, "epoch": 0.05707936507936508, "grad_norm": 0.0005234973505139351, "kl": 0.0005892703193239868, "learning_rate": 1.0192743764172335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 899 }, { "completion_length": 173.07144165039062, "epoch": 0.05714285714285714, "grad_norm": 0.0002429264859529212, "kl": 0.00043429300421848893, "learning_rate": 1.0204081632653061e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 900 }, { "completion_length": 169.0, "epoch": 0.057206349206349205, "grad_norm": 0.00041674915701150894, "kl": 0.00047287941561080515, "learning_rate": 1.0215419501133786e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 901 }, { "completion_length": 167.92857360839844, "epoch": 0.05726984126984127, "grad_norm": 0.0003174407465849072, "kl": 0.00048750522546470165, "learning_rate": 1.0226757369614512e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 902 }, { "completion_length": 171.21429443359375, "epoch": 0.05733333333333333, "grad_norm": 0.00042186910286545753, "kl": 0.0005073443753644824, "learning_rate": 1.0238095238095237e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 903 }, { "completion_length": 165.71429443359375, "epoch": 0.0573968253968254, "grad_norm": 0.0005257924785837531, "kl": 0.000544652808457613, "learning_rate": 1.0249433106575964e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 904 }, { "completion_length": 132.92857360839844, "epoch": 0.05746031746031746, "grad_norm": 0.0009040236473083496, "kl": 0.000638911675196141, "learning_rate": 1.0260770975056688e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 905 }, { "completion_length": 129.07144165039062, "epoch": 0.057523809523809526, "grad_norm": 0.00048264767974615097, "kl": 0.00048438942758366466, "learning_rate": 1.0272108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 906 }, { "completion_length": 142.35714721679688, "epoch": 0.05758730158730159, "grad_norm": 0.0005811500595882535, "kl": 0.0006732292822562158, "learning_rate": 1.028344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 907 }, { "completion_length": 131.1428680419922, "epoch": 0.057650793650793654, "grad_norm": 0.000487741781398654, "kl": 0.0005243583582341671, "learning_rate": 1.0294784580498865e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 908 }, { "completion_length": 143.2857208251953, "epoch": 0.05771428571428571, "grad_norm": 0.000713896588422358, "kl": 0.0005819475627504289, "learning_rate": 1.0306122448979592e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 909 }, { "completion_length": 201.7857208251953, "epoch": 0.057777777777777775, "grad_norm": 0.00036868901224806905, "kl": 0.0003649206191767007, "learning_rate": 1.0317460317460316e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 910 }, { "completion_length": 147.1428680419922, "epoch": 0.05784126984126984, "grad_norm": 0.0006816937820985913, "kl": 0.000597202917560935, "learning_rate": 1.0328798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 911 }, { "completion_length": 188.00001525878906, "epoch": 0.0579047619047619, "grad_norm": 0.0003887007769662887, "kl": 0.000453411164926365, "learning_rate": 1.0340136054421767e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 912 }, { "completion_length": 192.85714721679688, "epoch": 0.05796825396825397, "grad_norm": 0.00042916942038573325, "kl": 0.0005317514296621084, "learning_rate": 1.0351473922902494e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 913 }, { "completion_length": 147.5, "epoch": 0.05803174603174603, "grad_norm": 0.0006376238889060915, "kl": 0.0005505834124051034, "learning_rate": 1.036281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 914 }, { "completion_length": 121.00000762939453, "epoch": 0.058095238095238096, "grad_norm": 0.0007767696515657008, "kl": 0.0006687831482850015, "learning_rate": 1.0374149659863945e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 915 }, { "completion_length": 218.1428680419922, "epoch": 0.05815873015873016, "grad_norm": 0.0005078971153125167, "kl": 0.0004925166722387075, "learning_rate": 1.0385487528344671e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 916 }, { "completion_length": 189.85714721679688, "epoch": 0.058222222222222224, "grad_norm": 0.0004290988144930452, "kl": 0.00048146897461265326, "learning_rate": 1.0396825396825396e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 917 }, { "completion_length": 173.85714721679688, "epoch": 0.05828571428571429, "grad_norm": 0.0004900228232145309, "kl": 0.0005197395803406835, "learning_rate": 1.0408163265306122e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 918 }, { "completion_length": 183.50001525878906, "epoch": 0.05834920634920635, "grad_norm": 0.0003932609979528934, "kl": 0.0004760443698614836, "learning_rate": 1.0419501133786849e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 919 }, { "completion_length": 137.35714721679688, "epoch": 0.058412698412698416, "grad_norm": 0.0003253074537497014, "kl": 0.0004894952289760113, "learning_rate": 1.0430839002267573e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 920 }, { "completion_length": 152.21429443359375, "epoch": 0.05847619047619047, "grad_norm": 0.000485714350361377, "kl": 0.0005199160659685731, "learning_rate": 1.0442176870748298e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 921 }, { "completion_length": 171.42857360839844, "epoch": 0.05853968253968254, "grad_norm": 0.0004095188342034817, "kl": 0.00044067593989893794, "learning_rate": 1.0453514739229024e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 922 }, { "completion_length": 150.07144165039062, "epoch": 0.0586031746031746, "grad_norm": 0.0003506515931803733, "kl": 0.0004357369034551084, "learning_rate": 1.046485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 923 }, { "completion_length": 166.71429443359375, "epoch": 0.058666666666666666, "grad_norm": 0.0003273247857578099, "kl": 0.00048513145884498954, "learning_rate": 1.0476190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 924 }, { "completion_length": 195.1428680419922, "epoch": 0.05873015873015873, "grad_norm": 0.00033335856278426945, "kl": 0.0004084449610672891, "learning_rate": 1.04875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 925 }, { "completion_length": 173.50001525878906, "epoch": 0.058793650793650794, "grad_norm": 0.0005357545451261103, "kl": 0.0005541420541703701, "learning_rate": 1.0498866213151927e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 926 }, { "completion_length": 150.7857208251953, "epoch": 0.05885714285714286, "grad_norm": 0.000511733116582036, "kl": 0.0007025510421954095, "learning_rate": 1.0510204081632652e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 927 }, { "completion_length": 180.07144165039062, "epoch": 0.05892063492063492, "grad_norm": 0.0005335099413059652, "kl": 0.00054998102132231, "learning_rate": 1.0521541950113379e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 928 }, { "completion_length": 178.2857208251953, "epoch": 0.058984126984126986, "grad_norm": 0.0007262363797053695, "kl": 0.0005774945020675659, "learning_rate": 1.0532879818594104e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 929 }, { "completion_length": 163.7857208251953, "epoch": 0.05904761904761905, "grad_norm": 0.00046304494026117027, "kl": 0.0004487884580157697, "learning_rate": 1.054421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 930 }, { "completion_length": 156.07144165039062, "epoch": 0.059111111111111114, "grad_norm": 0.0006632120348513126, "kl": 0.0006044239271432161, "learning_rate": 1.0555555555555555e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 931 }, { "completion_length": 140.21429443359375, "epoch": 0.05917460317460317, "grad_norm": 0.0005138430278748274, "kl": 0.0005070529878139496, "learning_rate": 1.056689342403628e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 932 }, { "completion_length": 144.6428680419922, "epoch": 0.059238095238095236, "grad_norm": 0.000642760714981705, "kl": 0.000588299531955272, "learning_rate": 1.0578231292517006e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 933 }, { "completion_length": 181.00001525878906, "epoch": 0.0593015873015873, "grad_norm": 0.0005106753087602556, "kl": 0.0004918179474771023, "learning_rate": 1.0589569160997732e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 934 }, { "completion_length": 178.57144165039062, "epoch": 0.059365079365079364, "grad_norm": 0.0004225834854878485, "kl": 0.0004068621201440692, "learning_rate": 1.0600907029478457e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 935 }, { "completion_length": 136.6428680419922, "epoch": 0.05942857142857143, "grad_norm": 1.6036392450332642, "kl": 0.0005773253506049514, "learning_rate": 1.0612244897959183e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 936 }, { "completion_length": 158.85714721679688, "epoch": 0.05949206349206349, "grad_norm": 0.000378119497327134, "kl": 0.0005181708838790655, "learning_rate": 1.0623582766439908e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 937 }, { "completion_length": 215.2857208251953, "epoch": 0.059555555555555556, "grad_norm": 0.00037226363201625645, "kl": 0.0004754953843075782, "learning_rate": 1.0634920634920634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 938 }, { "completion_length": 212.07144165039062, "epoch": 0.05961904761904762, "grad_norm": 0.00036815294879488647, "kl": 0.0004938670317642391, "learning_rate": 1.0646258503401361e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 939 }, { "completion_length": 146.1428680419922, "epoch": 0.059682539682539684, "grad_norm": 0.0005429721204563975, "kl": 0.0005914689390920103, "learning_rate": 1.0657596371882085e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 940 }, { "completion_length": 182.21429443359375, "epoch": 0.05974603174603175, "grad_norm": 0.0005394906620495021, "kl": 0.0004964814288541675, "learning_rate": 1.0668934240362812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 941 }, { "completion_length": 175.21429443359375, "epoch": 0.05980952380952381, "grad_norm": 0.00036202845512889326, "kl": 0.0005425862618722022, "learning_rate": 1.0680272108843536e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 942 }, { "completion_length": 178.50001525878906, "epoch": 0.05987301587301587, "grad_norm": 0.0004925117827951908, "kl": 0.0005117361433804035, "learning_rate": 1.0691609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 943 }, { "completion_length": 171.50001525878906, "epoch": 0.059936507936507934, "grad_norm": 0.0004660292179323733, "kl": 0.0005026755388826132, "learning_rate": 1.0702947845804989e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 944 }, { "completion_length": 198.50001525878906, "epoch": 0.06, "grad_norm": 0.0003779721155297011, "kl": 0.0005060558905825019, "learning_rate": 1.0714285714285713e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 945 }, { "completion_length": 166.71429443359375, "epoch": 0.06006349206349206, "grad_norm": 0.0006783477147109807, "kl": 0.0006103545892983675, "learning_rate": 1.072562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 946 }, { "completion_length": 178.7857208251953, "epoch": 0.060126984126984126, "grad_norm": 0.000399876938899979, "kl": 0.0005584832979366183, "learning_rate": 1.0736961451247164e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 947 }, { "completion_length": 158.07144165039062, "epoch": 0.06019047619047619, "grad_norm": 0.00048165908083319664, "kl": 0.0004754774854518473, "learning_rate": 1.0748299319727891e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 948 }, { "completion_length": 173.1428680419922, "epoch": 0.060253968253968254, "grad_norm": 0.00031033827690407634, "kl": 0.00044966978020966053, "learning_rate": 1.0759637188208616e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 949 }, { "completion_length": 141.7857208251953, "epoch": 0.06031746031746032, "grad_norm": 0.00034616910852491856, "kl": 0.00045624596532434225, "learning_rate": 1.0770975056689342e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 950 }, { "completion_length": 162.0, "epoch": 0.06038095238095238, "grad_norm": 0.00045027691521681845, "kl": 0.0006435999530367553, "learning_rate": 1.0782312925170067e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 951 }, { "completion_length": 145.35714721679688, "epoch": 0.060444444444444446, "grad_norm": 0.0005502174608409405, "kl": 0.0005819269572384655, "learning_rate": 1.0793650793650794e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 952 }, { "completion_length": 144.2857208251953, "epoch": 0.06050793650793651, "grad_norm": 0.0007396648870781064, "kl": 0.0007167106959968805, "learning_rate": 1.0804988662131519e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 953 }, { "completion_length": 184.21429443359375, "epoch": 0.060571428571428575, "grad_norm": 0.00045330825378187, "kl": 0.0006039399886503816, "learning_rate": 1.0816326530612245e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 954 }, { "completion_length": 154.71429443359375, "epoch": 0.06063492063492063, "grad_norm": 0.0007090200087986887, "kl": 0.0006267146673053503, "learning_rate": 1.082766439909297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 955 }, { "completion_length": 190.1428680419922, "epoch": 0.060698412698412696, "grad_norm": 0.0004114945186302066, "kl": 0.0004037345352116972, "learning_rate": 1.0839002267573695e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 956 }, { "completion_length": 176.71429443359375, "epoch": 0.06076190476190476, "grad_norm": 0.0006675521144643426, "kl": 0.0006626852555200458, "learning_rate": 1.0850340136054422e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 957 }, { "completion_length": 162.1428680419922, "epoch": 0.060825396825396824, "grad_norm": 0.000485168828163296, "kl": 0.0004485530371312052, "learning_rate": 1.0861678004535146e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 958 }, { "completion_length": 170.92857360839844, "epoch": 0.06088888888888889, "grad_norm": 0.0005691568367183208, "kl": 0.0006337263039313257, "learning_rate": 1.0873015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 959 }, { "completion_length": 148.57144165039062, "epoch": 0.06095238095238095, "grad_norm": 0.0005698922323063016, "kl": 0.000628036679700017, "learning_rate": 1.0884353741496597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 960 }, { "completion_length": 195.71429443359375, "epoch": 0.061015873015873016, "grad_norm": 0.000557157036382705, "kl": 0.0004994332557544112, "learning_rate": 1.0895691609977324e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 961 }, { "completion_length": 150.42857360839844, "epoch": 0.06107936507936508, "grad_norm": 0.0005822438397444785, "kl": 0.0006998457247391343, "learning_rate": 1.0907029478458048e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 962 }, { "completion_length": 183.00001525878906, "epoch": 0.061142857142857145, "grad_norm": 0.0004421861667651683, "kl": 0.0005182396271266043, "learning_rate": 1.0918367346938775e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 963 }, { "completion_length": 166.71429443359375, "epoch": 0.06120634920634921, "grad_norm": 0.000638869940303266, "kl": 0.0005312838475219905, "learning_rate": 1.0929705215419501e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 964 }, { "completion_length": 135.0, "epoch": 0.06126984126984127, "grad_norm": 0.0008461158722639084, "kl": 0.0006940577877685428, "learning_rate": 1.0941043083900226e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 965 }, { "completion_length": 172.6428680419922, "epoch": 0.06133333333333333, "grad_norm": 0.0005957032553851604, "kl": 0.0007194449426606297, "learning_rate": 1.0952380952380952e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 966 }, { "completion_length": 186.1428680419922, "epoch": 0.061396825396825394, "grad_norm": 0.0005146036855876446, "kl": 0.0006554003339260817, "learning_rate": 1.0963718820861676e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 967 }, { "completion_length": 143.7857208251953, "epoch": 0.06146031746031746, "grad_norm": 0.0005031956825405359, "kl": 0.0005981808644719422, "learning_rate": 1.0975056689342403e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 968 }, { "completion_length": 141.1428680419922, "epoch": 0.06152380952380952, "grad_norm": 0.0006242170929908752, "kl": 0.0006103810155764222, "learning_rate": 1.0986394557823129e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 969 }, { "completion_length": 118.50000762939453, "epoch": 0.061587301587301586, "grad_norm": 0.0007936524925753474, "kl": 0.0005892685730941594, "learning_rate": 1.0997732426303854e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 970 }, { "completion_length": 157.5, "epoch": 0.06165079365079365, "grad_norm": 0.0005213521071709692, "kl": 0.000505615898873657, "learning_rate": 1.100907029478458e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 971 }, { "completion_length": 190.07144165039062, "epoch": 0.061714285714285715, "grad_norm": 0.0005422636168077588, "kl": 0.0005541769787669182, "learning_rate": 1.1020408163265307e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 972 }, { "completion_length": 178.2857208251953, "epoch": 0.06177777777777778, "grad_norm": 0.00034078213502652943, "kl": 0.0005555347306653857, "learning_rate": 1.1031746031746031e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 973 }, { "completion_length": 195.50001525878906, "epoch": 0.06184126984126984, "grad_norm": 0.0004880517954006791, "kl": 0.00043701022514142096, "learning_rate": 1.1043083900226758e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 974 }, { "completion_length": 156.6428680419922, "epoch": 0.06190476190476191, "grad_norm": 0.0005942424177192152, "kl": 0.0007208384340628982, "learning_rate": 1.1054421768707482e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 975 }, { "completion_length": 153.57144165039062, "epoch": 0.06196825396825397, "grad_norm": 0.0007250334601849318, "kl": 0.0006130477413535118, "learning_rate": 1.1065759637188209e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 976 }, { "completion_length": 168.85714721679688, "epoch": 0.062031746031746035, "grad_norm": 0.00044326091301627457, "kl": 0.0006760567775927484, "learning_rate": 1.1077097505668934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 977 }, { "completion_length": 141.5, "epoch": 0.06209523809523809, "grad_norm": 0.0004322118766140193, "kl": 0.0005435377242974937, "learning_rate": 1.108843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 978 }, { "completion_length": 178.35714721679688, "epoch": 0.062158730158730156, "grad_norm": 0.00042302097426727414, "kl": 0.0005316619644872844, "learning_rate": 1.1099773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 979 }, { "completion_length": 178.6428680419922, "epoch": 0.06222222222222222, "grad_norm": 0.0006673847092315555, "kl": 0.0005761641077697277, "learning_rate": 1.111111111111111e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 980 }, { "completion_length": 138.57144165039062, "epoch": 0.062285714285714285, "grad_norm": 0.0005221824976615608, "kl": 0.0005557353724725544, "learning_rate": 1.1122448979591836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 981 }, { "completion_length": 168.21429443359375, "epoch": 0.06234920634920635, "grad_norm": 0.0005745950038544834, "kl": 0.0005947809549979866, "learning_rate": 1.1133786848072562e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 982 }, { "completion_length": 150.0, "epoch": 0.06241269841269841, "grad_norm": 0.0006764248828403652, "kl": 0.0006324356654658914, "learning_rate": 1.1145124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 983 }, { "completion_length": 169.92857360839844, "epoch": 0.06247619047619048, "grad_norm": 0.000709943356923759, "kl": 0.000557995866984129, "learning_rate": 1.1156462585034013e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 984 }, { "completion_length": 176.92857360839844, "epoch": 0.06253968253968253, "grad_norm": 0.0004996609059162438, "kl": 0.0006766688893549144, "learning_rate": 1.1167800453514739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 985 }, { "completion_length": 175.71429443359375, "epoch": 0.0626031746031746, "grad_norm": 0.0005546045722439885, "kl": 0.0005971530335955322, "learning_rate": 1.1179138321995464e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 986 }, { "completion_length": 149.42857360839844, "epoch": 0.06266666666666666, "grad_norm": 1.0428568124771118, "kl": 0.0005524402367882431, "learning_rate": 1.1190476190476191e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 987 }, { "completion_length": 185.42857360839844, "epoch": 0.06273015873015873, "grad_norm": 0.00034474744461476803, "kl": 0.00047928205458447337, "learning_rate": 1.1201814058956915e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 988 }, { "completion_length": 193.1428680419922, "epoch": 0.06279365079365079, "grad_norm": 0.00036349293077364564, "kl": 0.0005466099828481674, "learning_rate": 1.1213151927437642e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 989 }, { "completion_length": 200.7857208251953, "epoch": 0.06285714285714286, "grad_norm": 0.0004680304555222392, "kl": 0.0005373318563215435, "learning_rate": 1.1224489795918366e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 990 }, { "completion_length": 144.71429443359375, "epoch": 0.06292063492063492, "grad_norm": 0.0007718125125393271, "kl": 0.0007123101386241615, "learning_rate": 1.1235827664399092e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 991 }, { "completion_length": 177.71429443359375, "epoch": 0.06298412698412699, "grad_norm": 0.0004332766111474484, "kl": 0.0005602681776508689, "learning_rate": 1.1247165532879819e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 992 }, { "completion_length": 139.92857360839844, "epoch": 0.06304761904761905, "grad_norm": 0.0008725118823349476, "kl": 0.0007149603334255517, "learning_rate": 1.1258503401360543e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 993 }, { "completion_length": 187.7857208251953, "epoch": 0.06311111111111112, "grad_norm": 0.0004908363916911185, "kl": 0.0005418679211288691, "learning_rate": 1.126984126984127e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 994 }, { "completion_length": 135.42857360839844, "epoch": 0.06317460317460317, "grad_norm": 1.5672316551208496, "kl": 0.0006646491819992661, "learning_rate": 1.1281179138321994e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 995 }, { "completion_length": 167.85714721679688, "epoch": 0.06323809523809523, "grad_norm": 0.0005651825340464711, "kl": 0.0005139645072631538, "learning_rate": 1.1292517006802721e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 996 }, { "completion_length": 177.07144165039062, "epoch": 0.0633015873015873, "grad_norm": 0.0005109339253976941, "kl": 0.0006621027132496238, "learning_rate": 1.1303854875283446e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 997 }, { "completion_length": 183.21429443359375, "epoch": 0.06336507936507936, "grad_norm": 0.00037993196747265756, "kl": 0.0005664334748871624, "learning_rate": 1.1315192743764172e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 998 }, { "completion_length": 180.07144165039062, "epoch": 0.06342857142857143, "grad_norm": 0.0003781499108299613, "kl": 0.0004890614654868841, "learning_rate": 1.1326530612244898e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 999 }, { "completion_length": 148.6428680419922, "epoch": 0.06349206349206349, "grad_norm": 0.00041664912714622915, "kl": 0.0006186468526721001, "learning_rate": 1.1337868480725623e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1000 }, { "completion_length": 160.35714721679688, "epoch": 0.06355555555555556, "grad_norm": 0.0004694440867751837, "kl": 0.000544480630196631, "learning_rate": 1.1349206349206349e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1001 }, { "completion_length": 169.07144165039062, "epoch": 0.06361904761904762, "grad_norm": 0.0004881338682025671, "kl": 0.0006046794587746263, "learning_rate": 1.1360544217687076e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1002 }, { "completion_length": 201.21429443359375, "epoch": 0.06368253968253969, "grad_norm": 0.0004188242892269045, "kl": 0.0005496832891367376, "learning_rate": 1.13718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1003 }, { "completion_length": 129.5, "epoch": 0.06374603174603174, "grad_norm": 0.0005071581108495593, "kl": 0.0006903937319293618, "learning_rate": 1.1383219954648525e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1004 }, { "completion_length": 159.7857208251953, "epoch": 0.06380952380952382, "grad_norm": 0.00037512744893319905, "kl": 0.0005577864358201623, "learning_rate": 1.1394557823129251e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1005 }, { "completion_length": 212.2857208251953, "epoch": 0.06387301587301587, "grad_norm": 0.0003306569124106318, "kl": 0.0005386561970226467, "learning_rate": 1.1405895691609976e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1006 }, { "completion_length": 192.85714721679688, "epoch": 0.06393650793650793, "grad_norm": 0.00043244514381513, "kl": 0.0006092509720474482, "learning_rate": 1.1417233560090703e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1007 }, { "completion_length": 183.50001525878906, "epoch": 0.064, "grad_norm": 0.0004287120536901057, "kl": 0.000531333324033767, "learning_rate": 1.1428571428571427e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1008 }, { "completion_length": 108.42857360839844, "epoch": 0.06406349206349206, "grad_norm": 0.0005143065936863422, "kl": 0.0006937649450264871, "learning_rate": 1.1439909297052154e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1009 }, { "completion_length": 173.00001525878906, "epoch": 0.06412698412698413, "grad_norm": 1.1638590097427368, "kl": 0.0006568484241142869, "learning_rate": 1.1451247165532879e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1010 }, { "completion_length": 232.35714721679688, "epoch": 0.06419047619047619, "grad_norm": 0.00028660587850026786, "kl": 0.00044197007082402706, "learning_rate": 1.1462585034013605e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1011 }, { "completion_length": 152.6428680419922, "epoch": 0.06425396825396826, "grad_norm": 0.0004902044893242419, "kl": 0.0008084889850579202, "learning_rate": 1.1473922902494331e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1012 }, { "completion_length": 163.42857360839844, "epoch": 0.06431746031746031, "grad_norm": 0.0004158020601607859, "kl": 0.0005753645673394203, "learning_rate": 1.1485260770975057e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1013 }, { "completion_length": 172.50001525878906, "epoch": 0.06438095238095239, "grad_norm": 0.00036403958802111447, "kl": 0.0005192036624066532, "learning_rate": 1.1496598639455782e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1014 }, { "completion_length": 192.50001525878906, "epoch": 0.06444444444444444, "grad_norm": 0.0003831975918728858, "kl": 0.0005476083606481552, "learning_rate": 1.1507936507936506e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1015 }, { "completion_length": 149.0, "epoch": 0.06450793650793651, "grad_norm": 0.0004640989936888218, "kl": 0.0005326913087628782, "learning_rate": 1.1519274376417233e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1016 }, { "completion_length": 145.85714721679688, "epoch": 0.06457142857142857, "grad_norm": 0.0005014509661123157, "kl": 0.0006282722461037338, "learning_rate": 1.1530612244897959e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1017 }, { "completion_length": 184.6428680419922, "epoch": 0.06463492063492063, "grad_norm": 0.0004949114518240094, "kl": 0.0006161594646982849, "learning_rate": 1.1541950113378684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1018 }, { "completion_length": 163.1428680419922, "epoch": 0.0646984126984127, "grad_norm": 0.0005550999194383621, "kl": 0.0007047821418382227, "learning_rate": 1.155328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1019 }, { "completion_length": 192.71429443359375, "epoch": 0.06476190476190476, "grad_norm": 0.0005154608516022563, "kl": 0.0006612909492105246, "learning_rate": 1.1564625850340135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1020 }, { "completion_length": 171.50001525878906, "epoch": 0.06482539682539683, "grad_norm": 0.0004561685200314969, "kl": 0.0005718693137168884, "learning_rate": 1.1575963718820861e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1021 }, { "completion_length": 186.71429443359375, "epoch": 0.06488888888888888, "grad_norm": 0.00036151279346086085, "kl": 0.0005154352984391153, "learning_rate": 1.1587301587301588e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1022 }, { "completion_length": 164.57144165039062, "epoch": 0.06495238095238096, "grad_norm": 0.0004070583381690085, "kl": 0.000602195446845144, "learning_rate": 1.1598639455782312e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1023 }, { "completion_length": 177.57144165039062, "epoch": 0.06501587301587301, "grad_norm": 0.0003262550453655422, "kl": 0.0005075876251794398, "learning_rate": 1.1609977324263039e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1024 }, { "completion_length": 176.92857360839844, "epoch": 0.06507936507936508, "grad_norm": 0.00047309865476563573, "kl": 0.0005976149113848805, "learning_rate": 1.1621315192743763e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1025 }, { "completion_length": 151.21429443359375, "epoch": 0.06514285714285714, "grad_norm": 0.0005590456421487033, "kl": 0.0006589965196326375, "learning_rate": 1.1632653061224489e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1026 }, { "completion_length": 183.85714721679688, "epoch": 0.06520634920634921, "grad_norm": 0.0003858211275655776, "kl": 0.0006039685686118901, "learning_rate": 1.1643990929705215e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1027 }, { "completion_length": 159.85714721679688, "epoch": 0.06526984126984127, "grad_norm": 0.0005892164772376418, "kl": 0.000645733904093504, "learning_rate": 1.165532879818594e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1028 }, { "completion_length": 189.07144165039062, "epoch": 0.06533333333333333, "grad_norm": 0.00037075241561979055, "kl": 0.0005293955327942967, "learning_rate": 1.1666666666666667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1029 }, { "completion_length": 160.7857208251953, "epoch": 0.0653968253968254, "grad_norm": 0.00039830419700592756, "kl": 0.0005142310983501375, "learning_rate": 1.1678004535147391e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1030 }, { "completion_length": 159.92857360839844, "epoch": 0.06546031746031745, "grad_norm": 0.0005322482902556658, "kl": 0.0007337613496929407, "learning_rate": 1.1689342403628118e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1031 }, { "completion_length": 154.7857208251953, "epoch": 0.06552380952380953, "grad_norm": 0.0006244629039429128, "kl": 0.0006715419585816562, "learning_rate": 1.1700680272108843e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1032 }, { "completion_length": 136.5, "epoch": 0.06558730158730158, "grad_norm": 0.00055175949819386, "kl": 0.0007707361946813762, "learning_rate": 1.1712018140589569e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1033 }, { "completion_length": 210.85714721679688, "epoch": 0.06565079365079365, "grad_norm": 0.0003437047125771642, "kl": 0.000485507829580456, "learning_rate": 1.1723356009070294e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1034 }, { "completion_length": 177.92857360839844, "epoch": 0.06571428571428571, "grad_norm": 0.0004687944601755589, "kl": 0.0005969175253994763, "learning_rate": 1.173469387755102e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1035 }, { "completion_length": 167.1428680419922, "epoch": 0.06577777777777778, "grad_norm": 0.0004833219281863421, "kl": 0.0005807502893730998, "learning_rate": 1.1746031746031745e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1036 }, { "completion_length": 159.7857208251953, "epoch": 0.06584126984126984, "grad_norm": 0.00040843719034455717, "kl": 0.0005213406984694302, "learning_rate": 1.1757369614512472e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1037 }, { "completion_length": 183.07144165039062, "epoch": 0.06590476190476191, "grad_norm": 0.0003810359921772033, "kl": 0.0005546229658648372, "learning_rate": 1.1768707482993196e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1038 }, { "completion_length": 150.35714721679688, "epoch": 0.06596825396825397, "grad_norm": 0.000655009935144335, "kl": 0.0008309352560900152, "learning_rate": 1.1780045351473922e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1039 }, { "completion_length": 144.0, "epoch": 0.06603174603174604, "grad_norm": 0.00042869499884545803, "kl": 0.0004748174687847495, "learning_rate": 1.1791383219954648e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1040 }, { "completion_length": 202.35714721679688, "epoch": 0.0660952380952381, "grad_norm": 0.0005995217361487448, "kl": 0.000653797818813473, "learning_rate": 1.1802721088435373e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1041 }, { "completion_length": 143.92857360839844, "epoch": 0.06615873015873015, "grad_norm": 0.0005868782754987478, "kl": 0.0007368244114331901, "learning_rate": 1.18140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1042 }, { "completion_length": 169.92857360839844, "epoch": 0.06622222222222222, "grad_norm": 0.0004748007340822369, "kl": 0.0006226687110029161, "learning_rate": 1.1825396825396824e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1043 }, { "completion_length": 176.57144165039062, "epoch": 0.06628571428571428, "grad_norm": 0.0003970837569795549, "kl": 0.0006126653752289712, "learning_rate": 1.1836734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1044 }, { "completion_length": 161.57144165039062, "epoch": 0.06634920634920635, "grad_norm": 0.0003641137736849487, "kl": 0.0005181393935345113, "learning_rate": 1.1848072562358275e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1045 }, { "completion_length": 178.42857360839844, "epoch": 0.06641269841269841, "grad_norm": 0.00043346176971681416, "kl": 0.0005848497385159135, "learning_rate": 1.1859410430839002e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1046 }, { "completion_length": 166.21429443359375, "epoch": 0.06647619047619048, "grad_norm": 0.0006132919806987047, "kl": 0.0007367628277279437, "learning_rate": 1.1870748299319728e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1047 }, { "completion_length": 181.50001525878906, "epoch": 0.06653968253968254, "grad_norm": 0.00043729596654884517, "kl": 0.000561077962629497, "learning_rate": 1.1882086167800453e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1048 }, { "completion_length": 154.85714721679688, "epoch": 0.06660317460317461, "grad_norm": 0.0006268395809456706, "kl": 0.0007344638579525054, "learning_rate": 1.1893424036281179e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1049 }, { "completion_length": 161.57144165039062, "epoch": 0.06666666666666667, "grad_norm": 0.0004532294988166541, "kl": 0.0006370799383148551, "learning_rate": 1.1904761904761903e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1050 }, { "completion_length": 162.71429443359375, "epoch": 0.06673015873015874, "grad_norm": 0.0005283884820528328, "kl": 0.0006470332155004144, "learning_rate": 1.191609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1051 }, { "completion_length": 158.6428680419922, "epoch": 0.0667936507936508, "grad_norm": 1.395203709602356, "kl": 0.000513516366481781, "learning_rate": 1.1927437641723355e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1052 }, { "completion_length": 121.64286041259766, "epoch": 0.06685714285714285, "grad_norm": 0.0006344334105961025, "kl": 0.0006876537227071822, "learning_rate": 1.193877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1053 }, { "completion_length": 178.50001525878906, "epoch": 0.06692063492063492, "grad_norm": 0.0003988519893027842, "kl": 0.0005643150652758777, "learning_rate": 1.1950113378684807e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1054 }, { "completion_length": 143.42857360839844, "epoch": 0.06698412698412698, "grad_norm": 0.0005075882072560489, "kl": 0.0006427119369618595, "learning_rate": 1.1961451247165532e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1055 }, { "completion_length": 162.57144165039062, "epoch": 0.06704761904761905, "grad_norm": 0.000543532776646316, "kl": 0.0007707742624916136, "learning_rate": 1.1972789115646258e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1056 }, { "completion_length": 188.1428680419922, "epoch": 0.06711111111111111, "grad_norm": 0.0004297252744436264, "kl": 0.000556948478333652, "learning_rate": 1.1984126984126983e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1057 }, { "completion_length": 188.7857208251953, "epoch": 0.06717460317460318, "grad_norm": 0.0004377847071737051, "kl": 0.0006419587880373001, "learning_rate": 1.199546485260771e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1058 }, { "completion_length": 168.5, "epoch": 0.06723809523809524, "grad_norm": 0.0004586779105011374, "kl": 0.0005945852026343346, "learning_rate": 1.2006802721088434e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1059 }, { "completion_length": 168.71429443359375, "epoch": 0.0673015873015873, "grad_norm": 0.0004814855637960136, "kl": 0.0006197316106408834, "learning_rate": 1.201814058956916e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1060 }, { "completion_length": 160.07144165039062, "epoch": 0.06736507936507936, "grad_norm": 0.0005325819365680218, "kl": 0.0007116608903743327, "learning_rate": 1.2029478458049888e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1061 }, { "completion_length": 142.07144165039062, "epoch": 0.06742857142857143, "grad_norm": 0.000459541188320145, "kl": 0.0005802114028483629, "learning_rate": 1.204081632653061e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1062 }, { "completion_length": 159.57144165039062, "epoch": 0.06749206349206349, "grad_norm": 0.0004982985556125641, "kl": 0.0006146401865407825, "learning_rate": 1.2052154195011336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1063 }, { "completion_length": 177.07144165039062, "epoch": 0.06755555555555555, "grad_norm": 0.0005382754025049508, "kl": 0.0005975909880362451, "learning_rate": 1.2063492063492062e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1064 }, { "completion_length": 204.50001525878906, "epoch": 0.06761904761904762, "grad_norm": 0.000602126878220588, "kl": 0.0007011176203377545, "learning_rate": 1.2074829931972788e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1065 }, { "completion_length": 170.42857360839844, "epoch": 0.06768253968253968, "grad_norm": 0.0005364693352021277, "kl": 0.0006055569392628968, "learning_rate": 1.2086167800453516e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1066 }, { "completion_length": 170.42857360839844, "epoch": 0.06774603174603175, "grad_norm": 0.00048234956921078265, "kl": 0.0005688988603651524, "learning_rate": 1.2097505668934239e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1067 }, { "completion_length": 205.92857360839844, "epoch": 0.0678095238095238, "grad_norm": 0.0005119490087963641, "kl": 0.0006522012408822775, "learning_rate": 1.2108843537414967e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1068 }, { "completion_length": 185.71429443359375, "epoch": 0.06787301587301588, "grad_norm": 0.0006080676685087383, "kl": 0.0005718564498238266, "learning_rate": 1.212018140589569e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1069 }, { "completion_length": 188.2857208251953, "epoch": 0.06793650793650793, "grad_norm": 0.0006165633094497025, "kl": 0.0006920230807736516, "learning_rate": 1.2131519274376418e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1070 }, { "completion_length": 194.07144165039062, "epoch": 0.068, "grad_norm": 0.0003883465542457998, "kl": 0.0005493260687217116, "learning_rate": 1.2142857142857143e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1071 }, { "completion_length": 169.1428680419922, "epoch": 0.06806349206349206, "grad_norm": 0.0003912339161615819, "kl": 0.0005949970800429583, "learning_rate": 1.215419501133787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1072 }, { "completion_length": 172.1428680419922, "epoch": 0.06812698412698413, "grad_norm": 0.00044305730261839926, "kl": 0.0005540936253964901, "learning_rate": 1.2165532879818595e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1073 }, { "completion_length": 192.71429443359375, "epoch": 0.06819047619047619, "grad_norm": 0.00048296936438418925, "kl": 0.0005061161355115473, "learning_rate": 1.2176870748299317e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1074 }, { "completion_length": 148.42857360839844, "epoch": 0.06825396825396825, "grad_norm": 0.0005397534114308655, "kl": 0.00066166598116979, "learning_rate": 1.2188208616780046e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1075 }, { "completion_length": 172.85714721679688, "epoch": 0.06831746031746032, "grad_norm": 0.00047865905798971653, "kl": 0.0005607269704341888, "learning_rate": 1.219954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1076 }, { "completion_length": 127.35714721679688, "epoch": 0.06838095238095238, "grad_norm": 0.0006048062350600958, "kl": 0.0006699466030113399, "learning_rate": 1.2210884353741497e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1077 }, { "completion_length": 140.57144165039062, "epoch": 0.06844444444444445, "grad_norm": 0.0005699463654309511, "kl": 0.0005579671123996377, "learning_rate": 1.2222222222222222e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1078 }, { "completion_length": 150.07144165039062, "epoch": 0.0685079365079365, "grad_norm": 0.0005248411325737834, "kl": 0.0006411419017240405, "learning_rate": 1.2233560090702948e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1079 }, { "completion_length": 154.92857360839844, "epoch": 0.06857142857142857, "grad_norm": 0.0005175645346753299, "kl": 0.0005710204131901264, "learning_rate": 1.2244897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1080 }, { "completion_length": 174.50001525878906, "epoch": 0.06863492063492063, "grad_norm": 0.0004285030299797654, "kl": 0.000565843190997839, "learning_rate": 1.22562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1081 }, { "completion_length": 171.07144165039062, "epoch": 0.0686984126984127, "grad_norm": 0.000649358902592212, "kl": 0.0006979686440899968, "learning_rate": 1.2267573696145124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1082 }, { "completion_length": 132.71429443359375, "epoch": 0.06876190476190476, "grad_norm": 0.0011285168584436178, "kl": 0.000735479115974158, "learning_rate": 1.227891156462585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1083 }, { "completion_length": 173.00001525878906, "epoch": 0.06882539682539683, "grad_norm": 1.0616233348846436, "kl": 0.0006086166249588132, "learning_rate": 1.2290249433106576e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1084 }, { "completion_length": 217.92857360839844, "epoch": 0.06888888888888889, "grad_norm": 0.00045628423686139286, "kl": 0.0006542789051309228, "learning_rate": 1.23015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1085 }, { "completion_length": 142.57144165039062, "epoch": 0.06895238095238095, "grad_norm": 0.000589792849496007, "kl": 0.0006922072498127818, "learning_rate": 1.2312925170068027e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1086 }, { "completion_length": 159.6428680419922, "epoch": 0.06901587301587302, "grad_norm": 0.0007204750436358154, "kl": 0.0007223397842608392, "learning_rate": 1.2324263038548752e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1087 }, { "completion_length": 174.50001525878906, "epoch": 0.06907936507936507, "grad_norm": 0.0008206955972127616, "kl": 0.0007010532426647842, "learning_rate": 1.2335600907029478e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1088 }, { "completion_length": 150.35714721679688, "epoch": 0.06914285714285714, "grad_norm": 0.0005787154077552259, "kl": 0.0006375533412210643, "learning_rate": 1.2346938775510203e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1089 }, { "completion_length": 166.21429443359375, "epoch": 0.0692063492063492, "grad_norm": 0.0007041607750579715, "kl": 0.000700432516168803, "learning_rate": 1.235827664399093e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1090 }, { "completion_length": 151.42857360839844, "epoch": 0.06926984126984127, "grad_norm": 0.0007833861745893955, "kl": 0.0008394669275730848, "learning_rate": 1.2369614512471654e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1091 }, { "completion_length": 147.42857360839844, "epoch": 0.06933333333333333, "grad_norm": 0.0007410762482322752, "kl": 0.0007333554676733911, "learning_rate": 1.238095238095238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1092 }, { "completion_length": 164.21429443359375, "epoch": 0.0693968253968254, "grad_norm": 0.0005606567719951272, "kl": 0.0007173902704380453, "learning_rate": 1.2392290249433105e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1093 }, { "completion_length": 177.1428680419922, "epoch": 0.06946031746031746, "grad_norm": 0.0005756729515269399, "kl": 0.0007067265687510371, "learning_rate": 1.240362811791383e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1094 }, { "completion_length": 159.2857208251953, "epoch": 0.06952380952380953, "grad_norm": 0.0007086044643074274, "kl": 0.0006424863822758198, "learning_rate": 1.2414965986394557e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1095 }, { "completion_length": 167.1428680419922, "epoch": 0.06958730158730159, "grad_norm": 0.0004921293584629893, "kl": 0.0006480924785137177, "learning_rate": 1.2426303854875285e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1096 }, { "completion_length": 160.7857208251953, "epoch": 0.06965079365079366, "grad_norm": 0.0008556656539440155, "kl": 0.0007180907996371388, "learning_rate": 1.2437641723356008e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1097 }, { "completion_length": 160.5, "epoch": 0.06971428571428571, "grad_norm": 0.0007146490388549864, "kl": 0.0007019740296527743, "learning_rate": 1.2448979591836733e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1098 }, { "completion_length": 164.42857360839844, "epoch": 0.06977777777777777, "grad_norm": 0.0006189966225065291, "kl": 0.0006438237614929676, "learning_rate": 1.246031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1099 }, { "completion_length": 158.0, "epoch": 0.06984126984126984, "grad_norm": 0.0007820268510840833, "kl": 0.0007522715604864061, "learning_rate": 1.2471655328798184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1100 }, { "completion_length": 150.2857208251953, "epoch": 0.0699047619047619, "grad_norm": 0.0008765804232098162, "kl": 0.0007342655444517732, "learning_rate": 1.2482993197278912e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1101 }, { "completion_length": 157.35714721679688, "epoch": 0.06996825396825397, "grad_norm": 0.0010379923041909933, "kl": 0.0009106875513680279, "learning_rate": 1.2494331065759635e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1102 }, { "completion_length": 128.07144165039062, "epoch": 0.07003174603174603, "grad_norm": 0.0009348687599413097, "kl": 0.0007939223432913423, "learning_rate": 1.2505668934240364e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1103 }, { "completion_length": 160.21429443359375, "epoch": 0.0700952380952381, "grad_norm": 0.0007847267552278936, "kl": 0.0007525979890488088, "learning_rate": 1.251700680272109e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1104 }, { "completion_length": 159.92857360839844, "epoch": 0.07015873015873016, "grad_norm": 0.0007523219683207572, "kl": 0.0007216405356302857, "learning_rate": 1.2528344671201812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1105 }, { "completion_length": 152.21429443359375, "epoch": 0.07022222222222223, "grad_norm": 0.0011083297431468964, "kl": 0.0009551982511766255, "learning_rate": 1.253968253968254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1106 }, { "completion_length": 176.00001525878906, "epoch": 0.07028571428571428, "grad_norm": 0.0006358449463732541, "kl": 0.0006442029844038188, "learning_rate": 1.2551020408163266e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1107 }, { "completion_length": 202.07144165039062, "epoch": 0.07034920634920636, "grad_norm": 0.0006537506124004722, "kl": 0.0005617024144157767, "learning_rate": 1.256235827664399e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1108 }, { "completion_length": 153.57144165039062, "epoch": 0.07041269841269841, "grad_norm": 0.0007193377823568881, "kl": 0.000752509746234864, "learning_rate": 1.2573696145124714e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1109 }, { "completion_length": 175.07144165039062, "epoch": 0.07047619047619047, "grad_norm": 0.0009504043846391141, "kl": 0.0007715055253356695, "learning_rate": 1.2585034013605442e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1110 }, { "completion_length": 140.92857360839844, "epoch": 0.07053968253968254, "grad_norm": 0.0010242994176223874, "kl": 0.0008446957217529416, "learning_rate": 1.2596371882086168e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1111 }, { "completion_length": 151.21429443359375, "epoch": 0.0706031746031746, "grad_norm": 0.0011662855977192521, "kl": 0.0008423434337601066, "learning_rate": 1.260770975056689e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1112 }, { "completion_length": 191.6428680419922, "epoch": 0.07066666666666667, "grad_norm": 0.0005789525457657874, "kl": 0.0006778276874683797, "learning_rate": 1.261904761904762e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1113 }, { "completion_length": 154.92857360839844, "epoch": 0.07073015873015873, "grad_norm": 0.0009301155805587769, "kl": 0.00087608682224527, "learning_rate": 1.2630385487528345e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1114 }, { "completion_length": 167.5, "epoch": 0.0707936507936508, "grad_norm": 0.0007301860605366528, "kl": 0.0006903950707055628, "learning_rate": 1.264172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1115 }, { "completion_length": 158.07144165039062, "epoch": 0.07085714285714285, "grad_norm": 0.0006825228920206428, "kl": 0.0006726159481331706, "learning_rate": 1.2653061224489796e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1116 }, { "completion_length": 184.7857208251953, "epoch": 0.07092063492063493, "grad_norm": 0.0005943046417087317, "kl": 0.0007394511485472322, "learning_rate": 1.266439909297052e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1117 }, { "completion_length": 167.35714721679688, "epoch": 0.07098412698412698, "grad_norm": 0.0006953221745789051, "kl": 0.0006912249373272061, "learning_rate": 1.2675736961451247e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1118 }, { "completion_length": 191.35714721679688, "epoch": 0.07104761904761905, "grad_norm": 0.0005779605126008391, "kl": 0.0005958805559203029, "learning_rate": 1.2687074829931972e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1119 }, { "completion_length": 143.57144165039062, "epoch": 0.07111111111111111, "grad_norm": 0.001058965572156012, "kl": 0.0007431035046465695, "learning_rate": 1.2698412698412698e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1120 }, { "completion_length": 186.21429443359375, "epoch": 0.07117460317460317, "grad_norm": 0.0005436944193206728, "kl": 0.0006478240247815847, "learning_rate": 1.2709750566893423e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1121 }, { "completion_length": 158.57144165039062, "epoch": 0.07123809523809524, "grad_norm": 0.0012354038190096617, "kl": 0.001014613313600421, "learning_rate": 1.272108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1122 }, { "completion_length": 176.07144165039062, "epoch": 0.0713015873015873, "grad_norm": 0.0007837946759536862, "kl": 0.0007870375993661582, "learning_rate": 1.2732426303854874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1123 }, { "completion_length": 199.21429443359375, "epoch": 0.07136507936507937, "grad_norm": 0.0006774302455596626, "kl": 0.0006761326803825796, "learning_rate": 1.27437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1124 }, { "completion_length": 184.2857208251953, "epoch": 0.07142857142857142, "grad_norm": 0.0005611940869130194, "kl": 0.0005864253616891801, "learning_rate": 1.2755102040816326e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1125 }, { "completion_length": 160.07144165039062, "epoch": 0.0714920634920635, "grad_norm": 0.0009303238475695252, "kl": 0.0006971515249460936, "learning_rate": 1.2766439909297054e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1126 }, { "completion_length": 187.50001525878906, "epoch": 0.07155555555555555, "grad_norm": 0.0007238035905174911, "kl": 0.0007560440571978688, "learning_rate": 1.2777777777777777e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1127 }, { "completion_length": 157.71429443359375, "epoch": 0.07161904761904762, "grad_norm": 0.0005439307424239814, "kl": 0.0007033731671981514, "learning_rate": 1.2789115646258502e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1128 }, { "completion_length": 152.71429443359375, "epoch": 0.07168253968253968, "grad_norm": 0.0007656253292225301, "kl": 0.0005883310805074871, "learning_rate": 1.2800453514739228e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1129 }, { "completion_length": 152.71429443359375, "epoch": 0.07174603174603175, "grad_norm": 0.000984462327323854, "kl": 0.0009393795044161379, "learning_rate": 1.2811791383219956e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1130 }, { "completion_length": 143.2857208251953, "epoch": 0.07180952380952381, "grad_norm": 0.0006366135203279555, "kl": 0.0006908026989549398, "learning_rate": 1.282312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1131 }, { "completion_length": 145.57144165039062, "epoch": 0.07187301587301587, "grad_norm": 0.0006848000921308994, "kl": 0.0005812086164951324, "learning_rate": 1.2834467120181404e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1132 }, { "completion_length": 153.42857360839844, "epoch": 0.07193650793650794, "grad_norm": 0.0008642384200356901, "kl": 0.0008103266591206193, "learning_rate": 1.2845804988662133e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1133 }, { "completion_length": 175.50001525878906, "epoch": 0.072, "grad_norm": 0.0007037208415567875, "kl": 0.0007716135587543249, "learning_rate": 1.2857142857142855e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1134 }, { "completion_length": 152.92857360839844, "epoch": 0.07206349206349207, "grad_norm": 0.0007377647561952472, "kl": 0.000675413990393281, "learning_rate": 1.286848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1135 }, { "completion_length": 152.92857360839844, "epoch": 0.07212698412698412, "grad_norm": 0.0006306071300059557, "kl": 0.0006622484070248902, "learning_rate": 1.287981859410431e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1136 }, { "completion_length": 171.21429443359375, "epoch": 0.0721904761904762, "grad_norm": 0.0008608013740740716, "kl": 0.0007109444704838097, "learning_rate": 1.2891156462585035e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1137 }, { "completion_length": 169.0, "epoch": 0.07225396825396825, "grad_norm": 0.000559222127776593, "kl": 0.0006047798087820411, "learning_rate": 1.2902494331065758e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1138 }, { "completion_length": 137.57144165039062, "epoch": 0.07231746031746032, "grad_norm": 0.0010766893392428756, "kl": 0.0009872540831565857, "learning_rate": 1.2913832199546483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1139 }, { "completion_length": 154.92857360839844, "epoch": 0.07238095238095238, "grad_norm": 0.0006662481464445591, "kl": 0.0007187745650298893, "learning_rate": 1.2925170068027211e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1140 }, { "completion_length": 185.2857208251953, "epoch": 0.07244444444444445, "grad_norm": 0.0007384591735899448, "kl": 0.0008055881480686367, "learning_rate": 1.2936507936507937e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1141 }, { "completion_length": 182.57144165039062, "epoch": 0.07250793650793651, "grad_norm": 0.0005724017973989248, "kl": 0.0006363079301081598, "learning_rate": 1.294784580498866e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1142 }, { "completion_length": 164.92857360839844, "epoch": 0.07257142857142856, "grad_norm": 0.0009127547382377088, "kl": 0.0008699438185431063, "learning_rate": 1.2959183673469388e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1143 }, { "completion_length": 161.42857360839844, "epoch": 0.07263492063492064, "grad_norm": 0.0006978508899919689, "kl": 0.0006524896598421037, "learning_rate": 1.2970521541950114e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1144 }, { "completion_length": 201.00001525878906, "epoch": 0.07269841269841269, "grad_norm": 0.0005618591676466167, "kl": 0.0006471273954957724, "learning_rate": 1.2981859410430836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1145 }, { "completion_length": 128.7857208251953, "epoch": 0.07276190476190476, "grad_norm": 0.000761411793064326, "kl": 0.0007268466288223863, "learning_rate": 1.2993197278911565e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1146 }, { "completion_length": 147.07144165039062, "epoch": 0.07282539682539682, "grad_norm": 1.0316693782806396, "kl": 0.0007783558103255928, "learning_rate": 1.300453514739229e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1147 }, { "completion_length": 224.07144165039062, "epoch": 0.07288888888888889, "grad_norm": 0.0007536418852396309, "kl": 0.0006184297963045537, "learning_rate": 1.3015873015873016e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1148 }, { "completion_length": 165.42857360839844, "epoch": 0.07295238095238095, "grad_norm": 0.0007630626205354929, "kl": 0.0005976330721750855, "learning_rate": 1.3027210884353739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1149 }, { "completion_length": 183.50001525878906, "epoch": 0.07301587301587302, "grad_norm": 0.0008598277345299721, "kl": 0.0006646478432230651, "learning_rate": 1.3038548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1150 }, { "completion_length": 119.50000762939453, "epoch": 0.07307936507936508, "grad_norm": 0.0011187787167727947, "kl": 0.0009048997890204191, "learning_rate": 1.3049886621315192e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1151 }, { "completion_length": 178.2857208251953, "epoch": 0.07314285714285715, "grad_norm": 0.0010219578398391604, "kl": 0.0010112565942108631, "learning_rate": 1.3061224489795918e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1152 }, { "completion_length": 143.71429443359375, "epoch": 0.0732063492063492, "grad_norm": 0.0013940976932644844, "kl": 0.0010519128991290927, "learning_rate": 1.3072562358276643e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1153 }, { "completion_length": 157.85714721679688, "epoch": 0.07326984126984128, "grad_norm": 0.0008930497569963336, "kl": 0.0008238631999120116, "learning_rate": 1.308390022675737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1154 }, { "completion_length": 150.7857208251953, "epoch": 0.07333333333333333, "grad_norm": 0.0012553577544167638, "kl": 0.0009888872737064958, "learning_rate": 1.3095238095238095e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1155 }, { "completion_length": 149.92857360839844, "epoch": 0.07339682539682539, "grad_norm": 1.4321941137313843, "kl": 0.0010119047947227955, "learning_rate": 1.310657596371882e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1156 }, { "completion_length": 157.92857360839844, "epoch": 0.07346031746031746, "grad_norm": 0.0014087515883147717, "kl": 0.001137194107286632, "learning_rate": 1.3117913832199546e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1157 }, { "completion_length": 161.35714721679688, "epoch": 0.07352380952380952, "grad_norm": 0.0009115771390497684, "kl": 0.000892497890163213, "learning_rate": 1.312925170068027e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1158 }, { "completion_length": 166.5, "epoch": 0.07358730158730159, "grad_norm": 0.0009990198304876685, "kl": 0.0009351047337986529, "learning_rate": 1.3140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1159 }, { "completion_length": 173.92857360839844, "epoch": 0.07365079365079365, "grad_norm": 0.0013268388574942946, "kl": 0.0012079833541065454, "learning_rate": 1.3151927437641722e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1160 }, { "completion_length": 167.0, "epoch": 0.07371428571428572, "grad_norm": 0.0013273702934384346, "kl": 0.0010521127842366695, "learning_rate": 1.3163265306122448e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1161 }, { "completion_length": 162.0, "epoch": 0.07377777777777778, "grad_norm": 0.0018591884290799499, "kl": 0.0014322169590741396, "learning_rate": 1.3174603174603173e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1162 }, { "completion_length": 180.7857208251953, "epoch": 0.07384126984126985, "grad_norm": 0.001153917401097715, "kl": 0.0011971828062087297, "learning_rate": 1.3185941043083902e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1163 }, { "completion_length": 217.57144165039062, "epoch": 0.0739047619047619, "grad_norm": 0.0009186172974295914, "kl": 0.00093979446683079, "learning_rate": 1.3197278911564624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1164 }, { "completion_length": 199.00001525878906, "epoch": 0.07396825396825397, "grad_norm": 0.0012761034304276109, "kl": 0.0010410263203084469, "learning_rate": 1.320861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1165 }, { "completion_length": 198.1428680419922, "epoch": 0.07403174603174603, "grad_norm": 0.0011647710343822837, "kl": 0.0009586311061866581, "learning_rate": 1.3219954648526078e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1166 }, { "completion_length": 186.85714721679688, "epoch": 0.07409523809523809, "grad_norm": 0.0013368215877562761, "kl": 0.0012436792021617293, "learning_rate": 1.3231292517006804e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1167 }, { "completion_length": 184.00001525878906, "epoch": 0.07415873015873016, "grad_norm": 0.0009774649515748024, "kl": 0.0009340469259768724, "learning_rate": 1.3242630385487527e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1168 }, { "completion_length": 150.2857208251953, "epoch": 0.07422222222222222, "grad_norm": 0.0017375288298353553, "kl": 0.001381578273139894, "learning_rate": 1.3253968253968252e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1169 }, { "completion_length": 152.0, "epoch": 0.07428571428571429, "grad_norm": 0.0017197824781760573, "kl": 0.0013957696501165628, "learning_rate": 1.326530612244898e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1170 }, { "completion_length": 171.35714721679688, "epoch": 0.07434920634920635, "grad_norm": 0.0016073592705652118, "kl": 0.0013105899561196566, "learning_rate": 1.3276643990929703e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1171 }, { "completion_length": 160.57144165039062, "epoch": 0.07441269841269842, "grad_norm": 0.0017101708799600601, "kl": 0.0015041091246530414, "learning_rate": 1.328798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1172 }, { "completion_length": 160.5, "epoch": 0.07447619047619047, "grad_norm": 0.002052864758297801, "kl": 0.0018791930051520467, "learning_rate": 1.3299319727891157e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1173 }, { "completion_length": 182.50001525878906, "epoch": 0.07453968253968254, "grad_norm": 0.7208836674690247, "kl": 0.0013685591984540224, "learning_rate": 1.3310657596371883e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1174 }, { "completion_length": 140.07144165039062, "epoch": 0.0746031746031746, "grad_norm": 0.0016547965351492167, "kl": 0.0013827575603500009, "learning_rate": 1.3321995464852605e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1175 }, { "completion_length": 176.85714721679688, "epoch": 0.07466666666666667, "grad_norm": 0.0015767957083880901, "kl": 0.001368794240988791, "learning_rate": 1.3333333333333334e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1176 }, { "completion_length": 189.00001525878906, "epoch": 0.07473015873015873, "grad_norm": 0.0011864573461934924, "kl": 0.0011170837096869946, "learning_rate": 1.334467120181406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1177 }, { "completion_length": 179.00001525878906, "epoch": 0.07479365079365079, "grad_norm": 0.0016951297875493765, "kl": 0.00138251599855721, "learning_rate": 1.3356009070294785e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1178 }, { "completion_length": 141.6428680419922, "epoch": 0.07485714285714286, "grad_norm": 0.0018862319411709905, "kl": 0.001590462983585894, "learning_rate": 1.3367346938775508e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1179 }, { "completion_length": 168.35714721679688, "epoch": 0.07492063492063492, "grad_norm": 0.0018542027100920677, "kl": 0.0014592062216252089, "learning_rate": 1.3378684807256236e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1180 }, { "completion_length": 175.7857208251953, "epoch": 0.07498412698412699, "grad_norm": 0.0018962300382554531, "kl": 0.0016219945391640067, "learning_rate": 1.3390022675736961e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1181 }, { "completion_length": 131.2857208251953, "epoch": 0.07504761904761904, "grad_norm": 0.0015862125437706709, "kl": 0.0014626089250668883, "learning_rate": 1.3401360544217684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1182 }, { "completion_length": 156.6428680419922, "epoch": 0.07511111111111111, "grad_norm": 0.0018721496453508735, "kl": 0.0016703899018466473, "learning_rate": 1.3412698412698412e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1183 }, { "completion_length": 136.21429443359375, "epoch": 0.07517460317460317, "grad_norm": 0.002488375874236226, "kl": 0.0019223004346713424, "learning_rate": 1.3424036281179138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1184 }, { "completion_length": 166.6428680419922, "epoch": 0.07523809523809524, "grad_norm": 0.0019208508310839534, "kl": 0.001498186495155096, "learning_rate": 1.3435374149659864e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1185 }, { "completion_length": 152.85714721679688, "epoch": 0.0753015873015873, "grad_norm": 0.0016873241402208805, "kl": 0.0014955566730350256, "learning_rate": 1.344671201814059e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1186 }, { "completion_length": 192.71429443359375, "epoch": 0.07536507936507937, "grad_norm": 0.0012145410291850567, "kl": 0.0012795281363651156, "learning_rate": 1.3458049886621315e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1187 }, { "completion_length": 186.42857360839844, "epoch": 0.07542857142857143, "grad_norm": 0.0018702769884839654, "kl": 0.001711763790808618, "learning_rate": 1.346938775510204e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1188 }, { "completion_length": 169.1428680419922, "epoch": 0.07549206349206349, "grad_norm": 0.0015684609534218907, "kl": 0.0013180351816117764, "learning_rate": 1.3480725623582768e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1189 }, { "completion_length": 168.0, "epoch": 0.07555555555555556, "grad_norm": 0.0019936002790927887, "kl": 0.0015464817406609654, "learning_rate": 1.349206349206349e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1190 }, { "completion_length": 208.1428680419922, "epoch": 0.07561904761904761, "grad_norm": 0.0017536937957629561, "kl": 0.0015034148236736655, "learning_rate": 1.3503401360544217e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1191 }, { "completion_length": 194.2857208251953, "epoch": 0.07568253968253968, "grad_norm": 0.0012212368892505765, "kl": 0.0010414044372737408, "learning_rate": 1.3514739229024942e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1192 }, { "completion_length": 160.5, "epoch": 0.07574603174603174, "grad_norm": 0.001625911914743483, "kl": 0.0014388530980795622, "learning_rate": 1.3526077097505668e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1193 }, { "completion_length": 192.7857208251953, "epoch": 0.07580952380952381, "grad_norm": 0.0017352669965475798, "kl": 0.0015015871031209826, "learning_rate": 1.3537414965986393e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1194 }, { "completion_length": 176.07144165039062, "epoch": 0.07587301587301587, "grad_norm": 0.0010885734809562564, "kl": 0.0011473949998617172, "learning_rate": 1.354875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1195 }, { "completion_length": 192.50001525878906, "epoch": 0.07593650793650794, "grad_norm": 0.00106631254311651, "kl": 0.00121659180149436, "learning_rate": 1.3560090702947847e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1196 }, { "completion_length": 173.42857360839844, "epoch": 0.076, "grad_norm": 0.0020286862272769213, "kl": 0.0016889231046661735, "learning_rate": 1.357142857142857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1197 }, { "completion_length": 184.1428680419922, "epoch": 0.07606349206349207, "grad_norm": 0.0016126657137647271, "kl": 0.0014235194539651275, "learning_rate": 1.3582766439909296e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1198 }, { "completion_length": 153.42857360839844, "epoch": 0.07612698412698413, "grad_norm": 0.0021452398505061865, "kl": 0.0015566295478492975, "learning_rate": 1.3594104308390024e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1199 }, { "completion_length": 156.0, "epoch": 0.0761904761904762, "grad_norm": 0.0014362868387252092, "kl": 0.0011337895411998034, "learning_rate": 1.360544217687075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1200 }, { "completion_length": 180.07144165039062, "epoch": 0.07625396825396825, "grad_norm": 0.001430632546544075, "kl": 0.0012441471917554736, "learning_rate": 1.3616780045351472e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1201 }, { "completion_length": 169.35714721679688, "epoch": 0.07631746031746031, "grad_norm": 0.0015242164954543114, "kl": 0.001364232157357037, "learning_rate": 1.3628117913832198e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1202 }, { "completion_length": 145.35714721679688, "epoch": 0.07638095238095238, "grad_norm": 0.002597275422886014, "kl": 0.0018812112975865602, "learning_rate": 1.3639455782312926e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1203 }, { "completion_length": 141.42857360839844, "epoch": 0.07644444444444444, "grad_norm": 0.0023432355374097824, "kl": 0.001737982383929193, "learning_rate": 1.365079365079365e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1204 }, { "completion_length": 172.85714721679688, "epoch": 0.07650793650793651, "grad_norm": 1.4537967443466187, "kl": 0.0013575541088357568, "learning_rate": 1.3662131519274374e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1205 }, { "completion_length": 132.5, "epoch": 0.07657142857142857, "grad_norm": 0.001708437455818057, "kl": 0.0014149268390610814, "learning_rate": 1.3673469387755103e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1206 }, { "completion_length": 154.35714721679688, "epoch": 0.07663492063492064, "grad_norm": 0.0022198292426764965, "kl": 0.002117465017363429, "learning_rate": 1.3684807256235828e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1207 }, { "completion_length": 188.2857208251953, "epoch": 0.0766984126984127, "grad_norm": 0.0014829955762252212, "kl": 0.0013734084786847234, "learning_rate": 1.369614512471655e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1208 }, { "completion_length": 192.35714721679688, "epoch": 0.07676190476190477, "grad_norm": 0.0019481276394799352, "kl": 0.0017040189122781157, "learning_rate": 1.370748299319728e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1209 }, { "completion_length": 205.21429443359375, "epoch": 0.07682539682539682, "grad_norm": 0.0015688728308305144, "kl": 0.0014164837775751948, "learning_rate": 1.3718820861678005e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1210 }, { "completion_length": 134.6428680419922, "epoch": 0.0768888888888889, "grad_norm": 0.0022310905624181032, "kl": 0.0021107704378664494, "learning_rate": 1.373015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1211 }, { "completion_length": 166.5, "epoch": 0.07695238095238095, "grad_norm": 0.002511344151571393, "kl": 0.002253591548651457, "learning_rate": 1.3741496598639453e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1212 }, { "completion_length": 203.42857360839844, "epoch": 0.07701587301587301, "grad_norm": 0.0016034505097195506, "kl": 0.0015081481542438269, "learning_rate": 1.3752834467120181e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1213 }, { "completion_length": 135.57144165039062, "epoch": 0.07707936507936508, "grad_norm": 0.002612999640405178, "kl": 0.0023518402595072985, "learning_rate": 1.3764172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1214 }, { "completion_length": 161.35714721679688, "epoch": 0.07714285714285714, "grad_norm": 0.0018994967686012387, "kl": 0.0017291305121034384, "learning_rate": 1.377551020408163e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1215 }, { "completion_length": 163.07144165039062, "epoch": 0.07720634920634921, "grad_norm": 0.0020952799823135138, "kl": 0.0019557818304747343, "learning_rate": 1.3786848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1216 }, { "completion_length": 145.2857208251953, "epoch": 0.07726984126984127, "grad_norm": 0.002606559544801712, "kl": 0.0019057095050811768, "learning_rate": 1.3798185941043084e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1217 }, { "completion_length": 182.07144165039062, "epoch": 0.07733333333333334, "grad_norm": 0.002182832919061184, "kl": 0.0019581865053623915, "learning_rate": 1.380952380952381e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1218 }, { "completion_length": 161.1428680419922, "epoch": 0.0773968253968254, "grad_norm": 0.0019033316057175398, "kl": 0.0017595146782696247, "learning_rate": 1.3820861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1219 }, { "completion_length": 189.42857360839844, "epoch": 0.07746031746031747, "grad_norm": 0.002461986616253853, "kl": 0.002106923144310713, "learning_rate": 1.383219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1220 }, { "completion_length": 144.42857360839844, "epoch": 0.07752380952380952, "grad_norm": 0.002379394369199872, "kl": 0.0020432230085134506, "learning_rate": 1.3843537414965986e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1221 }, { "completion_length": 163.85714721679688, "epoch": 0.0775873015873016, "grad_norm": 0.002160919364541769, "kl": 0.002046026987954974, "learning_rate": 1.3854875283446711e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1222 }, { "completion_length": 155.57144165039062, "epoch": 0.07765079365079365, "grad_norm": 0.0017742266645655036, "kl": 0.0016823699697852135, "learning_rate": 1.3866213151927437e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1223 }, { "completion_length": 158.2857208251953, "epoch": 0.07771428571428571, "grad_norm": 0.0018665234092622995, "kl": 0.0016275885282084346, "learning_rate": 1.3877551020408162e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1224 }, { "completion_length": 144.7857208251953, "epoch": 0.07777777777777778, "grad_norm": 0.0026648654602468014, "kl": 0.0021471737418323755, "learning_rate": 1.3888888888888888e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1225 }, { "completion_length": 171.35714721679688, "epoch": 0.07784126984126984, "grad_norm": 0.001834048773162067, "kl": 0.0017129871994256973, "learning_rate": 1.3900226757369616e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1226 }, { "completion_length": 218.71429443359375, "epoch": 0.07790476190476191, "grad_norm": 0.0013683310244232416, "kl": 0.001448984956368804, "learning_rate": 1.391156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1227 }, { "completion_length": 132.42857360839844, "epoch": 0.07796825396825396, "grad_norm": 0.0028547460678964853, "kl": 0.0022388198412954807, "learning_rate": 1.3922902494331065e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1228 }, { "completion_length": 170.71429443359375, "epoch": 0.07803174603174604, "grad_norm": 0.001877100788988173, "kl": 0.0017148102633655071, "learning_rate": 1.3934240362811793e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1229 }, { "completion_length": 149.71429443359375, "epoch": 0.07809523809523809, "grad_norm": 0.0031371889635920525, "kl": 0.002819862449541688, "learning_rate": 1.3945578231292516e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1230 }, { "completion_length": 179.1428680419922, "epoch": 0.07815873015873016, "grad_norm": 0.0032121562398970127, "kl": 0.002780140144750476, "learning_rate": 1.395691609977324e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1231 }, { "completion_length": 168.1428680419922, "epoch": 0.07822222222222222, "grad_norm": 0.0024875805247575045, "kl": 0.00228215497918427, "learning_rate": 1.3968253968253967e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1232 }, { "completion_length": 200.71429443359375, "epoch": 0.07828571428571429, "grad_norm": 0.0022195118945091963, "kl": 0.0018423498841002584, "learning_rate": 1.3979591836734695e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1233 }, { "completion_length": 171.71429443359375, "epoch": 0.07834920634920635, "grad_norm": 0.001762127736583352, "kl": 0.0016530543798580766, "learning_rate": 1.3990929705215418e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1234 }, { "completion_length": 157.35714721679688, "epoch": 0.0784126984126984, "grad_norm": 0.0014418161008507013, "kl": 0.00140611722599715, "learning_rate": 1.4002267573696143e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1235 }, { "completion_length": 180.92857360839844, "epoch": 0.07847619047619048, "grad_norm": 0.0021798345260322094, "kl": 0.0020488565787672997, "learning_rate": 1.4013605442176872e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1236 }, { "completion_length": 139.6428680419922, "epoch": 0.07853968253968253, "grad_norm": 0.0028626497369259596, "kl": 0.0025435397401452065, "learning_rate": 1.4024943310657597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1237 }, { "completion_length": 141.6428680419922, "epoch": 0.0786031746031746, "grad_norm": 0.0029154913499951363, "kl": 0.002640926046296954, "learning_rate": 1.403628117913832e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1238 }, { "completion_length": 182.00001525878906, "epoch": 0.07866666666666666, "grad_norm": 0.0020405242685228586, "kl": 0.0021903449669480324, "learning_rate": 1.4047619047619048e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1239 }, { "completion_length": 185.00001525878906, "epoch": 0.07873015873015873, "grad_norm": 0.001506035216152668, "kl": 0.0015337547520175576, "learning_rate": 1.4058956916099774e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1240 }, { "completion_length": 176.07144165039062, "epoch": 0.07879365079365079, "grad_norm": 0.0021463404409587383, "kl": 0.001962571870535612, "learning_rate": 1.4070294784580497e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1241 }, { "completion_length": 220.35714721679688, "epoch": 0.07885714285714286, "grad_norm": 0.0011622741585597396, "kl": 0.0012741514947265387, "learning_rate": 1.4081632653061222e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1242 }, { "completion_length": 151.85714721679688, "epoch": 0.07892063492063492, "grad_norm": 0.002327889436855912, "kl": 0.0019718045368790627, "learning_rate": 1.409297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1243 }, { "completion_length": 152.0, "epoch": 0.07898412698412699, "grad_norm": 0.002758442424237728, "kl": 0.0025971480645239353, "learning_rate": 1.4104308390022676e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1244 }, { "completion_length": 168.7857208251953, "epoch": 0.07904761904761905, "grad_norm": 0.0029529943130910397, "kl": 0.0024682683870196342, "learning_rate": 1.41156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1245 }, { "completion_length": 176.50001525878906, "epoch": 0.0791111111111111, "grad_norm": 0.0019825247582048178, "kl": 0.0016868787351995707, "learning_rate": 1.4126984126984127e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1246 }, { "completion_length": 165.5, "epoch": 0.07917460317460318, "grad_norm": 0.0023255886044353247, "kl": 0.0021394153591245413, "learning_rate": 1.4138321995464853e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1247 }, { "completion_length": 133.0, "epoch": 0.07923809523809523, "grad_norm": 0.0028106370009481907, "kl": 0.0022884742356836796, "learning_rate": 1.4149659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1248 }, { "completion_length": 145.21429443359375, "epoch": 0.0793015873015873, "grad_norm": 1.5766894817352295, "kl": 0.0025912136770784855, "learning_rate": 1.4160997732426304e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1249 }, { "completion_length": 182.57144165039062, "epoch": 0.07936507936507936, "grad_norm": 1.3952945470809937, "kl": 0.00131905113812536, "learning_rate": 1.417233560090703e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1250 }, { "completion_length": 174.57144165039062, "epoch": 0.07942857142857143, "grad_norm": 0.0015316365752369165, "kl": 0.001462371670641005, "learning_rate": 1.4183673469387755e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1251 }, { "completion_length": 178.6428680419922, "epoch": 0.07949206349206349, "grad_norm": 0.0023919427767395973, "kl": 0.002055322052910924, "learning_rate": 1.4195011337868478e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1252 }, { "completion_length": 174.71429443359375, "epoch": 0.07955555555555556, "grad_norm": 0.0026041301898658276, "kl": 0.0023545788135379553, "learning_rate": 1.4206349206349206e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1253 }, { "completion_length": 141.21429443359375, "epoch": 0.07961904761904762, "grad_norm": 0.002850173506885767, "kl": 0.002526327734813094, "learning_rate": 1.4217687074829931e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1254 }, { "completion_length": 150.5, "epoch": 0.07968253968253969, "grad_norm": 0.0032121865078806877, "kl": 0.002836289582774043, "learning_rate": 1.4229024943310657e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1255 }, { "completion_length": 200.85714721679688, "epoch": 0.07974603174603175, "grad_norm": 1.089051365852356, "kl": 0.0017295951256528497, "learning_rate": 1.4240362811791383e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1256 }, { "completion_length": 159.7857208251953, "epoch": 0.07980952380952382, "grad_norm": 0.002337125828489661, "kl": 0.00195467215962708, "learning_rate": 1.4251700680272108e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1257 }, { "completion_length": 159.2857208251953, "epoch": 0.07987301587301587, "grad_norm": 0.0023588426411151886, "kl": 0.002145505277439952, "learning_rate": 1.4263038548752834e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1258 }, { "completion_length": 164.7857208251953, "epoch": 0.07993650793650793, "grad_norm": 0.0020617397967725992, "kl": 0.0019388566724956036, "learning_rate": 1.4274376417233562e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1259 }, { "completion_length": 174.57144165039062, "epoch": 0.08, "grad_norm": 0.002054984215646982, "kl": 0.002012579469010234, "learning_rate": 1.4285714285714285e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1260 }, { "completion_length": 144.2857208251953, "epoch": 0.08006349206349206, "grad_norm": 0.0022653392516076565, "kl": 0.002027074573561549, "learning_rate": 1.429705215419501e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1261 }, { "completion_length": 156.5, "epoch": 0.08012698412698413, "grad_norm": 0.003128249663859606, "kl": 0.0025603545363992453, "learning_rate": 1.4308390022675736e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1262 }, { "completion_length": 159.85714721679688, "epoch": 0.08019047619047619, "grad_norm": 0.8838605880737305, "kl": 0.0024932646192610264, "learning_rate": 1.4319727891156461e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1263 }, { "completion_length": 173.57144165039062, "epoch": 0.08025396825396826, "grad_norm": 0.0026647034101188183, "kl": 0.0025808352511376143, "learning_rate": 1.4331065759637187e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1264 }, { "completion_length": 162.85714721679688, "epoch": 0.08031746031746032, "grad_norm": 0.002821347676217556, "kl": 0.0026514839846640825, "learning_rate": 1.4342403628117912e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1265 }, { "completion_length": 166.35714721679688, "epoch": 0.08038095238095239, "grad_norm": 0.0024925521574914455, "kl": 0.0025295920204371214, "learning_rate": 1.435374149659864e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1266 }, { "completion_length": 185.85714721679688, "epoch": 0.08044444444444444, "grad_norm": 0.002397907432168722, "kl": 0.0025535686872899532, "learning_rate": 1.4365079365079364e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1267 }, { "completion_length": 144.85714721679688, "epoch": 0.08050793650793651, "grad_norm": 0.0038323686458170414, "kl": 0.003721746150404215, "learning_rate": 1.437641723356009e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1268 }, { "completion_length": 154.0, "epoch": 0.08057142857142857, "grad_norm": 0.0033304858952760696, "kl": 0.002952532609924674, "learning_rate": 1.4387755102040817e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1269 }, { "completion_length": 148.2857208251953, "epoch": 0.08063492063492063, "grad_norm": 0.004055874887853861, "kl": 0.0040828511118888855, "learning_rate": 1.4399092970521543e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1270 }, { "completion_length": 162.35714721679688, "epoch": 0.0806984126984127, "grad_norm": 0.004121606703847647, "kl": 0.004004074726253748, "learning_rate": 1.4410430839002266e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1271 }, { "completion_length": 159.57144165039062, "epoch": 0.08076190476190476, "grad_norm": 0.003335754619911313, "kl": 0.0035484458785504103, "learning_rate": 1.442176870748299e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1272 }, { "completion_length": 158.92857360839844, "epoch": 0.08082539682539683, "grad_norm": 0.002685128478333354, "kl": 0.0028891509864479303, "learning_rate": 1.443310657596372e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1273 }, { "completion_length": 128.07144165039062, "epoch": 0.08088888888888889, "grad_norm": 0.003860997734591365, "kl": 0.0041619255207479, "learning_rate": 1.4444444444444442e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1274 }, { "completion_length": 152.0, "epoch": 0.08095238095238096, "grad_norm": 0.003959013614803553, "kl": 0.004262364935129881, "learning_rate": 1.4455782312925168e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1275 }, { "completion_length": 180.50001525878906, "epoch": 0.08101587301587301, "grad_norm": 0.0026995250955224037, "kl": 0.0030237999744713306, "learning_rate": 1.4467120181405896e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1276 }, { "completion_length": 160.6428680419922, "epoch": 0.08107936507936508, "grad_norm": 0.005232285242527723, "kl": 0.005247342400252819, "learning_rate": 1.4478458049886622e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1277 }, { "completion_length": 155.92857360839844, "epoch": 0.08114285714285714, "grad_norm": 0.002792042912915349, "kl": 0.0031825017649680376, "learning_rate": 1.4489795918367345e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1278 }, { "completion_length": 150.85714721679688, "epoch": 0.08120634920634921, "grad_norm": 0.0055030714720487595, "kl": 0.005446546245366335, "learning_rate": 1.4501133786848073e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1279 }, { "completion_length": 171.21429443359375, "epoch": 0.08126984126984127, "grad_norm": 0.004012655932456255, "kl": 0.0049400897696614265, "learning_rate": 1.4512471655328798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1280 }, { "completion_length": 161.57144165039062, "epoch": 0.08133333333333333, "grad_norm": 0.0046536000445485115, "kl": 0.0051780566573143005, "learning_rate": 1.4523809523809524e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1281 }, { "completion_length": 143.42857360839844, "epoch": 0.0813968253968254, "grad_norm": 0.004578145686537027, "kl": 0.005150299519300461, "learning_rate": 1.4535147392290247e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1282 }, { "completion_length": 160.92857360839844, "epoch": 0.08146031746031746, "grad_norm": 0.004682878497987986, "kl": 0.005053314846009016, "learning_rate": 1.4546485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1283 }, { "completion_length": 155.6428680419922, "epoch": 0.08152380952380953, "grad_norm": 0.005187938455492258, "kl": 0.005865135230123997, "learning_rate": 1.45578231292517e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1284 }, { "completion_length": 141.7857208251953, "epoch": 0.08158730158730158, "grad_norm": 0.004797362722456455, "kl": 0.005486887414008379, "learning_rate": 1.4569160997732426e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1285 }, { "completion_length": 163.85714721679688, "epoch": 0.08165079365079365, "grad_norm": 0.0031859788578003645, "kl": 0.003917055204510689, "learning_rate": 1.4580498866213152e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1286 }, { "completion_length": 209.85714721679688, "epoch": 0.08171428571428571, "grad_norm": 0.0027886054012924433, "kl": 0.003369513899087906, "learning_rate": 1.4591836734693877e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1287 }, { "completion_length": 153.1428680419922, "epoch": 0.08177777777777778, "grad_norm": 0.004031719174236059, "kl": 0.004777011927217245, "learning_rate": 1.4603174603174603e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1288 }, { "completion_length": 169.5, "epoch": 0.08184126984126984, "grad_norm": 1.2455193996429443, "kl": 0.0032147446181625128, "learning_rate": 1.4614512471655328e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1289 }, { "completion_length": 163.07144165039062, "epoch": 0.08190476190476191, "grad_norm": 0.002554691396653652, "kl": 0.0032497995998710394, "learning_rate": 1.4625850340136054e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1290 }, { "completion_length": 173.1428680419922, "epoch": 0.08196825396825397, "grad_norm": 0.0037178429774940014, "kl": 0.003857980016618967, "learning_rate": 1.463718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1291 }, { "completion_length": 142.6428680419922, "epoch": 0.08203174603174603, "grad_norm": 0.005579093471169472, "kl": 0.005927646532654762, "learning_rate": 1.4648526077097505e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1292 }, { "completion_length": 161.57144165039062, "epoch": 0.0820952380952381, "grad_norm": 0.003492776770144701, "kl": 0.003941970411688089, "learning_rate": 1.465986394557823e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1293 }, { "completion_length": 168.35714721679688, "epoch": 0.08215873015873015, "grad_norm": 0.0030365295242518187, "kl": 0.0038038536440581083, "learning_rate": 1.4671201814058956e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1294 }, { "completion_length": 160.92857360839844, "epoch": 0.08222222222222222, "grad_norm": 0.004001027438789606, "kl": 0.004589388612657785, "learning_rate": 1.4682539682539681e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1295 }, { "completion_length": 177.1428680419922, "epoch": 0.08228571428571428, "grad_norm": 0.0027600075118243694, "kl": 0.0035939831286668777, "learning_rate": 1.469387755102041e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1296 }, { "completion_length": 130.7857208251953, "epoch": 0.08234920634920635, "grad_norm": 0.004289177246391773, "kl": 0.0047781215980648994, "learning_rate": 1.4705215419501133e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1297 }, { "completion_length": 199.1428680419922, "epoch": 0.08241269841269841, "grad_norm": 0.002109657507389784, "kl": 0.0026496497448533773, "learning_rate": 1.4716553287981858e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1298 }, { "completion_length": 170.57144165039062, "epoch": 0.08247619047619048, "grad_norm": 0.00271030911244452, "kl": 0.00335623137652874, "learning_rate": 1.4727891156462586e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1299 }, { "completion_length": 173.85714721679688, "epoch": 0.08253968253968254, "grad_norm": 0.002864036476239562, "kl": 0.0032706623896956444, "learning_rate": 1.473922902494331e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1300 }, { "completion_length": 149.07144165039062, "epoch": 0.08260317460317461, "grad_norm": 0.0049957879818975925, "kl": 0.005381946451961994, "learning_rate": 1.4750566893424035e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1301 }, { "completion_length": 149.1428680419922, "epoch": 0.08266666666666667, "grad_norm": 0.0033759635407477617, "kl": 0.0032548883464187384, "learning_rate": 1.476190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1302 }, { "completion_length": 173.1428680419922, "epoch": 0.08273015873015872, "grad_norm": 0.0023674950934946537, "kl": 0.002552681602537632, "learning_rate": 1.4773242630385488e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1303 }, { "completion_length": 163.85714721679688, "epoch": 0.0827936507936508, "grad_norm": 0.0028086539823561907, "kl": 0.0031666166614741087, "learning_rate": 1.478458049886621e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1304 }, { "completion_length": 191.6428680419922, "epoch": 0.08285714285714285, "grad_norm": 0.002850054297596216, "kl": 0.003502662293612957, "learning_rate": 1.4795918367346937e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1305 }, { "completion_length": 164.07144165039062, "epoch": 0.08292063492063492, "grad_norm": 0.0034111831337213516, "kl": 0.003426468465477228, "learning_rate": 1.4807256235827665e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1306 }, { "completion_length": 143.71429443359375, "epoch": 0.08298412698412698, "grad_norm": 0.0041046529076993465, "kl": 0.004802500829100609, "learning_rate": 1.481859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1307 }, { "completion_length": 141.1428680419922, "epoch": 0.08304761904761905, "grad_norm": 0.004728091415017843, "kl": 0.00472218357026577, "learning_rate": 1.4829931972789113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1308 }, { "completion_length": 161.6428680419922, "epoch": 0.08311111111111111, "grad_norm": 0.0032537442166358232, "kl": 0.003959552384912968, "learning_rate": 1.4841269841269842e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1309 }, { "completion_length": 148.7857208251953, "epoch": 0.08317460317460318, "grad_norm": 0.0039657168090343475, "kl": 0.004669311922043562, "learning_rate": 1.4852607709750567e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1310 }, { "completion_length": 176.50001525878906, "epoch": 0.08323809523809524, "grad_norm": 0.00318721542134881, "kl": 0.0036053734365850687, "learning_rate": 1.486394557823129e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1311 }, { "completion_length": 163.92857360839844, "epoch": 0.08330158730158731, "grad_norm": 0.0026006309781223536, "kl": 0.0032337787561118603, "learning_rate": 1.4875283446712016e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1312 }, { "completion_length": 160.2857208251953, "epoch": 0.08336507936507936, "grad_norm": 1.2602940797805786, "kl": 0.003223133273422718, "learning_rate": 1.4886621315192744e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1313 }, { "completion_length": 156.21429443359375, "epoch": 0.08342857142857144, "grad_norm": 0.0027249963022768497, "kl": 0.003476721467450261, "learning_rate": 1.489795918367347e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1314 }, { "completion_length": 185.42857360839844, "epoch": 0.08349206349206349, "grad_norm": 0.003837903728708625, "kl": 0.004050922114402056, "learning_rate": 1.4909297052154192e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1315 }, { "completion_length": 149.7857208251953, "epoch": 0.08355555555555555, "grad_norm": 0.003127955598756671, "kl": 0.003382619470357895, "learning_rate": 1.492063492063492e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1316 }, { "completion_length": 153.92857360839844, "epoch": 0.08361904761904762, "grad_norm": 0.003007710911333561, "kl": 0.0038903546519577503, "learning_rate": 1.4931972789115646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1317 }, { "completion_length": 149.42857360839844, "epoch": 0.08368253968253968, "grad_norm": 0.0039181094616651535, "kl": 0.005019096657633781, "learning_rate": 1.4943310657596372e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1318 }, { "completion_length": 127.28572082519531, "epoch": 0.08374603174603175, "grad_norm": 0.003286986844614148, "kl": 0.0037087332457304, "learning_rate": 1.4954648526077097e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1319 }, { "completion_length": 171.57144165039062, "epoch": 0.0838095238095238, "grad_norm": 0.0033111870288848877, "kl": 0.0037912309635430574, "learning_rate": 1.4965986394557823e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1320 }, { "completion_length": 144.7857208251953, "epoch": 0.08387301587301588, "grad_norm": 0.004630254115909338, "kl": 0.0056769088841974735, "learning_rate": 1.4977324263038548e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1321 }, { "completion_length": 171.21429443359375, "epoch": 0.08393650793650793, "grad_norm": 0.003140308428555727, "kl": 0.0045262607745826244, "learning_rate": 1.498866213151927e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1322 }, { "completion_length": 164.85714721679688, "epoch": 0.084, "grad_norm": 0.004855109378695488, "kl": 0.005938936490565538, "learning_rate": 1.5e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1323 }, { "completion_length": 125.14286041259766, "epoch": 0.08406349206349206, "grad_norm": 0.005227997433394194, "kl": 0.006183063145726919, "learning_rate": 1.5011337868480725e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1324 }, { "completion_length": 182.71429443359375, "epoch": 0.08412698412698413, "grad_norm": 0.003514945739880204, "kl": 0.004745609126985073, "learning_rate": 1.502267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1325 }, { "completion_length": 199.2857208251953, "epoch": 0.08419047619047619, "grad_norm": 0.00339583121240139, "kl": 0.003986440133303404, "learning_rate": 1.5034013605442176e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1326 }, { "completion_length": 187.71429443359375, "epoch": 0.08425396825396825, "grad_norm": 0.0030780357774347067, "kl": 0.004128722939640284, "learning_rate": 1.5045351473922902e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1327 }, { "completion_length": 182.21429443359375, "epoch": 0.08431746031746032, "grad_norm": 0.0031883439514786005, "kl": 0.0047297063283622265, "learning_rate": 1.5056689342403627e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1328 }, { "completion_length": 149.92857360839844, "epoch": 0.08438095238095238, "grad_norm": 0.0039834799244999886, "kl": 0.005391201004385948, "learning_rate": 1.5068027210884355e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1329 }, { "completion_length": 161.21429443359375, "epoch": 0.08444444444444445, "grad_norm": 0.004584221635013819, "kl": 0.005807441659271717, "learning_rate": 1.5079365079365078e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1330 }, { "completion_length": 179.50001525878906, "epoch": 0.0845079365079365, "grad_norm": 0.003308327402919531, "kl": 0.004129977431148291, "learning_rate": 1.5090702947845804e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1331 }, { "completion_length": 167.35714721679688, "epoch": 0.08457142857142858, "grad_norm": 0.0034258330706506968, "kl": 0.0054493495263159275, "learning_rate": 1.5102040816326532e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1332 }, { "completion_length": 181.07144165039062, "epoch": 0.08463492063492063, "grad_norm": 0.0026541133411228657, "kl": 0.0037083900533616543, "learning_rate": 1.5113378684807255e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1333 }, { "completion_length": 165.85714721679688, "epoch": 0.0846984126984127, "grad_norm": 0.003834639210253954, "kl": 0.005420004948973656, "learning_rate": 1.512471655328798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1334 }, { "completion_length": 169.1428680419922, "epoch": 0.08476190476190476, "grad_norm": 0.005654396489262581, "kl": 0.0060384622775018215, "learning_rate": 1.5136054421768706e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1335 }, { "completion_length": 228.07144165039062, "epoch": 0.08482539682539683, "grad_norm": 0.0036365753039717674, "kl": 0.004459656309336424, "learning_rate": 1.5147392290249434e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1336 }, { "completion_length": 179.85714721679688, "epoch": 0.08488888888888889, "grad_norm": 0.004085058346390724, "kl": 0.005906356498599052, "learning_rate": 1.5158730158730157e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1337 }, { "completion_length": 183.57144165039062, "epoch": 0.08495238095238095, "grad_norm": 0.0031171119771897793, "kl": 0.00417553074657917, "learning_rate": 1.5170068027210882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1338 }, { "completion_length": 161.85714721679688, "epoch": 0.08501587301587302, "grad_norm": 0.00433789286762476, "kl": 0.006017266772687435, "learning_rate": 1.518140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1339 }, { "completion_length": 138.07144165039062, "epoch": 0.08507936507936507, "grad_norm": 1.3430851697921753, "kl": 0.005785152781754732, "learning_rate": 1.5192743764172336e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1340 }, { "completion_length": 132.1428680419922, "epoch": 0.08514285714285715, "grad_norm": 0.004487768281251192, "kl": 0.006772513035684824, "learning_rate": 1.520408163265306e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1341 }, { "completion_length": 128.6428680419922, "epoch": 0.0852063492063492, "grad_norm": 0.005029665771871805, "kl": 0.006223203148692846, "learning_rate": 1.5215419501133787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1342 }, { "completion_length": 146.07144165039062, "epoch": 0.08526984126984127, "grad_norm": 0.0047192140482366085, "kl": 0.0060687377117574215, "learning_rate": 1.5226757369614513e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1343 }, { "completion_length": 189.21429443359375, "epoch": 0.08533333333333333, "grad_norm": 0.00338045135140419, "kl": 0.004745602607727051, "learning_rate": 1.5238095238095238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1344 }, { "completion_length": 147.5, "epoch": 0.0853968253968254, "grad_norm": 0.004647175315767527, "kl": 0.006856995169073343, "learning_rate": 1.524943310657596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1345 }, { "completion_length": 160.85714721679688, "epoch": 0.08546031746031746, "grad_norm": 0.0033366677816957235, "kl": 0.0052777184173464775, "learning_rate": 1.526077097505669e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1346 }, { "completion_length": 160.5, "epoch": 0.08552380952380953, "grad_norm": 0.005500029772520065, "kl": 0.008643638342618942, "learning_rate": 1.5272108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1347 }, { "completion_length": 183.50001525878906, "epoch": 0.08558730158730159, "grad_norm": 0.003698362736031413, "kl": 0.005849079694598913, "learning_rate": 1.5283446712018138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1348 }, { "completion_length": 163.71429443359375, "epoch": 0.08565079365079364, "grad_norm": 0.0047660996206104755, "kl": 0.00752054201439023, "learning_rate": 1.5294784580498866e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1349 }, { "completion_length": 149.21429443359375, "epoch": 0.08571428571428572, "grad_norm": 1.8377482891082764, "kl": 0.006983217317610979, "learning_rate": 1.5306122448979592e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1350 }, { "completion_length": 165.42857360839844, "epoch": 0.08577777777777777, "grad_norm": 0.004960407502949238, "kl": 0.006780568510293961, "learning_rate": 1.5317460317460317e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1351 }, { "completion_length": 153.35714721679688, "epoch": 0.08584126984126984, "grad_norm": 0.00413749273866415, "kl": 0.006272078957408667, "learning_rate": 1.5328798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1352 }, { "completion_length": 198.85714721679688, "epoch": 0.0859047619047619, "grad_norm": 0.003116741543635726, "kl": 0.005412002094089985, "learning_rate": 1.5340136054421768e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1353 }, { "completion_length": 175.00001525878906, "epoch": 0.08596825396825397, "grad_norm": 0.004424968268722296, "kl": 0.006956968456506729, "learning_rate": 1.5351473922902494e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1354 }, { "completion_length": 142.6428680419922, "epoch": 0.08603174603174603, "grad_norm": 0.005860547535121441, "kl": 0.007870706729590893, "learning_rate": 1.536281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1355 }, { "completion_length": 194.21429443359375, "epoch": 0.0860952380952381, "grad_norm": 0.003345511620864272, "kl": 0.005120801739394665, "learning_rate": 1.5374149659863945e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1356 }, { "completion_length": 158.21429443359375, "epoch": 0.08615873015873016, "grad_norm": 0.004825000185519457, "kl": 0.008194824680685997, "learning_rate": 1.538548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1357 }, { "completion_length": 168.85714721679688, "epoch": 0.08622222222222223, "grad_norm": 0.003592446446418762, "kl": 0.005110108759254217, "learning_rate": 1.5396825396825396e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1358 }, { "completion_length": 155.57144165039062, "epoch": 0.08628571428571429, "grad_norm": 0.0036745413672178984, "kl": 0.006057274527847767, "learning_rate": 1.5408163265306122e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1359 }, { "completion_length": 162.21429443359375, "epoch": 0.08634920634920636, "grad_norm": 0.0035857229959219694, "kl": 0.006035844795405865, "learning_rate": 1.5419501133786847e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1360 }, { "completion_length": 155.57144165039062, "epoch": 0.08641269841269841, "grad_norm": 0.0033030472695827484, "kl": 0.005249849520623684, "learning_rate": 1.5430839002267573e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1361 }, { "completion_length": 169.1428680419922, "epoch": 0.08647619047619047, "grad_norm": 0.0031885679345577955, "kl": 0.005412892904132605, "learning_rate": 1.54421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1362 }, { "completion_length": 145.71429443359375, "epoch": 0.08653968253968254, "grad_norm": 0.00482512591406703, "kl": 0.007098199799656868, "learning_rate": 1.5453514739229024e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1363 }, { "completion_length": 142.2857208251953, "epoch": 0.0866031746031746, "grad_norm": 0.004449028987437487, "kl": 0.006442159879952669, "learning_rate": 1.546485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1364 }, { "completion_length": 171.71429443359375, "epoch": 0.08666666666666667, "grad_norm": 0.002905488247051835, "kl": 0.004527694545686245, "learning_rate": 1.5476190476190475e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1365 }, { "completion_length": 177.57144165039062, "epoch": 0.08673015873015873, "grad_norm": 0.003159875515848398, "kl": 0.004735037684440613, "learning_rate": 1.5487528344671203e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1366 }, { "completion_length": 188.71429443359375, "epoch": 0.0867936507936508, "grad_norm": 0.003901015967130661, "kl": 0.0062379250302910805, "learning_rate": 1.5498866213151926e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1367 }, { "completion_length": 140.07144165039062, "epoch": 0.08685714285714285, "grad_norm": 0.0034097107127308846, "kl": 0.005653446540236473, "learning_rate": 1.5510204081632651e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1368 }, { "completion_length": 168.5, "epoch": 0.08692063492063493, "grad_norm": 0.003239415120333433, "kl": 0.005794580094516277, "learning_rate": 1.552154195011338e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1369 }, { "completion_length": 167.07144165039062, "epoch": 0.08698412698412698, "grad_norm": 0.003197632497176528, "kl": 0.005481654312461615, "learning_rate": 1.5532879818594103e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1370 }, { "completion_length": 177.71429443359375, "epoch": 0.08704761904761905, "grad_norm": 0.0035301754251122475, "kl": 0.005752074997872114, "learning_rate": 1.5544217687074828e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1371 }, { "completion_length": 196.2857208251953, "epoch": 0.08711111111111111, "grad_norm": 0.003737028455361724, "kl": 0.00528885331004858, "learning_rate": 1.5555555555555556e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1372 }, { "completion_length": 175.7857208251953, "epoch": 0.08717460317460317, "grad_norm": 0.002772972220554948, "kl": 0.004495224915444851, "learning_rate": 1.5566893424036282e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1373 }, { "completion_length": 146.5, "epoch": 0.08723809523809524, "grad_norm": 0.003387338249012828, "kl": 0.004954352974891663, "learning_rate": 1.5578231292517005e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1374 }, { "completion_length": 142.0, "epoch": 0.0873015873015873, "grad_norm": 0.003479516366496682, "kl": 0.004867336712777615, "learning_rate": 1.558956916099773e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1375 }, { "completion_length": 145.92857360839844, "epoch": 0.08736507936507937, "grad_norm": 0.0027479904238134623, "kl": 0.003934584092348814, "learning_rate": 1.5600907029478459e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1376 }, { "completion_length": 156.6428680419922, "epoch": 0.08742857142857142, "grad_norm": 0.003661427181214094, "kl": 0.005671142600476742, "learning_rate": 1.5612244897959184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1377 }, { "completion_length": 143.2857208251953, "epoch": 0.0874920634920635, "grad_norm": 0.00315567827783525, "kl": 0.004715928342193365, "learning_rate": 1.5623582766439907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1378 }, { "completion_length": 192.00001525878906, "epoch": 0.08755555555555555, "grad_norm": 0.003218921832740307, "kl": 0.00520921079441905, "learning_rate": 1.5634920634920635e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1379 }, { "completion_length": 190.07144165039062, "epoch": 0.08761904761904762, "grad_norm": 0.003071291372179985, "kl": 0.00562440836802125, "learning_rate": 1.564625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1380 }, { "completion_length": 198.57144165039062, "epoch": 0.08768253968253968, "grad_norm": 0.002161039737984538, "kl": 0.0035213350784033537, "learning_rate": 1.5657596371882084e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1381 }, { "completion_length": 172.7857208251953, "epoch": 0.08774603174603175, "grad_norm": 0.0033608118537813425, "kl": 0.005157628562301397, "learning_rate": 1.5668934240362812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1382 }, { "completion_length": 208.2857208251953, "epoch": 0.08780952380952381, "grad_norm": 0.0032055487390607595, "kl": 0.005153512582182884, "learning_rate": 1.5680272108843537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1383 }, { "completion_length": 149.07144165039062, "epoch": 0.08787301587301587, "grad_norm": 0.0032124435529112816, "kl": 0.0056884209625422955, "learning_rate": 1.5691609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1384 }, { "completion_length": 200.57144165039062, "epoch": 0.08793650793650794, "grad_norm": 0.0030506174080073833, "kl": 0.004802302923053503, "learning_rate": 1.5702947845804986e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1385 }, { "completion_length": 140.42857360839844, "epoch": 0.088, "grad_norm": 0.0036943084560334682, "kl": 0.005293522495776415, "learning_rate": 1.5714285714285714e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1386 }, { "completion_length": 154.2857208251953, "epoch": 0.08806349206349207, "grad_norm": 0.003564156824722886, "kl": 0.005402539856731892, "learning_rate": 1.572562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1387 }, { "completion_length": 136.6428680419922, "epoch": 0.08812698412698412, "grad_norm": 0.004269394092261791, "kl": 0.005713834427297115, "learning_rate": 1.5736961451247165e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1388 }, { "completion_length": 189.07144165039062, "epoch": 0.0881904761904762, "grad_norm": 0.0032774354331195354, "kl": 0.005679185036569834, "learning_rate": 1.574829931972789e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1389 }, { "completion_length": 137.21429443359375, "epoch": 0.08825396825396825, "grad_norm": 0.0024955023545771837, "kl": 0.0036328942514955997, "learning_rate": 1.5759637188208616e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1390 }, { "completion_length": 147.2857208251953, "epoch": 0.08831746031746032, "grad_norm": 0.003369995392858982, "kl": 0.005566117353737354, "learning_rate": 1.5770975056689342e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1391 }, { "completion_length": 150.57144165039062, "epoch": 0.08838095238095238, "grad_norm": 0.0030617178417742252, "kl": 0.004707342945039272, "learning_rate": 1.5782312925170067e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1392 }, { "completion_length": 182.1428680419922, "epoch": 0.08844444444444445, "grad_norm": 0.003049965016543865, "kl": 0.004909242503345013, "learning_rate": 1.5793650793650793e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1393 }, { "completion_length": 159.42857360839844, "epoch": 0.08850793650793651, "grad_norm": 0.003493228694424033, "kl": 0.004390973597764969, "learning_rate": 1.5804988662131518e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1394 }, { "completion_length": 154.42857360839844, "epoch": 0.08857142857142856, "grad_norm": 0.0030118597205728292, "kl": 0.004629346076399088, "learning_rate": 1.5816326530612244e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1395 }, { "completion_length": 174.7857208251953, "epoch": 0.08863492063492064, "grad_norm": 0.0036112223751842976, "kl": 0.004938839003443718, "learning_rate": 1.582766439909297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1396 }, { "completion_length": 167.5, "epoch": 0.08869841269841269, "grad_norm": 0.003111554542556405, "kl": 0.004515385255217552, "learning_rate": 1.5839002267573695e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1397 }, { "completion_length": 144.57144165039062, "epoch": 0.08876190476190476, "grad_norm": 0.0039740814827382565, "kl": 0.005395256448537111, "learning_rate": 1.585034013605442e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1398 }, { "completion_length": 180.92857360839844, "epoch": 0.08882539682539682, "grad_norm": 0.003377588465809822, "kl": 0.005014426540583372, "learning_rate": 1.586167800453515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1399 }, { "completion_length": 182.00001525878906, "epoch": 0.08888888888888889, "grad_norm": 0.0024600857868790627, "kl": 0.003618938848376274, "learning_rate": 1.5873015873015872e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1400 }, { "completion_length": 157.71429443359375, "epoch": 0.08895238095238095, "grad_norm": 0.003733142977580428, "kl": 0.0051745339296758175, "learning_rate": 1.5884353741496597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1401 }, { "completion_length": 141.92857360839844, "epoch": 0.08901587301587302, "grad_norm": 0.002409361070021987, "kl": 0.0035706835333257914, "learning_rate": 1.5895691609977325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1402 }, { "completion_length": 169.92857360839844, "epoch": 0.08907936507936508, "grad_norm": 0.0028340104036033154, "kl": 0.004443909972906113, "learning_rate": 1.590702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1403 }, { "completion_length": 175.6428680419922, "epoch": 0.08914285714285715, "grad_norm": 0.002586638554930687, "kl": 0.003640047274529934, "learning_rate": 1.5918367346938774e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1404 }, { "completion_length": 195.42857360839844, "epoch": 0.0892063492063492, "grad_norm": 0.0031644145492464304, "kl": 0.00450815400108695, "learning_rate": 1.59297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1405 }, { "completion_length": 154.6428680419922, "epoch": 0.08926984126984126, "grad_norm": 0.0035349447280168533, "kl": 0.005298434756696224, "learning_rate": 1.5941043083900228e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1406 }, { "completion_length": 182.71429443359375, "epoch": 0.08933333333333333, "grad_norm": 0.0019882700871676207, "kl": 0.0028908955864608288, "learning_rate": 1.595238095238095e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1407 }, { "completion_length": 187.7857208251953, "epoch": 0.08939682539682539, "grad_norm": 0.0027093973476439714, "kl": 0.003555523930117488, "learning_rate": 1.5963718820861676e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1408 }, { "completion_length": 186.71429443359375, "epoch": 0.08946031746031746, "grad_norm": 0.0026371527928858995, "kl": 0.003637841437011957, "learning_rate": 1.5975056689342404e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1409 }, { "completion_length": 156.85714721679688, "epoch": 0.08952380952380952, "grad_norm": 0.0024715361651033163, "kl": 0.003631418803706765, "learning_rate": 1.598639455782313e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1410 }, { "completion_length": 136.07144165039062, "epoch": 0.08958730158730159, "grad_norm": 0.0023189601488411427, "kl": 0.0031659083906561136, "learning_rate": 1.5997732426303853e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1411 }, { "completion_length": 157.71429443359375, "epoch": 0.08965079365079365, "grad_norm": 0.0035859723575413227, "kl": 0.004583044443279505, "learning_rate": 1.600907029478458e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1412 }, { "completion_length": 149.42857360839844, "epoch": 0.08971428571428572, "grad_norm": 0.0033377858344465494, "kl": 0.004320305772125721, "learning_rate": 1.6020408163265306e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1413 }, { "completion_length": 165.6428680419922, "epoch": 0.08977777777777778, "grad_norm": 0.0031810293439775705, "kl": 0.004526478238403797, "learning_rate": 1.6031746031746032e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1414 }, { "completion_length": 170.42857360839844, "epoch": 0.08984126984126985, "grad_norm": 0.002502231625840068, "kl": 0.003582653822377324, "learning_rate": 1.6043083900226755e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1415 }, { "completion_length": 148.35714721679688, "epoch": 0.0899047619047619, "grad_norm": 0.0037319816183298826, "kl": 0.004735403694212437, "learning_rate": 1.6054421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1416 }, { "completion_length": 162.92857360839844, "epoch": 0.08996825396825398, "grad_norm": 0.0025476471055299044, "kl": 0.003514497773721814, "learning_rate": 1.6065759637188208e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1417 }, { "completion_length": 164.6428680419922, "epoch": 0.09003174603174603, "grad_norm": 0.002482006326317787, "kl": 0.003389312420040369, "learning_rate": 1.6077097505668931e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1418 }, { "completion_length": 170.35714721679688, "epoch": 0.09009523809523809, "grad_norm": 0.0027532975655049086, "kl": 0.003943956922739744, "learning_rate": 1.608843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1419 }, { "completion_length": 177.50001525878906, "epoch": 0.09015873015873016, "grad_norm": 0.0023645616602152586, "kl": 0.0032453564926981926, "learning_rate": 1.6099773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1420 }, { "completion_length": 136.7857208251953, "epoch": 0.09022222222222222, "grad_norm": 0.0021155537106096745, "kl": 0.0029250076040625572, "learning_rate": 1.611111111111111e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1421 }, { "completion_length": 180.07144165039062, "epoch": 0.09028571428571429, "grad_norm": 0.0028571372386068106, "kl": 0.004109895322471857, "learning_rate": 1.6122448979591836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1422 }, { "completion_length": 155.85714721679688, "epoch": 0.09034920634920635, "grad_norm": 0.0023495666682720184, "kl": 0.0036444992292672396, "learning_rate": 1.6133786848072562e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1423 }, { "completion_length": 151.5, "epoch": 0.09041269841269842, "grad_norm": 0.0030771815218031406, "kl": 0.004197075963020325, "learning_rate": 1.6145124716553287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1424 }, { "completion_length": 166.2857208251953, "epoch": 0.09047619047619047, "grad_norm": 0.0026223028544336557, "kl": 0.004173248540610075, "learning_rate": 1.6156462585034013e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1425 }, { "completion_length": 200.92857360839844, "epoch": 0.09053968253968254, "grad_norm": 0.002515982836484909, "kl": 0.003581091994419694, "learning_rate": 1.6167800453514738e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1426 }, { "completion_length": 153.2857208251953, "epoch": 0.0906031746031746, "grad_norm": 0.0024963237810879946, "kl": 0.003623061580583453, "learning_rate": 1.6179138321995464e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1427 }, { "completion_length": 194.71429443359375, "epoch": 0.09066666666666667, "grad_norm": 0.002728085033595562, "kl": 0.004035403020679951, "learning_rate": 1.619047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1428 }, { "completion_length": 177.50001525878906, "epoch": 0.09073015873015873, "grad_norm": 0.002562016947194934, "kl": 0.003240947611629963, "learning_rate": 1.6201814058956915e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1429 }, { "completion_length": 196.35714721679688, "epoch": 0.09079365079365079, "grad_norm": 0.002369194757193327, "kl": 0.003891716245561838, "learning_rate": 1.621315192743764e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1430 }, { "completion_length": 207.2857208251953, "epoch": 0.09085714285714286, "grad_norm": 0.0023982583079487085, "kl": 0.003707727650180459, "learning_rate": 1.6224489795918366e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1431 }, { "completion_length": 155.6428680419922, "epoch": 0.09092063492063492, "grad_norm": 0.002818376524373889, "kl": 0.0037433826364576817, "learning_rate": 1.6235827664399094e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1432 }, { "completion_length": 177.2857208251953, "epoch": 0.09098412698412699, "grad_norm": 0.002842941088601947, "kl": 0.004322744905948639, "learning_rate": 1.6247165532879817e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1433 }, { "completion_length": 176.85714721679688, "epoch": 0.09104761904761904, "grad_norm": 0.0022493847645819187, "kl": 0.0031726777087897062, "learning_rate": 1.6258503401360543e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1434 }, { "completion_length": 166.71429443359375, "epoch": 0.09111111111111111, "grad_norm": 0.002736467169597745, "kl": 0.0040134466253221035, "learning_rate": 1.6269841269841268e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1435 }, { "completion_length": 126.42857360839844, "epoch": 0.09117460317460317, "grad_norm": 0.003647922771051526, "kl": 0.004613946657627821, "learning_rate": 1.6281179138321996e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1436 }, { "completion_length": 183.42857360839844, "epoch": 0.09123809523809524, "grad_norm": 0.002685646992176771, "kl": 0.0037021092139184475, "learning_rate": 1.629251700680272e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1437 }, { "completion_length": 164.35714721679688, "epoch": 0.0913015873015873, "grad_norm": 0.0029925715643912554, "kl": 0.004398646764457226, "learning_rate": 1.6303854875283445e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1438 }, { "completion_length": 133.35714721679688, "epoch": 0.09136507936507937, "grad_norm": 0.003268791362643242, "kl": 0.004147893283516169, "learning_rate": 1.6315192743764173e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1439 }, { "completion_length": 169.2857208251953, "epoch": 0.09142857142857143, "grad_norm": 0.0025992202572524548, "kl": 0.0033105069305747747, "learning_rate": 1.6326530612244896e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1440 }, { "completion_length": 142.21429443359375, "epoch": 0.09149206349206349, "grad_norm": 0.003269120119512081, "kl": 0.003968330100178719, "learning_rate": 1.6337868480725622e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1441 }, { "completion_length": 135.21429443359375, "epoch": 0.09155555555555556, "grad_norm": 0.0022576251067221165, "kl": 0.0031795392278581858, "learning_rate": 1.634920634920635e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1442 }, { "completion_length": 191.71429443359375, "epoch": 0.09161904761904761, "grad_norm": 1.197475552558899, "kl": 0.0035354108549654484, "learning_rate": 1.6360544217687075e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1443 }, { "completion_length": 156.21429443359375, "epoch": 0.09168253968253968, "grad_norm": 0.0022027231752872467, "kl": 0.0030841806437820196, "learning_rate": 1.6371882086167798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1444 }, { "completion_length": 156.57144165039062, "epoch": 0.09174603174603174, "grad_norm": 0.0035411405842751265, "kl": 0.003940466325730085, "learning_rate": 1.6383219954648524e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1445 }, { "completion_length": 165.85714721679688, "epoch": 0.09180952380952381, "grad_norm": 0.0021441057324409485, "kl": 0.002880548359826207, "learning_rate": 1.6394557823129252e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1446 }, { "completion_length": 146.2857208251953, "epoch": 0.09187301587301587, "grad_norm": 0.0023015588521957397, "kl": 0.0031844517216086388, "learning_rate": 1.6405895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1447 }, { "completion_length": 171.71429443359375, "epoch": 0.09193650793650794, "grad_norm": 0.002293759724125266, "kl": 0.0032699434086680412, "learning_rate": 1.64172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1448 }, { "completion_length": 167.6428680419922, "epoch": 0.092, "grad_norm": 0.0021379783283919096, "kl": 0.00289828865788877, "learning_rate": 1.6428571428571429e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1449 }, { "completion_length": 152.0, "epoch": 0.09206349206349207, "grad_norm": 0.001498116529546678, "kl": 0.002325425622984767, "learning_rate": 1.6439909297052154e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1450 }, { "completion_length": 159.5, "epoch": 0.09212698412698413, "grad_norm": 0.0019312931690365076, "kl": 0.002557111205533147, "learning_rate": 1.6451247165532877e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1451 }, { "completion_length": 133.21429443359375, "epoch": 0.09219047619047618, "grad_norm": 0.002630433766171336, "kl": 0.0036325270775705576, "learning_rate": 1.6462585034013605e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1452 }, { "completion_length": 178.71429443359375, "epoch": 0.09225396825396825, "grad_norm": 0.0020938802044838667, "kl": 0.0031003302428871393, "learning_rate": 1.647392290249433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1453 }, { "completion_length": 178.7857208251953, "epoch": 0.09231746031746031, "grad_norm": 0.0014977363171055913, "kl": 0.002199428388848901, "learning_rate": 1.6485260770975056e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1454 }, { "completion_length": 186.2857208251953, "epoch": 0.09238095238095238, "grad_norm": 0.002115288283675909, "kl": 0.002760706003755331, "learning_rate": 1.649659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1455 }, { "completion_length": 218.50001525878906, "epoch": 0.09244444444444444, "grad_norm": 0.0017261485336348414, "kl": 0.002585687907412648, "learning_rate": 1.6507936507936507e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1456 }, { "completion_length": 167.1428680419922, "epoch": 0.09250793650793651, "grad_norm": 0.0018375988584011793, "kl": 0.002664392115548253, "learning_rate": 1.6519274376417233e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1457 }, { "completion_length": 170.5, "epoch": 0.09257142857142857, "grad_norm": 0.0022316209506243467, "kl": 0.0032823034562170506, "learning_rate": 1.6530612244897958e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1458 }, { "completion_length": 167.0, "epoch": 0.09263492063492064, "grad_norm": 0.001755535718984902, "kl": 0.002434683730825782, "learning_rate": 1.6541950113378684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1459 }, { "completion_length": 161.85714721679688, "epoch": 0.0926984126984127, "grad_norm": 0.002060173312202096, "kl": 0.0030249683186411858, "learning_rate": 1.655328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1460 }, { "completion_length": 171.92857360839844, "epoch": 0.09276190476190477, "grad_norm": 0.002563977148383856, "kl": 0.0036286888644099236, "learning_rate": 1.6564625850340135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1461 }, { "completion_length": 167.1428680419922, "epoch": 0.09282539682539682, "grad_norm": 0.0018878370756283402, "kl": 0.0027602114714682102, "learning_rate": 1.6575963718820863e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1462 }, { "completion_length": 176.1428680419922, "epoch": 0.09288888888888888, "grad_norm": 0.0021518052089959383, "kl": 0.0029263184405863285, "learning_rate": 1.6587301587301586e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1463 }, { "completion_length": 156.92857360839844, "epoch": 0.09295238095238095, "grad_norm": 0.0017921117832884192, "kl": 0.0024925973266363144, "learning_rate": 1.6598639455782312e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1464 }, { "completion_length": 168.57144165039062, "epoch": 0.09301587301587301, "grad_norm": 0.0016366380732506514, "kl": 0.002355630975216627, "learning_rate": 1.660997732426304e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1465 }, { "completion_length": 176.35714721679688, "epoch": 0.09307936507936508, "grad_norm": 0.0018408562755212188, "kl": 0.0025862727779895067, "learning_rate": 1.6621315192743763e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1466 }, { "completion_length": 183.07144165039062, "epoch": 0.09314285714285714, "grad_norm": 0.001559266704134643, "kl": 0.0023098543751984835, "learning_rate": 1.6632653061224488e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1467 }, { "completion_length": 130.85714721679688, "epoch": 0.09320634920634921, "grad_norm": 0.002022940432652831, "kl": 0.0029672349337488413, "learning_rate": 1.6643990929705214e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1468 }, { "completion_length": 166.35714721679688, "epoch": 0.09326984126984127, "grad_norm": 0.0016753921518102288, "kl": 0.002376067452132702, "learning_rate": 1.6655328798185942e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1469 }, { "completion_length": 197.50001525878906, "epoch": 0.09333333333333334, "grad_norm": 0.001838429132476449, "kl": 0.002670307643711567, "learning_rate": 1.6666666666666665e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1470 }, { "completion_length": 173.50001525878906, "epoch": 0.0933968253968254, "grad_norm": 0.0018968264339491725, "kl": 0.0028023207560181618, "learning_rate": 1.667800453514739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1471 }, { "completion_length": 199.00001525878906, "epoch": 0.09346031746031747, "grad_norm": 0.0016155322082340717, "kl": 0.0024500139988958836, "learning_rate": 1.668934240362812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1472 }, { "completion_length": 160.6428680419922, "epoch": 0.09352380952380952, "grad_norm": 0.001971642253920436, "kl": 0.002706592669710517, "learning_rate": 1.6700680272108844e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1473 }, { "completion_length": 177.6428680419922, "epoch": 0.0935873015873016, "grad_norm": 0.0014924153219908476, "kl": 0.0021620162297040224, "learning_rate": 1.6712018140589567e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1474 }, { "completion_length": 160.6428680419922, "epoch": 0.09365079365079365, "grad_norm": 0.002355751348659396, "kl": 0.003315636422485113, "learning_rate": 1.6723356009070295e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1475 }, { "completion_length": 178.2857208251953, "epoch": 0.09371428571428571, "grad_norm": 0.001669923192821443, "kl": 0.0021480750292539597, "learning_rate": 1.673469387755102e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1476 }, { "completion_length": 188.71429443359375, "epoch": 0.09377777777777778, "grad_norm": 0.0017256141873076558, "kl": 0.0023913856130093336, "learning_rate": 1.6746031746031744e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1477 }, { "completion_length": 131.0, "epoch": 0.09384126984126984, "grad_norm": 0.0017154259840026498, "kl": 0.0026883133687078953, "learning_rate": 1.675736961451247e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1478 }, { "completion_length": 199.57144165039062, "epoch": 0.09390476190476191, "grad_norm": 0.0016023897333070636, "kl": 0.0021633224096149206, "learning_rate": 1.6768707482993198e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1479 }, { "completion_length": 124.14286041259766, "epoch": 0.09396825396825396, "grad_norm": 0.002020603511482477, "kl": 0.002455937210470438, "learning_rate": 1.6780045351473923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1480 }, { "completion_length": 165.07144165039062, "epoch": 0.09403174603174604, "grad_norm": 0.002117861993610859, "kl": 0.0027852491475641727, "learning_rate": 1.6791383219954646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1481 }, { "completion_length": 174.7857208251953, "epoch": 0.09409523809523809, "grad_norm": 0.001360271591693163, "kl": 0.002007200848311186, "learning_rate": 1.6802721088435374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1482 }, { "completion_length": 148.6428680419922, "epoch": 0.09415873015873016, "grad_norm": 0.0012310304446145892, "kl": 0.0017802697839215398, "learning_rate": 1.68140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1483 }, { "completion_length": 183.50001525878906, "epoch": 0.09422222222222222, "grad_norm": 0.0014857150381430984, "kl": 0.0021652975119650364, "learning_rate": 1.6825396825396825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1484 }, { "completion_length": 123.00000762939453, "epoch": 0.09428571428571429, "grad_norm": 0.002252758014947176, "kl": 0.0032506862189620733, "learning_rate": 1.6836734693877548e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1485 }, { "completion_length": 160.42857360839844, "epoch": 0.09434920634920635, "grad_norm": 0.0017647244967520237, "kl": 0.0024504137691110373, "learning_rate": 1.6848072562358276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1486 }, { "completion_length": 148.0, "epoch": 0.0944126984126984, "grad_norm": 0.001656434964388609, "kl": 0.0022677734959870577, "learning_rate": 1.6859410430839002e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1487 }, { "completion_length": 195.57144165039062, "epoch": 0.09447619047619048, "grad_norm": 0.0014128152979537845, "kl": 0.0020148938056081533, "learning_rate": 1.6870748299319725e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1488 }, { "completion_length": 149.57144165039062, "epoch": 0.09453968253968253, "grad_norm": 0.002447995124384761, "kl": 0.0034120064228773117, "learning_rate": 1.6882086167800453e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1489 }, { "completion_length": 175.00001525878906, "epoch": 0.0946031746031746, "grad_norm": 0.001807264517992735, "kl": 0.0023683321196585894, "learning_rate": 1.6893424036281179e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1490 }, { "completion_length": 169.5, "epoch": 0.09466666666666666, "grad_norm": 0.002128523774445057, "kl": 0.003067798214033246, "learning_rate": 1.6904761904761904e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1491 }, { "completion_length": 185.85714721679688, "epoch": 0.09473015873015873, "grad_norm": 0.001450554234907031, "kl": 0.0021244280505925417, "learning_rate": 1.691609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1492 }, { "completion_length": 136.35714721679688, "epoch": 0.09479365079365079, "grad_norm": 0.0020809697452932596, "kl": 0.002782760886475444, "learning_rate": 1.6927437641723355e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1493 }, { "completion_length": 136.35714721679688, "epoch": 0.09485714285714286, "grad_norm": 0.0016413652338087559, "kl": 0.0023417051415890455, "learning_rate": 1.693877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1494 }, { "completion_length": 139.71429443359375, "epoch": 0.09492063492063492, "grad_norm": 0.001430787960998714, "kl": 0.0018208561232313514, "learning_rate": 1.695011337868481e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1495 }, { "completion_length": 163.85714721679688, "epoch": 0.09498412698412699, "grad_norm": 0.0016758801648393273, "kl": 0.002580104162916541, "learning_rate": 1.6961451247165532e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1496 }, { "completion_length": 131.07144165039062, "epoch": 0.09504761904761905, "grad_norm": 0.0015430304920300841, "kl": 0.002281497698277235, "learning_rate": 1.6972789115646257e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1497 }, { "completion_length": 164.07144165039062, "epoch": 0.0951111111111111, "grad_norm": 0.0017027470748871565, "kl": 0.0022475835867226124, "learning_rate": 1.6984126984126983e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1498 }, { "completion_length": 149.5, "epoch": 0.09517460317460318, "grad_norm": 0.0016104783862829208, "kl": 0.002136582974344492, "learning_rate": 1.6995464852607708e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1499 }, { "completion_length": 154.92857360839844, "epoch": 0.09523809523809523, "grad_norm": 0.0016666987212374806, "kl": 0.0022696643136441708, "learning_rate": 1.7006802721088434e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1500 }, { "completion_length": 154.21429443359375, "epoch": 0.0953015873015873, "grad_norm": 0.0016670827753841877, "kl": 0.0022503426298499107, "learning_rate": 1.701814058956916e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1501 }, { "completion_length": 180.50001525878906, "epoch": 0.09536507936507936, "grad_norm": 0.001694834209047258, "kl": 0.0023119940888136625, "learning_rate": 1.7029478458049888e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1502 }, { "completion_length": 131.7857208251953, "epoch": 0.09542857142857143, "grad_norm": 0.0018951388774439692, "kl": 0.002501680515706539, "learning_rate": 1.704081632653061e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1503 }, { "completion_length": 160.0, "epoch": 0.09549206349206349, "grad_norm": 0.002043096348643303, "kl": 0.002496238797903061, "learning_rate": 1.7052154195011336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1504 }, { "completion_length": 162.35714721679688, "epoch": 0.09555555555555556, "grad_norm": 0.0020800670608878136, "kl": 0.002664501080289483, "learning_rate": 1.7063492063492064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1505 }, { "completion_length": 224.71429443359375, "epoch": 0.09561904761904762, "grad_norm": 0.0016547676641494036, "kl": 0.002358539029955864, "learning_rate": 1.707482993197279e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1506 }, { "completion_length": 174.1428680419922, "epoch": 0.09568253968253969, "grad_norm": 0.0020020673982799053, "kl": 0.0029074857011437416, "learning_rate": 1.7086167800453513e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1507 }, { "completion_length": 149.07144165039062, "epoch": 0.09574603174603175, "grad_norm": 0.002149644074961543, "kl": 0.0030210393015295267, "learning_rate": 1.7097505668934238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1508 }, { "completion_length": 160.35714721679688, "epoch": 0.0958095238095238, "grad_norm": 0.0021857258398085833, "kl": 0.0027107400819659233, "learning_rate": 1.7108843537414967e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1509 }, { "completion_length": 162.07144165039062, "epoch": 0.09587301587301587, "grad_norm": 0.0016432588454335928, "kl": 0.0022971804719418287, "learning_rate": 1.712018140589569e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1510 }, { "completion_length": 166.85714721679688, "epoch": 0.09593650793650793, "grad_norm": 0.0015813998179510236, "kl": 0.0022629969753324986, "learning_rate": 1.7131519274376415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1511 }, { "completion_length": 153.5, "epoch": 0.096, "grad_norm": 0.9566473960876465, "kl": 0.0019130308646708727, "learning_rate": 1.7142857142857143e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1512 }, { "completion_length": 128.2857208251953, "epoch": 0.09606349206349206, "grad_norm": 0.0018695034086704254, "kl": 0.0026400533970445395, "learning_rate": 1.715419501133787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1513 }, { "completion_length": 175.1428680419922, "epoch": 0.09612698412698413, "grad_norm": 0.0019106970867142081, "kl": 0.002800107467919588, "learning_rate": 1.7165532879818592e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1514 }, { "completion_length": 161.07144165039062, "epoch": 0.09619047619047619, "grad_norm": 0.0018752070609480143, "kl": 0.002583832945674658, "learning_rate": 1.717687074829932e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1515 }, { "completion_length": 212.50001525878906, "epoch": 0.09625396825396826, "grad_norm": 0.001713664154522121, "kl": 0.0022942149080336094, "learning_rate": 1.7188208616780045e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1516 }, { "completion_length": 155.42857360839844, "epoch": 0.09631746031746032, "grad_norm": 0.0020665263291448355, "kl": 0.002609042916446924, "learning_rate": 1.719954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1517 }, { "completion_length": 115.21429443359375, "epoch": 0.09638095238095239, "grad_norm": 0.002175418194383383, "kl": 0.0025527840480208397, "learning_rate": 1.7210884353741494e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1518 }, { "completion_length": 182.21429443359375, "epoch": 0.09644444444444444, "grad_norm": 0.0017660356825217605, "kl": 0.0026008933782577515, "learning_rate": 1.7222222222222222e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1519 }, { "completion_length": 176.92857360839844, "epoch": 0.09650793650793651, "grad_norm": 0.0022387707140296698, "kl": 0.0028053359128534794, "learning_rate": 1.7233560090702948e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1520 }, { "completion_length": 162.1428680419922, "epoch": 0.09657142857142857, "grad_norm": 0.0017545336158946157, "kl": 0.002225353615358472, "learning_rate": 1.7244897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1521 }, { "completion_length": 138.7857208251953, "epoch": 0.09663492063492063, "grad_norm": 0.002118348842486739, "kl": 0.002548293676227331, "learning_rate": 1.7256235827664399e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1522 }, { "completion_length": 178.85714721679688, "epoch": 0.0966984126984127, "grad_norm": 0.0019427149090915918, "kl": 0.002457733266055584, "learning_rate": 1.7267573696145124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1523 }, { "completion_length": 175.42857360839844, "epoch": 0.09676190476190476, "grad_norm": 0.0019972140435129404, "kl": 0.002612676238641143, "learning_rate": 1.727891156462585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1524 }, { "completion_length": 214.6428680419922, "epoch": 0.09682539682539683, "grad_norm": 0.00160540034994483, "kl": 0.0022935003507882357, "learning_rate": 1.7290249433106575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1525 }, { "completion_length": 124.21429443359375, "epoch": 0.09688888888888889, "grad_norm": 0.0021117033902555704, "kl": 0.0026919927913695574, "learning_rate": 1.73015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1526 }, { "completion_length": 167.1428680419922, "epoch": 0.09695238095238096, "grad_norm": 0.0017778152832761407, "kl": 0.0023612857330590487, "learning_rate": 1.7312925170068026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1527 }, { "completion_length": 138.1428680419922, "epoch": 0.09701587301587301, "grad_norm": 0.002526553114876151, "kl": 0.003169881645590067, "learning_rate": 1.7324263038548752e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1528 }, { "completion_length": 166.35714721679688, "epoch": 0.09707936507936508, "grad_norm": 0.002330382354557514, "kl": 0.002994688693434, "learning_rate": 1.7335600907029477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1529 }, { "completion_length": 140.85714721679688, "epoch": 0.09714285714285714, "grad_norm": 0.0026149332989007235, "kl": 0.003105515381321311, "learning_rate": 1.7346938775510203e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1530 }, { "completion_length": 181.2857208251953, "epoch": 0.09720634920634921, "grad_norm": 0.0022819016594439745, "kl": 0.003356646280735731, "learning_rate": 1.7358276643990929e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1531 }, { "completion_length": 158.7857208251953, "epoch": 0.09726984126984127, "grad_norm": 0.0022497656755149364, "kl": 0.0030078506097197533, "learning_rate": 1.7369614512471657e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1532 }, { "completion_length": 152.57144165039062, "epoch": 0.09733333333333333, "grad_norm": 0.002432347973808646, "kl": 0.0030635271687060595, "learning_rate": 1.738095238095238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1533 }, { "completion_length": 153.85714721679688, "epoch": 0.0973968253968254, "grad_norm": 0.002002680441364646, "kl": 0.002588909585028887, "learning_rate": 1.7392290249433105e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1534 }, { "completion_length": 190.50001525878906, "epoch": 0.09746031746031746, "grad_norm": 0.0021948697976768017, "kl": 0.0026609962806105614, "learning_rate": 1.7403628117913833e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1535 }, { "completion_length": 196.42857360839844, "epoch": 0.09752380952380953, "grad_norm": 0.0027842577546834946, "kl": 0.0030729910358786583, "learning_rate": 1.7414965986394556e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1536 }, { "completion_length": 137.07144165039062, "epoch": 0.09758730158730158, "grad_norm": 0.0025816375855356455, "kl": 0.0033930244389921427, "learning_rate": 1.7426303854875282e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1537 }, { "completion_length": 183.2857208251953, "epoch": 0.09765079365079365, "grad_norm": 0.0014539248077198863, "kl": 0.002090894617140293, "learning_rate": 1.7437641723356007e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1538 }, { "completion_length": 161.7857208251953, "epoch": 0.09771428571428571, "grad_norm": 0.0022715686354786158, "kl": 0.0029842290095984936, "learning_rate": 1.7448979591836736e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1539 }, { "completion_length": 157.5, "epoch": 0.09777777777777778, "grad_norm": 0.0019484328804537654, "kl": 0.0025648947339504957, "learning_rate": 1.7460317460317458e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1540 }, { "completion_length": 171.7857208251953, "epoch": 0.09784126984126984, "grad_norm": 0.0024109911173582077, "kl": 0.0032660202123224735, "learning_rate": 1.7471655328798184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1541 }, { "completion_length": 176.00001525878906, "epoch": 0.09790476190476191, "grad_norm": 0.002395069692283869, "kl": 0.0029539200477302074, "learning_rate": 1.7482993197278912e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1542 }, { "completion_length": 161.71429443359375, "epoch": 0.09796825396825397, "grad_norm": 0.0018429236952215433, "kl": 0.0026444075629115105, "learning_rate": 1.7494331065759638e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1543 }, { "completion_length": 142.71429443359375, "epoch": 0.09803174603174603, "grad_norm": 0.0028385333716869354, "kl": 0.003295100061222911, "learning_rate": 1.750566893424036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1544 }, { "completion_length": 157.92857360839844, "epoch": 0.0980952380952381, "grad_norm": 0.002563742222264409, "kl": 0.003114530583843589, "learning_rate": 1.751700680272109e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1545 }, { "completion_length": 189.00001525878906, "epoch": 0.09815873015873015, "grad_norm": 0.0017150755738839507, "kl": 0.0023810097482055426, "learning_rate": 1.7528344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1546 }, { "completion_length": 182.2857208251953, "epoch": 0.09822222222222222, "grad_norm": 0.0021778263617306948, "kl": 0.0028136230539530516, "learning_rate": 1.7539682539682537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1547 }, { "completion_length": 131.21429443359375, "epoch": 0.09828571428571428, "grad_norm": 0.0025023275520652533, "kl": 0.002552104415372014, "learning_rate": 1.7551020408163263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1548 }, { "completion_length": 177.50001525878906, "epoch": 0.09834920634920635, "grad_norm": 0.9466010928153992, "kl": 0.002711399458348751, "learning_rate": 1.756235827664399e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1549 }, { "completion_length": 219.4285888671875, "epoch": 0.09841269841269841, "grad_norm": 0.001471265684813261, "kl": 0.002255969913676381, "learning_rate": 1.7573696145124717e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1550 }, { "completion_length": 211.92857360839844, "epoch": 0.09847619047619048, "grad_norm": 0.0020157359540462494, "kl": 0.002773596905171871, "learning_rate": 1.758503401360544e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1551 }, { "completion_length": 217.00001525878906, "epoch": 0.09853968253968254, "grad_norm": 0.0015367943560704589, "kl": 0.002114498522132635, "learning_rate": 1.7596371882086168e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1552 }, { "completion_length": 160.5, "epoch": 0.09860317460317461, "grad_norm": 0.002253136830404401, "kl": 0.003187960246577859, "learning_rate": 1.7607709750566893e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1553 }, { "completion_length": 148.21429443359375, "epoch": 0.09866666666666667, "grad_norm": 0.0024669310078024864, "kl": 0.003431764431297779, "learning_rate": 1.761904761904762e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1554 }, { "completion_length": 172.35714721679688, "epoch": 0.09873015873015872, "grad_norm": 0.003028113627806306, "kl": 0.0036639689933508635, "learning_rate": 1.7630385487528344e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1555 }, { "completion_length": 178.00001525878906, "epoch": 0.0987936507936508, "grad_norm": 0.0026467395946383476, "kl": 0.003760425839573145, "learning_rate": 1.764172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1556 }, { "completion_length": 159.85714721679688, "epoch": 0.09885714285714285, "grad_norm": 0.002550916513428092, "kl": 0.003436737461015582, "learning_rate": 1.7653061224489795e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1557 }, { "completion_length": 153.0, "epoch": 0.09892063492063492, "grad_norm": 0.0027606517542153597, "kl": 0.004038097336888313, "learning_rate": 1.7664399092970518e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1558 }, { "completion_length": 170.1428680419922, "epoch": 0.09898412698412698, "grad_norm": 0.0025997194461524487, "kl": 0.004104537423700094, "learning_rate": 1.7675736961451246e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1559 }, { "completion_length": 136.1428680419922, "epoch": 0.09904761904761905, "grad_norm": 0.0029706971254199743, "kl": 0.004525607917457819, "learning_rate": 1.7687074829931972e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1560 }, { "completion_length": 140.6428680419922, "epoch": 0.09911111111111111, "grad_norm": 0.0026608973275870085, "kl": 0.003727060044184327, "learning_rate": 1.7698412698412698e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1561 }, { "completion_length": 134.92857360839844, "epoch": 0.09917460317460318, "grad_norm": 0.003078399458900094, "kl": 0.004882941488176584, "learning_rate": 1.7709750566893423e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1562 }, { "completion_length": 165.42857360839844, "epoch": 0.09923809523809524, "grad_norm": 0.0024915330577641726, "kl": 0.003935728222131729, "learning_rate": 1.7721088435374149e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1563 }, { "completion_length": 152.1428680419922, "epoch": 0.09930158730158731, "grad_norm": 0.002500772476196289, "kl": 0.0038202505093067884, "learning_rate": 1.7732426303854874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1564 }, { "completion_length": 156.85714721679688, "epoch": 0.09936507936507936, "grad_norm": 0.003417437430471182, "kl": 0.0054852585308253765, "learning_rate": 1.7743764172335602e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1565 }, { "completion_length": 168.85714721679688, "epoch": 0.09942857142857142, "grad_norm": 0.0028621500823646784, "kl": 0.004415030591189861, "learning_rate": 1.7755102040816325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1566 }, { "completion_length": 196.07144165039062, "epoch": 0.09949206349206349, "grad_norm": 0.003440601984038949, "kl": 0.00529578747227788, "learning_rate": 1.776643990929705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1567 }, { "completion_length": 151.5, "epoch": 0.09955555555555555, "grad_norm": 0.0038602668792009354, "kl": 0.005536985583603382, "learning_rate": 1.7777777777777776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1568 }, { "completion_length": 170.7857208251953, "epoch": 0.09961904761904762, "grad_norm": 0.0026716317515820265, "kl": 0.003918703645467758, "learning_rate": 1.7789115646258502e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1569 }, { "completion_length": 161.7857208251953, "epoch": 0.09968253968253968, "grad_norm": 0.0033062670845538378, "kl": 0.0049813287332654, "learning_rate": 1.7800453514739227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1570 }, { "completion_length": 182.57144165039062, "epoch": 0.09974603174603175, "grad_norm": 0.00306991976685822, "kl": 0.004797288682311773, "learning_rate": 1.7811791383219953e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1571 }, { "completion_length": 196.50001525878906, "epoch": 0.0998095238095238, "grad_norm": 0.0024484023451805115, "kl": 0.004326640162616968, "learning_rate": 1.782312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1572 }, { "completion_length": 190.57144165039062, "epoch": 0.09987301587301588, "grad_norm": 0.002760802861303091, "kl": 0.004272964783012867, "learning_rate": 1.7834467120181404e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1573 }, { "completion_length": 157.5, "epoch": 0.09993650793650793, "grad_norm": 0.003343067830428481, "kl": 0.005223705433309078, "learning_rate": 1.784580498866213e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1574 }, { "completion_length": 168.7857208251953, "epoch": 0.1, "grad_norm": 0.0035866424441337585, "kl": 0.005835807882249355, "learning_rate": 1.7857142857142858e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1575 }, { "completion_length": 145.0, "epoch": 0.10006349206349206, "grad_norm": 0.0028266520239412785, "kl": 0.004495582543313503, "learning_rate": 1.7868480725623583e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1576 }, { "completion_length": 188.57144165039062, "epoch": 0.10012698412698413, "grad_norm": 0.0027539243455976248, "kl": 0.004692206624895334, "learning_rate": 1.7879818594104306e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1577 }, { "completion_length": 192.21429443359375, "epoch": 0.10019047619047619, "grad_norm": 0.0022914523724466562, "kl": 0.003578274277970195, "learning_rate": 1.7891156462585032e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1578 }, { "completion_length": 176.50001525878906, "epoch": 0.10025396825396825, "grad_norm": 0.0027431349735707045, "kl": 0.004230971913784742, "learning_rate": 1.790249433106576e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1579 }, { "completion_length": 150.0, "epoch": 0.10031746031746032, "grad_norm": 0.0031310878694057465, "kl": 0.004792623221874237, "learning_rate": 1.7913832199546486e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1580 }, { "completion_length": 158.42857360839844, "epoch": 0.10038095238095238, "grad_norm": 0.0038169361650943756, "kl": 0.006291001103818417, "learning_rate": 1.7925170068027208e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1581 }, { "completion_length": 169.85714721679688, "epoch": 0.10044444444444445, "grad_norm": 0.0037994415033608675, "kl": 0.006304455921053886, "learning_rate": 1.7936507936507937e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1582 }, { "completion_length": 171.2857208251953, "epoch": 0.1005079365079365, "grad_norm": 0.003143029985949397, "kl": 0.004672130569815636, "learning_rate": 1.7947845804988662e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1583 }, { "completion_length": 177.7857208251953, "epoch": 0.10057142857142858, "grad_norm": 0.00275485054589808, "kl": 0.004388719797134399, "learning_rate": 1.7959183673469385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1584 }, { "completion_length": 198.42857360839844, "epoch": 0.10063492063492063, "grad_norm": 0.0019987309351563454, "kl": 0.003356039058417082, "learning_rate": 1.7970521541950113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1585 }, { "completion_length": 133.57144165039062, "epoch": 0.1006984126984127, "grad_norm": 0.0029482226818799973, "kl": 0.0048209503293037415, "learning_rate": 1.798185941043084e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1586 }, { "completion_length": 150.1428680419922, "epoch": 0.10076190476190476, "grad_norm": 0.003894167486578226, "kl": 0.0055540078319609165, "learning_rate": 1.7993197278911564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1587 }, { "completion_length": 180.57144165039062, "epoch": 0.10082539682539683, "grad_norm": 0.003051269566640258, "kl": 0.0046735526993870735, "learning_rate": 1.8004535147392287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1588 }, { "completion_length": 183.71429443359375, "epoch": 0.10088888888888889, "grad_norm": 0.002860976615920663, "kl": 0.00450125290080905, "learning_rate": 1.8015873015873015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1589 }, { "completion_length": 189.6428680419922, "epoch": 0.10095238095238095, "grad_norm": 0.002599710365757346, "kl": 0.003822039347141981, "learning_rate": 1.802721088435374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1590 }, { "completion_length": 147.42857360839844, "epoch": 0.10101587301587302, "grad_norm": 0.003907571546733379, "kl": 0.006085122935473919, "learning_rate": 1.8038548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1591 }, { "completion_length": 140.5, "epoch": 0.10107936507936507, "grad_norm": 0.0029295661952346563, "kl": 0.004454281646758318, "learning_rate": 1.8049886621315192e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1592 }, { "completion_length": 175.42857360839844, "epoch": 0.10114285714285715, "grad_norm": 0.0025983478408306837, "kl": 0.0035162768326699734, "learning_rate": 1.8061224489795918e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1593 }, { "completion_length": 158.6428680419922, "epoch": 0.1012063492063492, "grad_norm": 0.002561342203989625, "kl": 0.004311677068471909, "learning_rate": 1.8072562358276643e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1594 }, { "completion_length": 202.21429443359375, "epoch": 0.10126984126984127, "grad_norm": 0.002538891276344657, "kl": 0.0041772956028580666, "learning_rate": 1.808390022675737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1595 }, { "completion_length": 199.1428680419922, "epoch": 0.10133333333333333, "grad_norm": 0.0030964931938797235, "kl": 0.004706436302512884, "learning_rate": 1.8095238095238094e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1596 }, { "completion_length": 181.50001525878906, "epoch": 0.1013968253968254, "grad_norm": 0.00276329112239182, "kl": 0.005261146929115057, "learning_rate": 1.810657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1597 }, { "completion_length": 167.07144165039062, "epoch": 0.10146031746031746, "grad_norm": 0.0022875433787703514, "kl": 0.0036929785273969173, "learning_rate": 1.8117913832199548e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1598 }, { "completion_length": 161.71429443359375, "epoch": 0.10152380952380953, "grad_norm": 0.0029577554669231176, "kl": 0.005266400519758463, "learning_rate": 1.812925170068027e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1599 }, { "completion_length": 156.42857360839844, "epoch": 0.10158730158730159, "grad_norm": 1.152786374092102, "kl": 0.004904290661215782, "learning_rate": 1.8140589569160996e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1600 }, { "completion_length": 125.64286041259766, "epoch": 0.10165079365079364, "grad_norm": 0.003947845660150051, "kl": 0.005259005818516016, "learning_rate": 1.8151927437641722e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1601 }, { "completion_length": 161.42857360839844, "epoch": 0.10171428571428572, "grad_norm": 0.0034219734370708466, "kl": 0.004824059084057808, "learning_rate": 1.816326530612245e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1602 }, { "completion_length": 182.42857360839844, "epoch": 0.10177777777777777, "grad_norm": 0.00294978148303926, "kl": 0.005235800985246897, "learning_rate": 1.8174603174603173e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1603 }, { "completion_length": 140.71429443359375, "epoch": 0.10184126984126984, "grad_norm": 0.004749060142785311, "kl": 0.007799933198839426, "learning_rate": 1.8185941043083899e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1604 }, { "completion_length": 121.0714340209961, "epoch": 0.1019047619047619, "grad_norm": 0.004555403720587492, "kl": 0.007679781876504421, "learning_rate": 1.8197278911564627e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1605 }, { "completion_length": 151.21429443359375, "epoch": 0.10196825396825397, "grad_norm": 0.0037582083605229855, "kl": 0.0058262962847948074, "learning_rate": 1.820861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1606 }, { "completion_length": 166.42857360839844, "epoch": 0.10203174603174603, "grad_norm": 0.003687328891828656, "kl": 0.005845583043992519, "learning_rate": 1.8219954648526075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1607 }, { "completion_length": 130.2857208251953, "epoch": 0.1020952380952381, "grad_norm": 0.004060382489115, "kl": 0.007125564850866795, "learning_rate": 1.82312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1608 }, { "completion_length": 176.57144165039062, "epoch": 0.10215873015873016, "grad_norm": 0.004191409796476364, "kl": 0.007331525441259146, "learning_rate": 1.824263038548753e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1609 }, { "completion_length": 199.2857208251953, "epoch": 0.10222222222222223, "grad_norm": 0.0033057022374123335, "kl": 0.006175411865115166, "learning_rate": 1.8253968253968252e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1610 }, { "completion_length": 204.00001525878906, "epoch": 0.10228571428571429, "grad_norm": 0.0034280424006283283, "kl": 0.0062189181335270405, "learning_rate": 1.8265306122448977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1611 }, { "completion_length": 166.5, "epoch": 0.10234920634920634, "grad_norm": 0.004094140138477087, "kl": 0.008200407028198242, "learning_rate": 1.8276643990929706e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1612 }, { "completion_length": 144.6428680419922, "epoch": 0.10241269841269841, "grad_norm": 0.005286720581352711, "kl": 0.009264237247407436, "learning_rate": 1.828798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1613 }, { "completion_length": 156.92857360839844, "epoch": 0.10247619047619047, "grad_norm": 0.005931058898568153, "kl": 0.011338578537106514, "learning_rate": 1.8299319727891154e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1614 }, { "completion_length": 162.7857208251953, "epoch": 0.10253968253968254, "grad_norm": 0.004272674676030874, "kl": 0.00830461923032999, "learning_rate": 1.8310657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1615 }, { "completion_length": 150.0, "epoch": 0.1026031746031746, "grad_norm": 0.004823288414627314, "kl": 0.009148887358605862, "learning_rate": 1.8321995464852608e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1616 }, { "completion_length": 166.85714721679688, "epoch": 0.10266666666666667, "grad_norm": 0.003934158477932215, "kl": 0.007190503645688295, "learning_rate": 1.833333333333333e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1617 }, { "completion_length": 183.57144165039062, "epoch": 0.10273015873015873, "grad_norm": 0.004855964332818985, "kl": 0.0102401627227664, "learning_rate": 1.8344671201814056e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1618 }, { "completion_length": 159.42857360839844, "epoch": 0.1027936507936508, "grad_norm": 0.004646598361432552, "kl": 0.008910044096410275, "learning_rate": 1.8356009070294784e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1619 }, { "completion_length": 168.07144165039062, "epoch": 0.10285714285714286, "grad_norm": 0.004102285951375961, "kl": 0.00794889871031046, "learning_rate": 1.836734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1620 }, { "completion_length": 145.85714721679688, "epoch": 0.10292063492063493, "grad_norm": 0.005630271043628454, "kl": 0.01104033924639225, "learning_rate": 1.8378684807256233e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1621 }, { "completion_length": 177.1428680419922, "epoch": 0.10298412698412698, "grad_norm": 0.004230000078678131, "kl": 0.008353275246918201, "learning_rate": 1.839002267573696e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1622 }, { "completion_length": 165.21429443359375, "epoch": 0.10304761904761905, "grad_norm": 0.00423170393332839, "kl": 0.00850940216332674, "learning_rate": 1.8401360544217687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1623 }, { "completion_length": 159.0, "epoch": 0.10311111111111111, "grad_norm": 0.00433403579518199, "kl": 0.008262889459729195, "learning_rate": 1.8412698412698412e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1624 }, { "completion_length": 151.7857208251953, "epoch": 0.10317460317460317, "grad_norm": 0.005590429529547691, "kl": 0.01077242847532034, "learning_rate": 1.8424036281179138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1625 }, { "completion_length": 196.35714721679688, "epoch": 0.10323809523809524, "grad_norm": 0.004491943866014481, "kl": 0.008620722219347954, "learning_rate": 1.8435374149659863e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1626 }, { "completion_length": 166.71429443359375, "epoch": 0.1033015873015873, "grad_norm": 0.004661065060645342, "kl": 0.008810454048216343, "learning_rate": 1.844671201814059e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1627 }, { "completion_length": 143.85714721679688, "epoch": 0.10336507936507937, "grad_norm": 0.00605368847027421, "kl": 0.011144361458718777, "learning_rate": 1.8458049886621312e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1628 }, { "completion_length": 175.21429443359375, "epoch": 0.10342857142857143, "grad_norm": 0.005009175278246403, "kl": 0.009163916110992432, "learning_rate": 1.846938775510204e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1629 }, { "completion_length": 188.1428680419922, "epoch": 0.1034920634920635, "grad_norm": 0.004036573693156242, "kl": 0.00810826662927866, "learning_rate": 1.8480725623582765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1630 }, { "completion_length": 167.6428680419922, "epoch": 0.10355555555555555, "grad_norm": 0.004702557343989611, "kl": 0.009377832524478436, "learning_rate": 1.849206349206349e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1631 }, { "completion_length": 144.1428680419922, "epoch": 0.10361904761904762, "grad_norm": 0.00598397571593523, "kl": 0.011144711636006832, "learning_rate": 1.8503401360544217e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1632 }, { "completion_length": 146.21429443359375, "epoch": 0.10368253968253968, "grad_norm": 0.005195152945816517, "kl": 0.009293276816606522, "learning_rate": 1.8514739229024942e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1633 }, { "completion_length": 134.7857208251953, "epoch": 0.10374603174603175, "grad_norm": 0.00518771493807435, "kl": 0.008702915161848068, "learning_rate": 1.8526077097505668e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1634 }, { "completion_length": 171.92857360839844, "epoch": 0.10380952380952381, "grad_norm": 0.0038543057162314653, "kl": 0.007582733407616615, "learning_rate": 1.8537414965986396e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1635 }, { "completion_length": 136.71429443359375, "epoch": 0.10387301587301587, "grad_norm": 0.00492450874298811, "kl": 0.009800935164093971, "learning_rate": 1.854875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1636 }, { "completion_length": 179.00001525878906, "epoch": 0.10393650793650794, "grad_norm": 0.005261406768113375, "kl": 0.009292852133512497, "learning_rate": 1.8560090702947844e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1637 }, { "completion_length": 162.71429443359375, "epoch": 0.104, "grad_norm": 0.004528490826487541, "kl": 0.009106609039008617, "learning_rate": 1.8571428571428572e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1638 }, { "completion_length": 179.85714721679688, "epoch": 0.10406349206349207, "grad_norm": 0.005112271290272474, "kl": 0.009312553331255913, "learning_rate": 1.8582766439909298e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1639 }, { "completion_length": 113.92857360839844, "epoch": 0.10412698412698412, "grad_norm": 0.0060470085591077805, "kl": 0.010103896260261536, "learning_rate": 1.859410430839002e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1640 }, { "completion_length": 150.21429443359375, "epoch": 0.1041904761904762, "grad_norm": 0.00424161646515131, "kl": 0.007966629229485989, "learning_rate": 1.8605442176870746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1641 }, { "completion_length": 175.42857360839844, "epoch": 0.10425396825396825, "grad_norm": 0.003780842525884509, "kl": 0.0074888477101922035, "learning_rate": 1.8616780045351475e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1642 }, { "completion_length": 187.2857208251953, "epoch": 0.10431746031746032, "grad_norm": 0.0044233109802007675, "kl": 0.008019129745662212, "learning_rate": 1.8628117913832198e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1643 }, { "completion_length": 171.42857360839844, "epoch": 0.10438095238095238, "grad_norm": 0.005065676290541887, "kl": 0.00977415218949318, "learning_rate": 1.8639455782312923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1644 }, { "completion_length": 167.1428680419922, "epoch": 0.10444444444444445, "grad_norm": 0.003937146160751581, "kl": 0.007444435730576515, "learning_rate": 1.865079365079365e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1645 }, { "completion_length": 180.42857360839844, "epoch": 0.10450793650793651, "grad_norm": 0.004545634612441063, "kl": 0.008385619148612022, "learning_rate": 1.8662131519274377e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1646 }, { "completion_length": 145.35714721679688, "epoch": 0.10457142857142857, "grad_norm": 0.00411961879581213, "kl": 0.007889607921242714, "learning_rate": 1.86734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1647 }, { "completion_length": 196.21429443359375, "epoch": 0.10463492063492064, "grad_norm": 0.0034354375675320625, "kl": 0.006796490401029587, "learning_rate": 1.8684807256235828e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1648 }, { "completion_length": 152.42857360839844, "epoch": 0.1046984126984127, "grad_norm": 0.004908540286123753, "kl": 0.008186420425772667, "learning_rate": 1.8696145124716553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1649 }, { "completion_length": 156.6428680419922, "epoch": 0.10476190476190476, "grad_norm": 0.004148035775870085, "kl": 0.007244471460580826, "learning_rate": 1.870748299319728e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1650 }, { "completion_length": 162.35714721679688, "epoch": 0.10482539682539682, "grad_norm": 0.004690549336373806, "kl": 0.007486885413527489, "learning_rate": 1.8718820861678002e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1651 }, { "completion_length": 170.71429443359375, "epoch": 0.10488888888888889, "grad_norm": 0.0057801418006420135, "kl": 0.009385841898620129, "learning_rate": 1.873015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1652 }, { "completion_length": 190.71429443359375, "epoch": 0.10495238095238095, "grad_norm": 0.003878094954416156, "kl": 0.007131173275411129, "learning_rate": 1.8741496598639456e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1653 }, { "completion_length": 177.85714721679688, "epoch": 0.10501587301587302, "grad_norm": 1.2624231576919556, "kl": 0.007212167605757713, "learning_rate": 1.8752834467120179e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1654 }, { "completion_length": 187.1428680419922, "epoch": 0.10507936507936508, "grad_norm": 0.004645558074116707, "kl": 0.008274729363620281, "learning_rate": 1.8764172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1655 }, { "completion_length": 177.21429443359375, "epoch": 0.10514285714285715, "grad_norm": 0.004378374665975571, "kl": 0.006790743675082922, "learning_rate": 1.8775510204081632e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1656 }, { "completion_length": 154.85714721679688, "epoch": 0.1052063492063492, "grad_norm": 0.005746767390519381, "kl": 0.00931329745799303, "learning_rate": 1.8786848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1657 }, { "completion_length": 179.00001525878906, "epoch": 0.10526984126984126, "grad_norm": 0.0035896238405257463, "kl": 0.007038903422653675, "learning_rate": 1.8798185941043083e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1658 }, { "completion_length": 134.85714721679688, "epoch": 0.10533333333333333, "grad_norm": 0.00576758524402976, "kl": 0.00912709254771471, "learning_rate": 1.880952380952381e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1659 }, { "completion_length": 200.57144165039062, "epoch": 0.10539682539682539, "grad_norm": 0.0036434654612094164, "kl": 0.006080241408199072, "learning_rate": 1.8820861678004534e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1660 }, { "completion_length": 167.21429443359375, "epoch": 0.10546031746031746, "grad_norm": 0.004010304808616638, "kl": 0.007087384350597858, "learning_rate": 1.883219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1661 }, { "completion_length": 192.21429443359375, "epoch": 0.10552380952380952, "grad_norm": 0.003294388297945261, "kl": 0.0056001534685492516, "learning_rate": 1.8843537414965986e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1662 }, { "completion_length": 141.6428680419922, "epoch": 0.10558730158730159, "grad_norm": 0.0047764559276402, "kl": 0.008068491704761982, "learning_rate": 1.885487528344671e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1663 }, { "completion_length": 151.92857360839844, "epoch": 0.10565079365079365, "grad_norm": 0.0047270203940570354, "kl": 0.00801096111536026, "learning_rate": 1.8866213151927437e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1664 }, { "completion_length": 157.0, "epoch": 0.10571428571428572, "grad_norm": 0.004920764826238155, "kl": 0.0073924316093325615, "learning_rate": 1.8877551020408162e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1665 }, { "completion_length": 143.07144165039062, "epoch": 0.10577777777777778, "grad_norm": 0.00533872889354825, "kl": 0.008352197706699371, "learning_rate": 1.8888888888888888e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1666 }, { "completion_length": 156.92857360839844, "epoch": 0.10584126984126985, "grad_norm": 0.004740037489682436, "kl": 0.009530353359878063, "learning_rate": 1.8900226757369613e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1667 }, { "completion_length": 137.2857208251953, "epoch": 0.1059047619047619, "grad_norm": 0.0060280305333435535, "kl": 0.010528341867029667, "learning_rate": 1.8911564625850341e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1668 }, { "completion_length": 149.6428680419922, "epoch": 0.10596825396825396, "grad_norm": 0.004270905628800392, "kl": 0.007278713397681713, "learning_rate": 1.8922902494331064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1669 }, { "completion_length": 147.71429443359375, "epoch": 0.10603174603174603, "grad_norm": 0.005955048371106386, "kl": 0.009316292591392994, "learning_rate": 1.893424036281179e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1670 }, { "completion_length": 214.92857360839844, "epoch": 0.10609523809523809, "grad_norm": 0.004405352286994457, "kl": 0.008320074528455734, "learning_rate": 1.8945578231292515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1671 }, { "completion_length": 194.6428680419922, "epoch": 0.10615873015873016, "grad_norm": 0.0035370849072933197, "kl": 0.006340415682643652, "learning_rate": 1.8956916099773244e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1672 }, { "completion_length": 193.57144165039062, "epoch": 0.10622222222222222, "grad_norm": 0.0034824435133486986, "kl": 0.005907358136028051, "learning_rate": 1.8968253968253967e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1673 }, { "completion_length": 170.71429443359375, "epoch": 0.10628571428571429, "grad_norm": 0.004649449605494738, "kl": 0.008213819935917854, "learning_rate": 1.8979591836734692e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1674 }, { "completion_length": 155.21429443359375, "epoch": 0.10634920634920635, "grad_norm": 0.004209653474390507, "kl": 0.007823852822184563, "learning_rate": 1.899092970521542e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1675 }, { "completion_length": 152.35714721679688, "epoch": 0.10641269841269842, "grad_norm": 0.0055161514319479465, "kl": 0.008845321834087372, "learning_rate": 1.9002267573696143e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1676 }, { "completion_length": 176.21429443359375, "epoch": 0.10647619047619047, "grad_norm": 0.0036658619064837694, "kl": 0.007099483162164688, "learning_rate": 1.901360544217687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1677 }, { "completion_length": 160.7857208251953, "epoch": 0.10653968253968255, "grad_norm": 0.007007633801549673, "kl": 0.010624698363244534, "learning_rate": 1.9024943310657597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1678 }, { "completion_length": 179.57144165039062, "epoch": 0.1066031746031746, "grad_norm": 0.00461857533082366, "kl": 0.007699211593717337, "learning_rate": 1.9036281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1679 }, { "completion_length": 155.6428680419922, "epoch": 0.10666666666666667, "grad_norm": 0.004146162886172533, "kl": 0.007414225954562426, "learning_rate": 1.9047619047619045e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1680 }, { "completion_length": 163.1428680419922, "epoch": 0.10673015873015873, "grad_norm": 0.006491502746939659, "kl": 0.01001195888966322, "learning_rate": 1.905895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1681 }, { "completion_length": 181.21429443359375, "epoch": 0.10679365079365079, "grad_norm": 0.0035374213475733995, "kl": 0.006449270527809858, "learning_rate": 1.90702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1682 }, { "completion_length": 153.1428680419922, "epoch": 0.10685714285714286, "grad_norm": 0.005833302158862352, "kl": 0.015018325299024582, "learning_rate": 1.9081632653061225e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1683 }, { "completion_length": 173.2857208251953, "epoch": 0.10692063492063492, "grad_norm": 0.004829125944525003, "kl": 0.007522151339799166, "learning_rate": 1.9092970521541948e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1684 }, { "completion_length": 112.85714721679688, "epoch": 0.10698412698412699, "grad_norm": 0.006694102194160223, "kl": 0.010265840217471123, "learning_rate": 1.9104308390022676e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1685 }, { "completion_length": 122.00000762939453, "epoch": 0.10704761904761904, "grad_norm": 0.006039765197783709, "kl": 0.008869488723576069, "learning_rate": 1.91156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1686 }, { "completion_length": 167.21429443359375, "epoch": 0.10711111111111112, "grad_norm": 0.003516991389915347, "kl": 0.006535254884511232, "learning_rate": 1.9126984126984124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1687 }, { "completion_length": 183.21429443359375, "epoch": 0.10717460317460317, "grad_norm": 0.005113195162266493, "kl": 0.008342201821506023, "learning_rate": 1.9138321995464852e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1688 }, { "completion_length": 154.71429443359375, "epoch": 0.10723809523809524, "grad_norm": 0.004309389740228653, "kl": 0.00690001156181097, "learning_rate": 1.9149659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1689 }, { "completion_length": 147.42857360839844, "epoch": 0.1073015873015873, "grad_norm": 0.005099805537611246, "kl": 0.008614320307970047, "learning_rate": 1.9160997732426303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1690 }, { "completion_length": 135.7857208251953, "epoch": 0.10736507936507937, "grad_norm": 0.005185260903090239, "kl": 0.007907579652965069, "learning_rate": 1.9172335600907026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1691 }, { "completion_length": 139.5, "epoch": 0.10742857142857143, "grad_norm": 0.004419436678290367, "kl": 0.006666112225502729, "learning_rate": 1.9183673469387755e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1692 }, { "completion_length": 137.21429443359375, "epoch": 0.10749206349206349, "grad_norm": 0.00437618512660265, "kl": 0.006534035317599773, "learning_rate": 1.919501133786848e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1693 }, { "completion_length": 210.07144165039062, "epoch": 0.10755555555555556, "grad_norm": 0.0032847451511770487, "kl": 0.0056551736779510975, "learning_rate": 1.9206349206349206e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1694 }, { "completion_length": 152.85714721679688, "epoch": 0.10761904761904761, "grad_norm": 0.0039008695166558027, "kl": 0.006698782555758953, "learning_rate": 1.921768707482993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1695 }, { "completion_length": 148.2857208251953, "epoch": 0.10768253968253969, "grad_norm": 0.006497670896351337, "kl": 0.009291887283325195, "learning_rate": 1.9229024943310657e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1696 }, { "completion_length": 152.7857208251953, "epoch": 0.10774603174603174, "grad_norm": 0.004024290479719639, "kl": 0.006711898371577263, "learning_rate": 1.9240362811791382e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1697 }, { "completion_length": 176.35714721679688, "epoch": 0.10780952380952381, "grad_norm": 0.004564972594380379, "kl": 0.007681332528591156, "learning_rate": 1.9251700680272108e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1698 }, { "completion_length": 195.6428680419922, "epoch": 0.10787301587301587, "grad_norm": 0.002957169897854328, "kl": 0.004960237070918083, "learning_rate": 1.9263038548752833e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1699 }, { "completion_length": 168.6428680419922, "epoch": 0.10793650793650794, "grad_norm": 0.003529496490955353, "kl": 0.005918321665376425, "learning_rate": 1.927437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1700 }, { "completion_length": 141.07144165039062, "epoch": 0.108, "grad_norm": 0.0047783260233700275, "kl": 0.008643406443297863, "learning_rate": 1.9285714285714284e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1701 }, { "completion_length": 153.07144165039062, "epoch": 0.10806349206349207, "grad_norm": 0.004210657440125942, "kl": 0.007258414290845394, "learning_rate": 1.929705215419501e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1702 }, { "completion_length": 173.50001525878906, "epoch": 0.10812698412698413, "grad_norm": 0.00404335418716073, "kl": 0.006724326405674219, "learning_rate": 1.9308390022675736e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1703 }, { "completion_length": 155.92857360839844, "epoch": 0.10819047619047618, "grad_norm": 0.005345572717487812, "kl": 0.008197893388569355, "learning_rate": 1.931972789115646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1704 }, { "completion_length": 183.6428680419922, "epoch": 0.10825396825396826, "grad_norm": 0.003466844791546464, "kl": 0.005890087224543095, "learning_rate": 1.933106575963719e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1705 }, { "completion_length": 168.07144165039062, "epoch": 0.10831746031746031, "grad_norm": 0.003288216656073928, "kl": 0.0053800069727003574, "learning_rate": 1.9342403628117912e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1706 }, { "completion_length": 169.85714721679688, "epoch": 0.10838095238095238, "grad_norm": 0.0035590208135545254, "kl": 0.005700335837900639, "learning_rate": 1.9353741496598638e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1707 }, { "completion_length": 169.7857208251953, "epoch": 0.10844444444444444, "grad_norm": 0.0036379944067448378, "kl": 0.006209069397300482, "learning_rate": 1.9365079365079366e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1708 }, { "completion_length": 172.07144165039062, "epoch": 0.10850793650793651, "grad_norm": 0.002582296496257186, "kl": 0.004034870769828558, "learning_rate": 1.9376417233560091e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1709 }, { "completion_length": 134.1428680419922, "epoch": 0.10857142857142857, "grad_norm": 0.00593365216627717, "kl": 0.008338462561368942, "learning_rate": 1.9387755102040814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1710 }, { "completion_length": 198.50001525878906, "epoch": 0.10863492063492064, "grad_norm": 0.002374413423240185, "kl": 0.004259837791323662, "learning_rate": 1.939909297052154e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1711 }, { "completion_length": 175.92857360839844, "epoch": 0.1086984126984127, "grad_norm": 0.00339798117056489, "kl": 0.00544137554243207, "learning_rate": 1.9410430839002268e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1712 }, { "completion_length": 159.0, "epoch": 0.10876190476190477, "grad_norm": 0.003292111214250326, "kl": 0.004972859285771847, "learning_rate": 1.942176870748299e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1713 }, { "completion_length": 168.92857360839844, "epoch": 0.10882539682539683, "grad_norm": 0.002814287319779396, "kl": 0.00454603461548686, "learning_rate": 1.9433106575963717e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1714 }, { "completion_length": 188.85714721679688, "epoch": 0.10888888888888888, "grad_norm": 0.0038783943746238947, "kl": 0.005948435049504042, "learning_rate": 1.9444444444444445e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1715 }, { "completion_length": 161.42857360839844, "epoch": 0.10895238095238095, "grad_norm": 0.005613296292722225, "kl": 0.007578437682241201, "learning_rate": 1.945578231292517e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1716 }, { "completion_length": 148.2857208251953, "epoch": 0.10901587301587301, "grad_norm": 0.004904354922473431, "kl": 0.007083936128765345, "learning_rate": 1.9467120181405893e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1717 }, { "completion_length": 200.85714721679688, "epoch": 0.10907936507936508, "grad_norm": 0.9627876281738281, "kl": 0.004693340510129929, "learning_rate": 1.9478458049886621e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1718 }, { "completion_length": 155.07144165039062, "epoch": 0.10914285714285714, "grad_norm": 0.003077007597312331, "kl": 0.005219736136496067, "learning_rate": 1.9489795918367347e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1719 }, { "completion_length": 182.92857360839844, "epoch": 0.10920634920634921, "grad_norm": 0.003053619060665369, "kl": 0.004682933911681175, "learning_rate": 1.9501133786848072e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1720 }, { "completion_length": 191.07144165039062, "epoch": 0.10926984126984127, "grad_norm": 0.002355103613808751, "kl": 0.003964132629334927, "learning_rate": 1.9512471655328795e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1721 }, { "completion_length": 177.2857208251953, "epoch": 0.10933333333333334, "grad_norm": 0.0029908802825957537, "kl": 0.004339632578194141, "learning_rate": 1.9523809523809524e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1722 }, { "completion_length": 135.0, "epoch": 0.1093968253968254, "grad_norm": 1.455147624015808, "kl": 0.006363235879689455, "learning_rate": 1.953514739229025e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1723 }, { "completion_length": 178.50001525878906, "epoch": 0.10946031746031747, "grad_norm": 0.0031054895371198654, "kl": 0.004035113845020533, "learning_rate": 1.9546485260770972e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1724 }, { "completion_length": 139.21429443359375, "epoch": 0.10952380952380952, "grad_norm": 0.0032779781613498926, "kl": 0.004328545648604631, "learning_rate": 1.95578231292517e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1725 }, { "completion_length": 180.57144165039062, "epoch": 0.10958730158730158, "grad_norm": 1.2632015943527222, "kl": 0.005006785970181227, "learning_rate": 1.9569160997732426e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1726 }, { "completion_length": 143.6428680419922, "epoch": 0.10965079365079365, "grad_norm": 0.0036894474178552628, "kl": 0.005064725875854492, "learning_rate": 1.958049886621315e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1727 }, { "completion_length": 194.57144165039062, "epoch": 0.10971428571428571, "grad_norm": 0.002630968112498522, "kl": 0.004222140647470951, "learning_rate": 1.9591836734693877e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1728 }, { "completion_length": 156.21429443359375, "epoch": 0.10977777777777778, "grad_norm": 0.0032206501346081495, "kl": 0.005251860711723566, "learning_rate": 1.9603174603174602e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1729 }, { "completion_length": 185.00001525878906, "epoch": 0.10984126984126984, "grad_norm": 0.002440141513943672, "kl": 0.004162599798291922, "learning_rate": 1.9614512471655328e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1730 }, { "completion_length": 165.0, "epoch": 0.10990476190476191, "grad_norm": 0.002806698903441429, "kl": 0.005077776499092579, "learning_rate": 1.9625850340136053e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1731 }, { "completion_length": 165.35714721679688, "epoch": 0.10996825396825397, "grad_norm": 0.0029983867425471544, "kl": 0.005263336468487978, "learning_rate": 1.963718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1732 }, { "completion_length": 160.92857360839844, "epoch": 0.11003174603174604, "grad_norm": 0.004792881663888693, "kl": 0.007321609649807215, "learning_rate": 1.9648526077097505e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1733 }, { "completion_length": 184.92857360839844, "epoch": 0.1100952380952381, "grad_norm": 0.003449286101385951, "kl": 0.006356208585202694, "learning_rate": 1.965986394557823e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1734 }, { "completion_length": 175.50001525878906, "epoch": 0.11015873015873016, "grad_norm": 0.0035918797366321087, "kl": 0.006466761697083712, "learning_rate": 1.9671201814058956e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1735 }, { "completion_length": 174.7857208251953, "epoch": 0.11022222222222222, "grad_norm": 0.004079252015799284, "kl": 0.007188618183135986, "learning_rate": 1.968253968253968e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1736 }, { "completion_length": 183.85714721679688, "epoch": 0.11028571428571429, "grad_norm": 0.0038712790701538324, "kl": 0.007411314640194178, "learning_rate": 1.9693877551020407e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1737 }, { "completion_length": 174.2857208251953, "epoch": 0.11034920634920635, "grad_norm": 0.0033442468848079443, "kl": 0.005732460413128138, "learning_rate": 1.9705215419501135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1738 }, { "completion_length": 138.57144165039062, "epoch": 0.1104126984126984, "grad_norm": 0.003212020732462406, "kl": 0.0058428277261555195, "learning_rate": 1.9716553287981858e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1739 }, { "completion_length": 144.6428680419922, "epoch": 0.11047619047619048, "grad_norm": 0.004295979160815477, "kl": 0.007294072769582272, "learning_rate": 1.9727891156462583e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1740 }, { "completion_length": 183.6428680419922, "epoch": 0.11053968253968253, "grad_norm": 0.004654257092624903, "kl": 0.007716313470155001, "learning_rate": 1.973922902494331e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1741 }, { "completion_length": 152.6428680419922, "epoch": 0.1106031746031746, "grad_norm": 0.004627380520105362, "kl": 0.008898717351257801, "learning_rate": 1.9750566893424037e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1742 }, { "completion_length": 156.92857360839844, "epoch": 0.11066666666666666, "grad_norm": 0.003988498821854591, "kl": 0.008164267987012863, "learning_rate": 1.976190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1743 }, { "completion_length": 193.21429443359375, "epoch": 0.11073015873015873, "grad_norm": 0.0039209602400660515, "kl": 0.00679195299744606, "learning_rate": 1.9773242630385486e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1744 }, { "completion_length": 181.57144165039062, "epoch": 0.11079365079365079, "grad_norm": 0.0030853033531457186, "kl": 0.005910783540457487, "learning_rate": 1.9784580498866214e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1745 }, { "completion_length": 168.5, "epoch": 0.11085714285714286, "grad_norm": 0.0041021015495061874, "kl": 0.007173342164605856, "learning_rate": 1.9795918367346937e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1746 }, { "completion_length": 192.50001525878906, "epoch": 0.11092063492063492, "grad_norm": 0.004090695641934872, "kl": 0.006848381366580725, "learning_rate": 1.9807256235827662e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1747 }, { "completion_length": 136.6428680419922, "epoch": 0.11098412698412699, "grad_norm": 0.0038391968701034784, "kl": 0.007218897342681885, "learning_rate": 1.981859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1748 }, { "completion_length": 154.92857360839844, "epoch": 0.11104761904761905, "grad_norm": 0.004914931952953339, "kl": 0.008579297922551632, "learning_rate": 1.9829931972789116e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1749 }, { "completion_length": 160.42857360839844, "epoch": 0.1111111111111111, "grad_norm": 0.004712474532425404, "kl": 0.008281154558062553, "learning_rate": 1.984126984126984e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1750 }, { "completion_length": 169.42857360839844, "epoch": 0.11117460317460318, "grad_norm": 0.0034102587960660458, "kl": 0.005988736171275377, "learning_rate": 1.9852607709750564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1751 }, { "completion_length": 175.85714721679688, "epoch": 0.11123809523809523, "grad_norm": 0.003914595115929842, "kl": 0.0071767778135836124, "learning_rate": 1.9863945578231293e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1752 }, { "completion_length": 157.6428680419922, "epoch": 0.1113015873015873, "grad_norm": 0.003557403339073062, "kl": 0.007137422915548086, "learning_rate": 1.9875283446712018e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1753 }, { "completion_length": 150.57144165039062, "epoch": 0.11136507936507936, "grad_norm": 0.004796394146978855, "kl": 0.008205712772905827, "learning_rate": 1.988662131519274e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1754 }, { "completion_length": 129.07144165039062, "epoch": 0.11142857142857143, "grad_norm": 0.007879223674535751, "kl": 0.010687405243515968, "learning_rate": 1.989795918367347e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1755 }, { "completion_length": 136.35714721679688, "epoch": 0.11149206349206349, "grad_norm": 0.005222900304943323, "kl": 0.007954477332532406, "learning_rate": 1.9909297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1756 }, { "completion_length": 168.6428680419922, "epoch": 0.11155555555555556, "grad_norm": 0.003926730249077082, "kl": 0.007599777542054653, "learning_rate": 1.9920634920634918e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1757 }, { "completion_length": 159.7857208251953, "epoch": 0.11161904761904762, "grad_norm": 0.005917103961110115, "kl": 0.009210285730659962, "learning_rate": 1.9931972789115646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1758 }, { "completion_length": 156.85714721679688, "epoch": 0.11168253968253969, "grad_norm": 0.004338668193668127, "kl": 0.007640897296369076, "learning_rate": 1.9943310657596371e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1759 }, { "completion_length": 175.92857360839844, "epoch": 0.11174603174603175, "grad_norm": 0.004322204738855362, "kl": 0.006821820046752691, "learning_rate": 1.9954648526077097e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1760 }, { "completion_length": 170.6428680419922, "epoch": 0.1118095238095238, "grad_norm": 0.004670233465731144, "kl": 0.007867084816098213, "learning_rate": 1.996598639455782e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1761 }, { "completion_length": 159.42857360839844, "epoch": 0.11187301587301587, "grad_norm": 0.0036858762614428997, "kl": 0.006574338302016258, "learning_rate": 1.9977324263038548e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1762 }, { "completion_length": 141.85714721679688, "epoch": 0.11193650793650793, "grad_norm": 0.004441550467163324, "kl": 0.007231985218822956, "learning_rate": 1.9988662131519274e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1763 }, { "completion_length": 114.5714340209961, "epoch": 0.112, "grad_norm": 0.00437430664896965, "kl": 0.007507004775106907, "learning_rate": 2e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1764 }, { "completion_length": 178.85714721679688, "epoch": 0.11206349206349206, "grad_norm": 0.00447919312864542, "kl": 0.007642490789294243, "learning_rate": 2.0011337868480725e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1765 }, { "completion_length": 176.6428680419922, "epoch": 0.11212698412698413, "grad_norm": 0.004064875654876232, "kl": 0.007172624114900827, "learning_rate": 2.002267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1766 }, { "completion_length": 135.57144165039062, "epoch": 0.11219047619047619, "grad_norm": 0.006152105052024126, "kl": 0.010022944770753384, "learning_rate": 2.0034013605442176e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1767 }, { "completion_length": 170.1428680419922, "epoch": 0.11225396825396826, "grad_norm": 0.003987204749137163, "kl": 0.005934663582593203, "learning_rate": 2.0045351473922904e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1768 }, { "completion_length": 160.21429443359375, "epoch": 0.11231746031746032, "grad_norm": 0.003176023019477725, "kl": 0.005922699812799692, "learning_rate": 2.0056689342403627e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1769 }, { "completion_length": 132.42857360839844, "epoch": 0.11238095238095239, "grad_norm": 0.005163988098502159, "kl": 0.008307880721986294, "learning_rate": 2.0068027210884352e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1770 }, { "completion_length": 172.85714721679688, "epoch": 0.11244444444444444, "grad_norm": 0.004128817934542894, "kl": 0.006539458874613047, "learning_rate": 2.007936507936508e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1771 }, { "completion_length": 137.85714721679688, "epoch": 0.1125079365079365, "grad_norm": 1.514762043952942, "kl": 0.008255546912550926, "learning_rate": 2.0090702947845803e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1772 }, { "completion_length": 176.85714721679688, "epoch": 0.11257142857142857, "grad_norm": 0.0037400652654469013, "kl": 0.006778513081371784, "learning_rate": 2.010204081632653e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1773 }, { "completion_length": 126.92857360839844, "epoch": 0.11263492063492063, "grad_norm": 0.004750007297843695, "kl": 0.00703680794686079, "learning_rate": 2.0113378684807255e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1774 }, { "completion_length": 168.6428680419922, "epoch": 0.1126984126984127, "grad_norm": 0.004567381460219622, "kl": 0.007192217279225588, "learning_rate": 2.0124716553287983e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1775 }, { "completion_length": 124.21429443359375, "epoch": 0.11276190476190476, "grad_norm": 0.004366480745375156, "kl": 0.007124673575162888, "learning_rate": 2.0136054421768706e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1776 }, { "completion_length": 112.85714721679688, "epoch": 0.11282539682539683, "grad_norm": 0.005729121156036854, "kl": 0.007307939697057009, "learning_rate": 2.014739229024943e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1777 }, { "completion_length": 160.0, "epoch": 0.11288888888888889, "grad_norm": 0.003756224177777767, "kl": 0.005488437134772539, "learning_rate": 2.015873015873016e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1778 }, { "completion_length": 134.85714721679688, "epoch": 0.11295238095238096, "grad_norm": 0.003573434194549918, "kl": 0.006151055917143822, "learning_rate": 2.0170068027210885e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1779 }, { "completion_length": 168.57144165039062, "epoch": 0.11301587301587301, "grad_norm": 0.0026747696101665497, "kl": 0.004068601876497269, "learning_rate": 2.0181405895691608e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1780 }, { "completion_length": 170.6428680419922, "epoch": 0.11307936507936509, "grad_norm": 0.0033079248387366533, "kl": 0.005630736239254475, "learning_rate": 2.0192743764172336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1781 }, { "completion_length": 185.07144165039062, "epoch": 0.11314285714285714, "grad_norm": 0.002366764238104224, "kl": 0.00417006341740489, "learning_rate": 2.0204081632653062e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1782 }, { "completion_length": 172.71429443359375, "epoch": 0.11320634920634921, "grad_norm": 0.0031867045909166336, "kl": 0.005413432605564594, "learning_rate": 2.0215419501133784e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1783 }, { "completion_length": 154.92857360839844, "epoch": 0.11326984126984127, "grad_norm": 0.0021129343658685684, "kl": 0.004380116704851389, "learning_rate": 2.022675736961451e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1784 }, { "completion_length": 149.92857360839844, "epoch": 0.11333333333333333, "grad_norm": 0.0026788990944623947, "kl": 0.004253100138157606, "learning_rate": 2.0238095238095238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1785 }, { "completion_length": 173.7857208251953, "epoch": 0.1133968253968254, "grad_norm": 0.0022720331326127052, "kl": 0.0046456074342131615, "learning_rate": 2.0249433106575964e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1786 }, { "completion_length": 162.07144165039062, "epoch": 0.11346031746031746, "grad_norm": 0.0031538463663309813, "kl": 0.004631045740097761, "learning_rate": 2.0260770975056687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1787 }, { "completion_length": 180.71429443359375, "epoch": 0.11352380952380953, "grad_norm": 0.002827197080478072, "kl": 0.004631516989320517, "learning_rate": 2.0272108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1788 }, { "completion_length": 168.42857360839844, "epoch": 0.11358730158730158, "grad_norm": 0.003278788411989808, "kl": 0.004457553382962942, "learning_rate": 2.028344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1789 }, { "completion_length": 152.71429443359375, "epoch": 0.11365079365079366, "grad_norm": 0.0021799190435558558, "kl": 0.00397613737732172, "learning_rate": 2.0294784580498866e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1790 }, { "completion_length": 187.21429443359375, "epoch": 0.11371428571428571, "grad_norm": 0.0020595076493918896, "kl": 0.0037864651530981064, "learning_rate": 2.0306122448979591e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1791 }, { "completion_length": 193.21429443359375, "epoch": 0.11377777777777778, "grad_norm": 0.002095047850161791, "kl": 0.003804383333772421, "learning_rate": 2.0317460317460317e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1792 }, { "completion_length": 148.0, "epoch": 0.11384126984126984, "grad_norm": 0.003460634034126997, "kl": 0.005376139190047979, "learning_rate": 2.0328798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1793 }, { "completion_length": 191.21429443359375, "epoch": 0.11390476190476191, "grad_norm": 0.002258933847770095, "kl": 0.003463546046987176, "learning_rate": 2.0340136054421765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1794 }, { "completion_length": 148.6428680419922, "epoch": 0.11396825396825397, "grad_norm": 0.0019328086636960506, "kl": 0.00330963428132236, "learning_rate": 2.0351473922902494e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1795 }, { "completion_length": 174.92857360839844, "epoch": 0.11403174603174603, "grad_norm": 0.002210636157542467, "kl": 0.0040972474962472916, "learning_rate": 2.036281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1796 }, { "completion_length": 168.57144165039062, "epoch": 0.1140952380952381, "grad_norm": 0.9822778701782227, "kl": 0.004094243980944157, "learning_rate": 2.0374149659863945e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1797 }, { "completion_length": 189.57144165039062, "epoch": 0.11415873015873015, "grad_norm": 0.0024844470899552107, "kl": 0.0036831481847912073, "learning_rate": 2.038548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1798 }, { "completion_length": 171.42857360839844, "epoch": 0.11422222222222222, "grad_norm": 0.0019419598393142223, "kl": 0.0037567114923149347, "learning_rate": 2.0396825396825396e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1799 }, { "completion_length": 174.1428680419922, "epoch": 0.11428571428571428, "grad_norm": 0.00210277340374887, "kl": 0.003209765302017331, "learning_rate": 2.0408163265306121e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1800 }, { "completion_length": 174.35714721679688, "epoch": 0.11434920634920635, "grad_norm": 0.002022602129727602, "kl": 0.0035283761098980904, "learning_rate": 2.041950113378685e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1801 }, { "completion_length": 198.07144165039062, "epoch": 0.11441269841269841, "grad_norm": 0.001814098795875907, "kl": 0.0034011462703347206, "learning_rate": 2.0430839002267572e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1802 }, { "completion_length": 125.85714721679688, "epoch": 0.11447619047619048, "grad_norm": 0.00304218172095716, "kl": 0.005116581916809082, "learning_rate": 2.0442176870748298e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1803 }, { "completion_length": 163.42857360839844, "epoch": 0.11453968253968254, "grad_norm": 0.002204271499067545, "kl": 0.003748055547475815, "learning_rate": 2.0453514739229024e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1804 }, { "completion_length": 208.42857360839844, "epoch": 0.11460317460317461, "grad_norm": 0.0019012781558558345, "kl": 0.0031439820304512978, "learning_rate": 2.046485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1805 }, { "completion_length": 171.1428680419922, "epoch": 0.11466666666666667, "grad_norm": 0.0019361121812835336, "kl": 0.0028739438857883215, "learning_rate": 2.0476190476190475e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1806 }, { "completion_length": 152.07144165039062, "epoch": 0.11473015873015872, "grad_norm": 0.0020819695200771093, "kl": 0.003490877104923129, "learning_rate": 2.04875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1807 }, { "completion_length": 168.57144165039062, "epoch": 0.1147936507936508, "grad_norm": 0.0021512527018785477, "kl": 0.0034717279486358166, "learning_rate": 2.0498866213151928e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1808 }, { "completion_length": 151.6428680419922, "epoch": 0.11485714285714285, "grad_norm": 0.0024272461887449026, "kl": 0.004458268638700247, "learning_rate": 2.051020408163265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1809 }, { "completion_length": 119.28572082519531, "epoch": 0.11492063492063492, "grad_norm": 0.003103781258687377, "kl": 0.0048058959655463696, "learning_rate": 2.0521541950113377e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1810 }, { "completion_length": 172.00001525878906, "epoch": 0.11498412698412698, "grad_norm": 0.0019320979481562972, "kl": 0.0033740776125341654, "learning_rate": 2.0532879818594105e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1811 }, { "completion_length": 167.1428680419922, "epoch": 0.11504761904761905, "grad_norm": 0.002005982678383589, "kl": 0.00339278276078403, "learning_rate": 2.054421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1812 }, { "completion_length": 184.85714721679688, "epoch": 0.11511111111111111, "grad_norm": 0.0026199116837233305, "kl": 0.004166360478848219, "learning_rate": 2.0555555555555553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1813 }, { "completion_length": 167.21429443359375, "epoch": 0.11517460317460318, "grad_norm": 0.0021618586033582687, "kl": 0.0033117567654699087, "learning_rate": 2.056689342403628e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1814 }, { "completion_length": 182.00001525878906, "epoch": 0.11523809523809524, "grad_norm": 0.0017767653334885836, "kl": 0.003184475703164935, "learning_rate": 2.0578231292517007e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1815 }, { "completion_length": 143.0, "epoch": 0.11530158730158731, "grad_norm": 0.002626702655106783, "kl": 0.004028755240142345, "learning_rate": 2.058956916099773e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1816 }, { "completion_length": 150.85714721679688, "epoch": 0.11536507936507936, "grad_norm": 0.0026941036339849234, "kl": 0.0035490193404257298, "learning_rate": 2.0600907029478456e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1817 }, { "completion_length": 181.1428680419922, "epoch": 0.11542857142857142, "grad_norm": 0.0018144173081964254, "kl": 0.0033476941753178835, "learning_rate": 2.0612244897959184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1818 }, { "completion_length": 156.42857360839844, "epoch": 0.1154920634920635, "grad_norm": 0.0019586272537708282, "kl": 0.0030318915378302336, "learning_rate": 2.062358276643991e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1819 }, { "completion_length": 166.42857360839844, "epoch": 0.11555555555555555, "grad_norm": 0.0022248816676437855, "kl": 0.0031012161634862423, "learning_rate": 2.0634920634920632e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1820 }, { "completion_length": 152.07144165039062, "epoch": 0.11561904761904762, "grad_norm": 0.002028105780482292, "kl": 0.0033521156292408705, "learning_rate": 2.064625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1821 }, { "completion_length": 194.71429443359375, "epoch": 0.11568253968253968, "grad_norm": 0.0014920809771865606, "kl": 0.002697108080610633, "learning_rate": 2.0657596371882086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1822 }, { "completion_length": 171.07144165039062, "epoch": 0.11574603174603175, "grad_norm": 0.0015596321318298578, "kl": 0.002691894304007292, "learning_rate": 2.0668934240362812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1823 }, { "completion_length": 168.5, "epoch": 0.1158095238095238, "grad_norm": 0.00229471642524004, "kl": 0.003729308256879449, "learning_rate": 2.0680272108843534e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1824 }, { "completion_length": 173.7857208251953, "epoch": 0.11587301587301588, "grad_norm": 0.0021122319158166647, "kl": 0.0035828358959406614, "learning_rate": 2.0691609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1825 }, { "completion_length": 149.92857360839844, "epoch": 0.11593650793650793, "grad_norm": 0.0020824014209210873, "kl": 0.003196732373908162, "learning_rate": 2.0702947845804988e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1826 }, { "completion_length": 187.00001525878906, "epoch": 0.116, "grad_norm": 0.0016258412506431341, "kl": 0.00289590610191226, "learning_rate": 2.0714285714285714e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1827 }, { "completion_length": 188.50001525878906, "epoch": 0.11606349206349206, "grad_norm": 0.0016175794880837202, "kl": 0.002872228855267167, "learning_rate": 2.072562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1828 }, { "completion_length": 151.0, "epoch": 0.11612698412698412, "grad_norm": 0.001818111166357994, "kl": 0.00291848205961287, "learning_rate": 2.0736961451247165e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1829 }, { "completion_length": 166.07144165039062, "epoch": 0.11619047619047619, "grad_norm": 0.001718986895866692, "kl": 0.003172211814671755, "learning_rate": 2.074829931972789e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1830 }, { "completion_length": 148.1428680419922, "epoch": 0.11625396825396825, "grad_norm": 0.0017647376516833901, "kl": 0.002949342830106616, "learning_rate": 2.0759637188208616e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1831 }, { "completion_length": 144.21429443359375, "epoch": 0.11631746031746032, "grad_norm": 0.002474449574947357, "kl": 0.003916906658560038, "learning_rate": 2.0770975056689341e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1832 }, { "completion_length": 208.7857208251953, "epoch": 0.11638095238095238, "grad_norm": 0.0015952001558616757, "kl": 0.0027947830967605114, "learning_rate": 2.0782312925170067e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1833 }, { "completion_length": 177.00001525878906, "epoch": 0.11644444444444445, "grad_norm": 0.0016709902556613088, "kl": 0.00267223222181201, "learning_rate": 2.0793650793650793e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1834 }, { "completion_length": 158.35714721679688, "epoch": 0.1165079365079365, "grad_norm": 0.0025477157905697823, "kl": 0.0037585319951176643, "learning_rate": 2.0804988662131518e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1835 }, { "completion_length": 136.07144165039062, "epoch": 0.11657142857142858, "grad_norm": 0.0020207054913043976, "kl": 0.0032179313711822033, "learning_rate": 2.0816326530612244e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1836 }, { "completion_length": 188.85714721679688, "epoch": 0.11663492063492063, "grad_norm": 0.00182806805241853, "kl": 0.002755781402811408, "learning_rate": 2.082766439909297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1837 }, { "completion_length": 214.57144165039062, "epoch": 0.1166984126984127, "grad_norm": 0.001366633689031005, "kl": 0.002323630964383483, "learning_rate": 2.0839002267573697e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1838 }, { "completion_length": 147.7857208251953, "epoch": 0.11676190476190476, "grad_norm": 0.002305661328136921, "kl": 0.0036161094903945923, "learning_rate": 2.085034013605442e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1839 }, { "completion_length": 197.42857360839844, "epoch": 0.11682539682539683, "grad_norm": 0.0017145401798188686, "kl": 0.0029358582105487585, "learning_rate": 2.0861678004535146e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1840 }, { "completion_length": 177.6428680419922, "epoch": 0.11688888888888889, "grad_norm": 0.0020833725575357676, "kl": 0.0034337742254137993, "learning_rate": 2.0873015873015874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1841 }, { "completion_length": 155.71429443359375, "epoch": 0.11695238095238095, "grad_norm": 0.002018911298364401, "kl": 0.003392470069229603, "learning_rate": 2.0884353741496597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1842 }, { "completion_length": 153.7857208251953, "epoch": 0.11701587301587302, "grad_norm": 0.0022264583967626095, "kl": 0.003572203451767564, "learning_rate": 2.0895691609977322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1843 }, { "completion_length": 176.2857208251953, "epoch": 0.11707936507936507, "grad_norm": 0.0020957726519554853, "kl": 0.003378494642674923, "learning_rate": 2.0907029478458048e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1844 }, { "completion_length": 156.21429443359375, "epoch": 0.11714285714285715, "grad_norm": 0.0022360116709023714, "kl": 0.0039584096521139145, "learning_rate": 2.0918367346938776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1845 }, { "completion_length": 134.0, "epoch": 0.1172063492063492, "grad_norm": 0.002352707087993622, "kl": 0.0036412999033927917, "learning_rate": 2.09297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1846 }, { "completion_length": 175.85714721679688, "epoch": 0.11726984126984127, "grad_norm": 0.0021133346017450094, "kl": 0.003422328270971775, "learning_rate": 2.0941043083900225e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1847 }, { "completion_length": 167.57144165039062, "epoch": 0.11733333333333333, "grad_norm": 0.0019041742198169231, "kl": 0.003321998752653599, "learning_rate": 2.0952380952380953e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1848 }, { "completion_length": 142.6428680419922, "epoch": 0.1173968253968254, "grad_norm": 0.0024280238430947065, "kl": 0.0034018626902252436, "learning_rate": 2.0963718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1849 }, { "completion_length": 186.7857208251953, "epoch": 0.11746031746031746, "grad_norm": 0.0018756275530904531, "kl": 0.002842837246134877, "learning_rate": 2.09750566893424e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1850 }, { "completion_length": 147.7857208251953, "epoch": 0.11752380952380953, "grad_norm": 0.0020928026642650366, "kl": 0.00338355777785182, "learning_rate": 2.098639455782313e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1851 }, { "completion_length": 123.28572082519531, "epoch": 0.11758730158730159, "grad_norm": 0.002231442602351308, "kl": 0.0033156692516058683, "learning_rate": 2.0997732426303855e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1852 }, { "completion_length": 186.1428680419922, "epoch": 0.11765079365079364, "grad_norm": 0.0015428648330271244, "kl": 0.0024511474184691906, "learning_rate": 2.1009070294784578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1853 }, { "completion_length": 179.2857208251953, "epoch": 0.11771428571428572, "grad_norm": 0.0018649684498086572, "kl": 0.003077569417655468, "learning_rate": 2.1020408163265303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1854 }, { "completion_length": 163.7857208251953, "epoch": 0.11777777777777777, "grad_norm": 0.0016688331961631775, "kl": 0.0028249898459762335, "learning_rate": 2.1031746031746032e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1855 }, { "completion_length": 153.6428680419922, "epoch": 0.11784126984126984, "grad_norm": 0.0017107788007706404, "kl": 0.002731840591877699, "learning_rate": 2.1043083900226757e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1856 }, { "completion_length": 106.35714721679688, "epoch": 0.1179047619047619, "grad_norm": 0.003031335072591901, "kl": 0.005063805263489485, "learning_rate": 2.105442176870748e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1857 }, { "completion_length": 157.7857208251953, "epoch": 0.11796825396825397, "grad_norm": 0.0018339835805818439, "kl": 0.00304001965560019, "learning_rate": 2.1065759637188208e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1858 }, { "completion_length": 159.71429443359375, "epoch": 0.11803174603174603, "grad_norm": 0.0014498592354357243, "kl": 0.0026481272652745247, "learning_rate": 2.1077097505668934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1859 }, { "completion_length": 157.6428680419922, "epoch": 0.1180952380952381, "grad_norm": 0.0016976995393633842, "kl": 0.002386389998719096, "learning_rate": 2.108843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1860 }, { "completion_length": 185.92857360839844, "epoch": 0.11815873015873016, "grad_norm": 0.0014350416604429483, "kl": 0.0024134775158017874, "learning_rate": 2.1099773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1861 }, { "completion_length": 136.0, "epoch": 0.11822222222222223, "grad_norm": 0.0019818549044430256, "kl": 0.003024347359314561, "learning_rate": 2.111111111111111e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1862 }, { "completion_length": 185.1428680419922, "epoch": 0.11828571428571429, "grad_norm": 0.0015625169035047293, "kl": 0.0029918113723397255, "learning_rate": 2.1122448979591836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1863 }, { "completion_length": 155.6428680419922, "epoch": 0.11834920634920634, "grad_norm": 1.8444117307662964, "kl": 0.003367313416674733, "learning_rate": 2.113378684807256e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1864 }, { "completion_length": 198.00001525878906, "epoch": 0.11841269841269841, "grad_norm": 0.0014132957439869642, "kl": 0.002658286364749074, "learning_rate": 2.1145124716553287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1865 }, { "completion_length": 156.5, "epoch": 0.11847619047619047, "grad_norm": 0.0016654644859954715, "kl": 0.0026379090268164873, "learning_rate": 2.1156462585034013e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1866 }, { "completion_length": 207.7857208251953, "epoch": 0.11853968253968254, "grad_norm": 0.001449531759135425, "kl": 0.0024703936651349068, "learning_rate": 2.1167800453514738e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1867 }, { "completion_length": 211.50001525878906, "epoch": 0.1186031746031746, "grad_norm": 0.0016577381175011396, "kl": 0.002627904526889324, "learning_rate": 2.1179138321995464e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1868 }, { "completion_length": 160.42857360839844, "epoch": 0.11866666666666667, "grad_norm": 0.001509909168817103, "kl": 0.002827807329595089, "learning_rate": 2.119047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1869 }, { "completion_length": 173.6428680419922, "epoch": 0.11873015873015873, "grad_norm": 1.4285287857055664, "kl": 0.0027544773183763027, "learning_rate": 2.1201814058956915e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1870 }, { "completion_length": 165.57144165039062, "epoch": 0.1187936507936508, "grad_norm": 0.0016838846495375037, "kl": 0.003083876334130764, "learning_rate": 2.1213151927437643e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1871 }, { "completion_length": 161.92857360839844, "epoch": 0.11885714285714286, "grad_norm": 0.002314013661816716, "kl": 0.00371452490799129, "learning_rate": 2.1224489795918366e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1872 }, { "completion_length": 194.57144165039062, "epoch": 0.11892063492063493, "grad_norm": 0.0014095738297328353, "kl": 0.002672098111361265, "learning_rate": 2.1235827664399091e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1873 }, { "completion_length": 162.07144165039062, "epoch": 0.11898412698412698, "grad_norm": 0.0021355205681174994, "kl": 0.003907395526766777, "learning_rate": 2.1247165532879817e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1874 }, { "completion_length": 161.42857360839844, "epoch": 0.11904761904761904, "grad_norm": 0.0029639636632055044, "kl": 0.004650509916245937, "learning_rate": 2.1258503401360543e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1875 }, { "completion_length": 152.2857208251953, "epoch": 0.11911111111111111, "grad_norm": 0.002362446626648307, "kl": 0.004265127703547478, "learning_rate": 2.1269841269841268e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1876 }, { "completion_length": 149.6428680419922, "epoch": 0.11917460317460317, "grad_norm": 0.0020553262438625097, "kl": 0.00418984517455101, "learning_rate": 2.1281179138321994e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1877 }, { "completion_length": 151.85714721679688, "epoch": 0.11923809523809524, "grad_norm": 0.0020818838384002447, "kl": 0.003948258701711893, "learning_rate": 2.1292517006802722e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1878 }, { "completion_length": 155.21429443359375, "epoch": 0.1193015873015873, "grad_norm": 0.002218359149992466, "kl": 0.004105812404304743, "learning_rate": 2.1303854875283445e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1879 }, { "completion_length": 175.1428680419922, "epoch": 0.11936507936507937, "grad_norm": 0.0017429704312235117, "kl": 0.0034314170479774475, "learning_rate": 2.131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1880 }, { "completion_length": 148.5, "epoch": 0.11942857142857143, "grad_norm": 0.0020968029275536537, "kl": 0.004272975958883762, "learning_rate": 2.1326530612244898e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1881 }, { "completion_length": 168.07144165039062, "epoch": 0.1194920634920635, "grad_norm": 0.0023494441993534565, "kl": 0.0044918786734342575, "learning_rate": 2.1337868480725624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1882 }, { "completion_length": 147.1428680419922, "epoch": 0.11955555555555555, "grad_norm": 0.9090994000434875, "kl": 0.0051641082391142845, "learning_rate": 2.1349206349206347e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1883 }, { "completion_length": 172.35714721679688, "epoch": 0.11961904761904762, "grad_norm": 0.0018342698458582163, "kl": 0.0036923177540302277, "learning_rate": 2.1360544217687072e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1884 }, { "completion_length": 195.35714721679688, "epoch": 0.11968253968253968, "grad_norm": 0.001690245815552771, "kl": 0.0033456184901297092, "learning_rate": 2.13718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1885 }, { "completion_length": 146.7857208251953, "epoch": 0.11974603174603174, "grad_norm": 0.002361560007557273, "kl": 0.004645040258765221, "learning_rate": 2.1383219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1886 }, { "completion_length": 143.85714721679688, "epoch": 0.11980952380952381, "grad_norm": 0.0021199772600084543, "kl": 0.004443762358278036, "learning_rate": 2.139455782312925e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1887 }, { "completion_length": 180.85714721679688, "epoch": 0.11987301587301587, "grad_norm": 0.002025785855948925, "kl": 0.004209714476019144, "learning_rate": 2.1405895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1888 }, { "completion_length": 154.85714721679688, "epoch": 0.11993650793650794, "grad_norm": 0.002215395914390683, "kl": 0.004715589340776205, "learning_rate": 2.1417233560090703e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1889 }, { "completion_length": 151.0, "epoch": 0.12, "grad_norm": 0.0024112958926707506, "kl": 0.0047965990379452705, "learning_rate": 2.1428571428571426e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1890 }, { "completion_length": 182.42857360839844, "epoch": 0.12006349206349207, "grad_norm": 0.0020959468092769384, "kl": 0.004470168612897396, "learning_rate": 2.1439909297052154e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1891 }, { "completion_length": 129.1428680419922, "epoch": 0.12012698412698412, "grad_norm": 0.002692430280148983, "kl": 0.0054300567135214806, "learning_rate": 2.145124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1892 }, { "completion_length": 181.7857208251953, "epoch": 0.1201904761904762, "grad_norm": 0.0017682114848867059, "kl": 0.0040810550563037395, "learning_rate": 2.1462585034013605e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1893 }, { "completion_length": 177.92857360839844, "epoch": 0.12025396825396825, "grad_norm": 0.0020172412041574717, "kl": 0.0038790954276919365, "learning_rate": 2.1473922902494328e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1894 }, { "completion_length": 188.2857208251953, "epoch": 0.12031746031746032, "grad_norm": 0.002021601889282465, "kl": 0.00437086820602417, "learning_rate": 2.1485260770975056e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1895 }, { "completion_length": 163.2857208251953, "epoch": 0.12038095238095238, "grad_norm": 0.0015198506880551577, "kl": 0.00316451583057642, "learning_rate": 2.1496598639455782e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1896 }, { "completion_length": 182.50001525878906, "epoch": 0.12044444444444445, "grad_norm": 0.001921106013469398, "kl": 0.003805332351475954, "learning_rate": 2.1507936507936507e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1897 }, { "completion_length": 156.85714721679688, "epoch": 0.12050793650793651, "grad_norm": 0.0021288273856043816, "kl": 0.004609523341059685, "learning_rate": 2.1519274376417233e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1898 }, { "completion_length": 188.57144165039062, "epoch": 0.12057142857142857, "grad_norm": 0.002287370152771473, "kl": 0.004702911712229252, "learning_rate": 2.1530612244897958e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1899 }, { "completion_length": 179.92857360839844, "epoch": 0.12063492063492064, "grad_norm": 0.0017882055835798383, "kl": 0.003844532649964094, "learning_rate": 2.1541950113378684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1900 }, { "completion_length": 159.21429443359375, "epoch": 0.1206984126984127, "grad_norm": 0.002232756232842803, "kl": 0.0045161256566643715, "learning_rate": 2.155328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1901 }, { "completion_length": 199.35714721679688, "epoch": 0.12076190476190476, "grad_norm": 0.0018337899819016457, "kl": 0.003930371720343828, "learning_rate": 2.1564625850340135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1902 }, { "completion_length": 155.07144165039062, "epoch": 0.12082539682539682, "grad_norm": 1.544900894165039, "kl": 0.004241665359586477, "learning_rate": 2.157596371882086e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1903 }, { "completion_length": 181.71429443359375, "epoch": 0.12088888888888889, "grad_norm": 0.0020855306647717953, "kl": 0.0045744990929961205, "learning_rate": 2.1587301587301589e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1904 }, { "completion_length": 189.57144165039062, "epoch": 0.12095238095238095, "grad_norm": 0.00252471212297678, "kl": 0.00572887621819973, "learning_rate": 2.1598639455782312e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1905 }, { "completion_length": 161.07144165039062, "epoch": 0.12101587301587302, "grad_norm": 0.002461019204929471, "kl": 0.005637055262923241, "learning_rate": 2.1609977324263037e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1906 }, { "completion_length": 226.00001525878906, "epoch": 0.12107936507936508, "grad_norm": 0.0017327193636447191, "kl": 0.004153403919190168, "learning_rate": 2.1621315192743763e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1907 }, { "completion_length": 163.71429443359375, "epoch": 0.12114285714285715, "grad_norm": 0.002873410936444998, "kl": 0.006402952596545219, "learning_rate": 2.163265306122449e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1908 }, { "completion_length": 185.42857360839844, "epoch": 0.1212063492063492, "grad_norm": 0.002190510742366314, "kl": 0.005077038891613483, "learning_rate": 2.1643990929705214e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1909 }, { "completion_length": 143.21429443359375, "epoch": 0.12126984126984126, "grad_norm": 0.003533885581418872, "kl": 0.007107093930244446, "learning_rate": 2.165532879818594e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1910 }, { "completion_length": 177.42857360839844, "epoch": 0.12133333333333333, "grad_norm": 0.0028405082412064075, "kl": 0.006776094902306795, "learning_rate": 2.1666666666666667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1911 }, { "completion_length": 154.57144165039062, "epoch": 0.12139682539682539, "grad_norm": 0.0027203999925404787, "kl": 0.006580843590199947, "learning_rate": 2.167800453514739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1912 }, { "completion_length": 161.57144165039062, "epoch": 0.12146031746031746, "grad_norm": 0.0030021306592971087, "kl": 0.00714224623516202, "learning_rate": 2.1689342403628116e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1913 }, { "completion_length": 144.21429443359375, "epoch": 0.12152380952380952, "grad_norm": 0.003834846895188093, "kl": 0.008751478046178818, "learning_rate": 2.1700680272108844e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1914 }, { "completion_length": 187.92857360839844, "epoch": 0.12158730158730159, "grad_norm": 0.0027860747650265694, "kl": 0.006732477806508541, "learning_rate": 2.171201814058957e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1915 }, { "completion_length": 158.57144165039062, "epoch": 0.12165079365079365, "grad_norm": 0.0030241224449127913, "kl": 0.0068874661810696125, "learning_rate": 2.1723356009070293e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1916 }, { "completion_length": 239.71429443359375, "epoch": 0.12171428571428572, "grad_norm": 0.002331996103748679, "kl": 0.0059246947057545185, "learning_rate": 2.1734693877551018e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1917 }, { "completion_length": 140.7857208251953, "epoch": 0.12177777777777778, "grad_norm": 0.003654164494946599, "kl": 0.009035577066242695, "learning_rate": 2.1746031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1918 }, { "completion_length": 178.35714721679688, "epoch": 0.12184126984126985, "grad_norm": 0.003159252228215337, "kl": 0.008230349980294704, "learning_rate": 2.1757369614512472e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1919 }, { "completion_length": 187.21429443359375, "epoch": 0.1219047619047619, "grad_norm": 0.0026678836438804865, "kl": 0.007111177314072847, "learning_rate": 2.1768707482993195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1920 }, { "completion_length": 159.57144165039062, "epoch": 0.12196825396825396, "grad_norm": 0.00324424309656024, "kl": 0.008620752021670341, "learning_rate": 2.1780045351473923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1921 }, { "completion_length": 149.71429443359375, "epoch": 0.12203174603174603, "grad_norm": 0.003452530363574624, "kl": 0.009153928607702255, "learning_rate": 2.1791383219954648e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1922 }, { "completion_length": 229.1428680419922, "epoch": 0.12209523809523809, "grad_norm": 0.0027688967529684305, "kl": 0.006586969364434481, "learning_rate": 2.1802721088435371e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1923 }, { "completion_length": 141.85714721679688, "epoch": 0.12215873015873016, "grad_norm": 1.5212780237197876, "kl": 0.008640140295028687, "learning_rate": 2.1814058956916097e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1924 }, { "completion_length": 128.5, "epoch": 0.12222222222222222, "grad_norm": 0.004278090316802263, "kl": 0.010099460370838642, "learning_rate": 2.1825396825396825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1925 }, { "completion_length": 170.5, "epoch": 0.12228571428571429, "grad_norm": 0.003508371766656637, "kl": 0.00900136586278677, "learning_rate": 2.183673469387755e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1926 }, { "completion_length": 158.35714721679688, "epoch": 0.12234920634920635, "grad_norm": 0.003084994852542877, "kl": 0.007527490146458149, "learning_rate": 2.1848072562358274e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1927 }, { "completion_length": 140.57144165039062, "epoch": 0.12241269841269842, "grad_norm": 0.0043142419308424, "kl": 0.010686713270843029, "learning_rate": 2.1859410430839002e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1928 }, { "completion_length": 160.92857360839844, "epoch": 0.12247619047619047, "grad_norm": 0.003463548142462969, "kl": 0.008823150768876076, "learning_rate": 2.1870748299319727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1929 }, { "completion_length": 200.00001525878906, "epoch": 0.12253968253968255, "grad_norm": 0.003283892525359988, "kl": 0.008672795258462429, "learning_rate": 2.1882086167800453e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1930 }, { "completion_length": 179.50001525878906, "epoch": 0.1226031746031746, "grad_norm": 0.002954683965072036, "kl": 0.007964818738400936, "learning_rate": 2.1893424036281178e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1931 }, { "completion_length": 170.5, "epoch": 0.12266666666666666, "grad_norm": 0.822060227394104, "kl": 0.00814285408705473, "learning_rate": 2.1904761904761904e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 1932 }, { "completion_length": 162.7857208251953, "epoch": 0.12273015873015873, "grad_norm": 0.003563552163541317, "kl": 0.009615419432520866, "learning_rate": 2.191609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1933 }, { "completion_length": 159.1428680419922, "epoch": 0.12279365079365079, "grad_norm": 0.004097526427358389, "kl": 0.011290044523775578, "learning_rate": 2.1927437641723352e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1934 }, { "completion_length": 178.92857360839844, "epoch": 0.12285714285714286, "grad_norm": 0.0031688206363469362, "kl": 0.007911793887615204, "learning_rate": 2.193877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1935 }, { "completion_length": 158.57144165039062, "epoch": 0.12292063492063492, "grad_norm": 0.004167595878243446, "kl": 0.011471825651824474, "learning_rate": 2.1950113378684806e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1936 }, { "completion_length": 169.92857360839844, "epoch": 0.12298412698412699, "grad_norm": 0.003493158845230937, "kl": 0.009264355525374413, "learning_rate": 2.1961451247165532e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1937 }, { "completion_length": 157.71429443359375, "epoch": 0.12304761904761904, "grad_norm": 0.003540417179465294, "kl": 0.009938005357980728, "learning_rate": 2.1972789115646257e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1938 }, { "completion_length": 176.7857208251953, "epoch": 0.12311111111111112, "grad_norm": 0.003085100557655096, "kl": 0.008565510623157024, "learning_rate": 2.1984126984126983e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1939 }, { "completion_length": 158.0, "epoch": 0.12317460317460317, "grad_norm": 0.004090656526386738, "kl": 0.011704247444868088, "learning_rate": 2.1995464852607708e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1940 }, { "completion_length": 143.6428680419922, "epoch": 0.12323809523809524, "grad_norm": 0.004797656554728746, "kl": 0.012826012447476387, "learning_rate": 2.2006802721088436e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1941 }, { "completion_length": 159.6428680419922, "epoch": 0.1233015873015873, "grad_norm": 0.00338543183170259, "kl": 0.009259222075343132, "learning_rate": 2.201814058956916e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1942 }, { "completion_length": 171.1428680419922, "epoch": 0.12336507936507937, "grad_norm": 0.0035546680446714163, "kl": 0.009998850524425507, "learning_rate": 2.2029478458049885e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1943 }, { "completion_length": 157.85714721679688, "epoch": 0.12342857142857143, "grad_norm": 0.003405060153454542, "kl": 0.008875355124473572, "learning_rate": 2.2040816326530613e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1944 }, { "completion_length": 170.71429443359375, "epoch": 0.12349206349206349, "grad_norm": 0.003498977981507778, "kl": 0.010189793072640896, "learning_rate": 2.2052154195011339e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1945 }, { "completion_length": 162.1428680419922, "epoch": 0.12355555555555556, "grad_norm": 0.0034272093325853348, "kl": 0.008719462901353836, "learning_rate": 2.2063492063492062e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1946 }, { "completion_length": 155.21429443359375, "epoch": 0.12361904761904761, "grad_norm": 0.003894417779520154, "kl": 0.010534695349633694, "learning_rate": 2.2074829931972787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1947 }, { "completion_length": 180.85714721679688, "epoch": 0.12368253968253969, "grad_norm": 0.0035271337255835533, "kl": 0.010433975607156754, "learning_rate": 2.2086167800453515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1948 }, { "completion_length": 156.07144165039062, "epoch": 0.12374603174603174, "grad_norm": 0.0032771998085081577, "kl": 0.00895619671791792, "learning_rate": 2.2097505668934238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1949 }, { "completion_length": 179.2857208251953, "epoch": 0.12380952380952381, "grad_norm": 0.0030470818746834993, "kl": 0.008588884025812149, "learning_rate": 2.2108843537414964e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1950 }, { "completion_length": 171.42857360839844, "epoch": 0.12387301587301587, "grad_norm": 0.003197522833943367, "kl": 0.009320929646492004, "learning_rate": 2.2120181405895692e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1951 }, { "completion_length": 144.92857360839844, "epoch": 0.12393650793650794, "grad_norm": 0.0032148403115570545, "kl": 0.009382322430610657, "learning_rate": 2.2131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1952 }, { "completion_length": 177.1428680419922, "epoch": 0.124, "grad_norm": 0.0029026910196989775, "kl": 0.007970757782459259, "learning_rate": 2.214285714285714e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1953 }, { "completion_length": 198.00001525878906, "epoch": 0.12406349206349207, "grad_norm": 0.003159577026963234, "kl": 0.008548397570848465, "learning_rate": 2.2154195011337869e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1954 }, { "completion_length": 170.92857360839844, "epoch": 0.12412698412698413, "grad_norm": 0.003189756767824292, "kl": 0.009135444648563862, "learning_rate": 2.2165532879818594e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1955 }, { "completion_length": 133.21429443359375, "epoch": 0.12419047619047618, "grad_norm": 0.0038631227798759937, "kl": 0.01046636514365673, "learning_rate": 2.217687074829932e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1956 }, { "completion_length": 178.07144165039062, "epoch": 0.12425396825396826, "grad_norm": 0.003795650554820895, "kl": 0.00955075491219759, "learning_rate": 2.2188208616780043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1957 }, { "completion_length": 139.71429443359375, "epoch": 0.12431746031746031, "grad_norm": 0.004292443860322237, "kl": 0.011535290628671646, "learning_rate": 2.219954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1958 }, { "completion_length": 136.85714721679688, "epoch": 0.12438095238095238, "grad_norm": 0.003795559285208583, "kl": 0.011488751508295536, "learning_rate": 2.2210884353741496e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1959 }, { "completion_length": 164.2857208251953, "epoch": 0.12444444444444444, "grad_norm": 0.003763417946174741, "kl": 0.010171324014663696, "learning_rate": 2.222222222222222e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1960 }, { "completion_length": 182.21429443359375, "epoch": 0.12450793650793651, "grad_norm": 0.002859242493286729, "kl": 0.0075829182751476765, "learning_rate": 2.2233560090702947e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1961 }, { "completion_length": 186.85714721679688, "epoch": 0.12457142857142857, "grad_norm": 0.003082635346800089, "kl": 0.00852564163506031, "learning_rate": 2.2244897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1962 }, { "completion_length": 164.2857208251953, "epoch": 0.12463492063492064, "grad_norm": 0.0043007805943489075, "kl": 0.011267075315117836, "learning_rate": 2.2256235827664398e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1963 }, { "completion_length": 187.2857208251953, "epoch": 0.1246984126984127, "grad_norm": 0.0034044969361275434, "kl": 0.009113925509154797, "learning_rate": 2.2267573696145124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1964 }, { "completion_length": 209.35714721679688, "epoch": 0.12476190476190477, "grad_norm": 0.0029839682392776012, "kl": 0.008149847388267517, "learning_rate": 2.227891156462585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1965 }, { "completion_length": 181.00001525878906, "epoch": 0.12482539682539683, "grad_norm": 0.0030250861309468746, "kl": 0.008336219005286694, "learning_rate": 2.2290249433106575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1966 }, { "completion_length": 154.6428680419922, "epoch": 0.12488888888888888, "grad_norm": 0.004116619937121868, "kl": 0.011418540962040424, "learning_rate": 2.23015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1967 }, { "completion_length": 209.57144165039062, "epoch": 0.12495238095238095, "grad_norm": 0.00311756762675941, "kl": 0.008442231453955173, "learning_rate": 2.2312925170068026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1968 }, { "completion_length": 137.1428680419922, "epoch": 0.12501587301587302, "grad_norm": 0.004517621360719204, "kl": 0.011507800780236721, "learning_rate": 2.2324263038548752e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1969 }, { "completion_length": 178.42857360839844, "epoch": 0.12507936507936507, "grad_norm": 0.003403583774343133, "kl": 0.008872974663972855, "learning_rate": 2.2335600907029477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1970 }, { "completion_length": 168.21429443359375, "epoch": 0.12514285714285714, "grad_norm": 0.00264622550457716, "kl": 0.006808618549257517, "learning_rate": 2.2346938775510203e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1971 }, { "completion_length": 211.6428680419922, "epoch": 0.1252063492063492, "grad_norm": 0.0025671974290162325, "kl": 0.007008319720625877, "learning_rate": 2.2358276643990928e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1972 }, { "completion_length": 182.2857208251953, "epoch": 0.12526984126984128, "grad_norm": 0.0029841531068086624, "kl": 0.007511974778026342, "learning_rate": 2.2369614512471654e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1973 }, { "completion_length": 134.07144165039062, "epoch": 0.12533333333333332, "grad_norm": 0.00449404725804925, "kl": 0.01245674304664135, "learning_rate": 2.2380952380952382e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1974 }, { "completion_length": 161.0, "epoch": 0.1253968253968254, "grad_norm": 0.003611504565924406, "kl": 0.009663376025855541, "learning_rate": 2.2392290249433105e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1975 }, { "completion_length": 177.1428680419922, "epoch": 0.12546031746031747, "grad_norm": 0.0029245142359286547, "kl": 0.008007088676095009, "learning_rate": 2.240362811791383e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1976 }, { "completion_length": 188.50001525878906, "epoch": 0.12552380952380954, "grad_norm": 0.0028486803639680147, "kl": 0.007900267839431763, "learning_rate": 2.2414965986394556e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1977 }, { "completion_length": 159.85714721679688, "epoch": 0.12558730158730158, "grad_norm": 0.003880020696669817, "kl": 0.009527583606541157, "learning_rate": 2.2426303854875284e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1978 }, { "completion_length": 188.71429443359375, "epoch": 0.12565079365079365, "grad_norm": 0.0029347233939915895, "kl": 0.0074219792149960995, "learning_rate": 2.2437641723356007e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1979 }, { "completion_length": 181.2857208251953, "epoch": 0.12571428571428572, "grad_norm": 0.00331349135376513, "kl": 0.008866622112691402, "learning_rate": 2.2448979591836733e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1980 }, { "completion_length": 181.2857208251953, "epoch": 0.12577777777777777, "grad_norm": 0.0028196852654218674, "kl": 0.007554524578154087, "learning_rate": 2.246031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1981 }, { "completion_length": 163.07144165039062, "epoch": 0.12584126984126984, "grad_norm": 0.002795672509819269, "kl": 0.007126294542104006, "learning_rate": 2.2471655328798184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1982 }, { "completion_length": 178.6428680419922, "epoch": 0.1259047619047619, "grad_norm": 0.0029317268636077642, "kl": 0.007954462431371212, "learning_rate": 2.248299319727891e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1983 }, { "completion_length": 140.92857360839844, "epoch": 0.12596825396825398, "grad_norm": 0.003653242951259017, "kl": 0.009710030630230904, "learning_rate": 2.2494331065759638e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1984 }, { "completion_length": 152.0, "epoch": 0.12603174603174602, "grad_norm": 0.0037378123961389065, "kl": 0.009897056967020035, "learning_rate": 2.2505668934240363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1985 }, { "completion_length": 127.64286041259766, "epoch": 0.1260952380952381, "grad_norm": 0.0038266940973699093, "kl": 0.009935718961060047, "learning_rate": 2.2517006802721086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1986 }, { "completion_length": 176.57144165039062, "epoch": 0.12615873015873016, "grad_norm": 0.003601735457777977, "kl": 0.010352637618780136, "learning_rate": 2.2528344671201812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1987 }, { "completion_length": 159.6428680419922, "epoch": 0.12622222222222224, "grad_norm": 0.0038693039678037167, "kl": 0.009878117591142654, "learning_rate": 2.253968253968254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1988 }, { "completion_length": 131.57144165039062, "epoch": 0.12628571428571428, "grad_norm": 0.003593417350202799, "kl": 0.009404849261045456, "learning_rate": 2.2551020408163265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1989 }, { "completion_length": 131.71429443359375, "epoch": 0.12634920634920635, "grad_norm": 0.0037977569736540318, "kl": 0.009128343313932419, "learning_rate": 2.2562358276643988e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1990 }, { "completion_length": 173.85714721679688, "epoch": 0.12641269841269842, "grad_norm": 0.0031703675631433725, "kl": 0.0077975476160645485, "learning_rate": 2.2573696145124716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1991 }, { "completion_length": 177.21429443359375, "epoch": 0.12647619047619046, "grad_norm": 0.0034520758781582117, "kl": 0.00870892871171236, "learning_rate": 2.2585034013605442e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1992 }, { "completion_length": 131.21429443359375, "epoch": 0.12653968253968254, "grad_norm": 0.0033248618710786104, "kl": 0.009494101628661156, "learning_rate": 2.2596371882086165e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1993 }, { "completion_length": 205.71429443359375, "epoch": 0.1266031746031746, "grad_norm": 0.002821127651259303, "kl": 0.007214193232357502, "learning_rate": 2.2607709750566893e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1994 }, { "completion_length": 159.57144165039062, "epoch": 0.12666666666666668, "grad_norm": 0.0034499254543334246, "kl": 0.007934832945466042, "learning_rate": 2.2619047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1995 }, { "completion_length": 156.6428680419922, "epoch": 0.12673015873015872, "grad_norm": 0.0036601750180125237, "kl": 0.00903593935072422, "learning_rate": 2.2630385487528344e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1996 }, { "completion_length": 171.92857360839844, "epoch": 0.1267936507936508, "grad_norm": 0.002536744112148881, "kl": 0.007027808576822281, "learning_rate": 2.2641723356009067e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1997 }, { "completion_length": 151.71429443359375, "epoch": 0.12685714285714286, "grad_norm": 0.0037989220581948757, "kl": 0.009718701243400574, "learning_rate": 2.2653061224489795e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1998 }, { "completion_length": 173.00001525878906, "epoch": 0.12692063492063493, "grad_norm": 0.0032544510904699564, "kl": 0.007910473272204399, "learning_rate": 2.266439909297052e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 1999 }, { "completion_length": 166.21429443359375, "epoch": 0.12698412698412698, "grad_norm": 0.002788131358101964, "kl": 0.00690626073628664, "learning_rate": 2.2675736961451246e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2000 }, { "completion_length": 140.85714721679688, "epoch": 0.12704761904761905, "grad_norm": 0.0040524620562791824, "kl": 0.009931479580700397, "learning_rate": 2.2687074829931972e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2001 }, { "completion_length": 153.07144165039062, "epoch": 0.12711111111111112, "grad_norm": 0.0034879320301115513, "kl": 0.009061699733138084, "learning_rate": 2.2698412698412697e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2002 }, { "completion_length": 171.85714721679688, "epoch": 0.12717460317460316, "grad_norm": 0.002870926633477211, "kl": 0.00718651432543993, "learning_rate": 2.2709750566893423e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2003 }, { "completion_length": 173.57144165039062, "epoch": 0.12723809523809523, "grad_norm": 0.0029961392283439636, "kl": 0.007824160158634186, "learning_rate": 2.272108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2004 }, { "completion_length": 165.2857208251953, "epoch": 0.1273015873015873, "grad_norm": 0.002550931414589286, "kl": 0.00668518478050828, "learning_rate": 2.2732426303854874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2005 }, { "completion_length": 154.71429443359375, "epoch": 0.12736507936507938, "grad_norm": 0.00392500776797533, "kl": 0.008389146067202091, "learning_rate": 2.27437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2006 }, { "completion_length": 170.35714721679688, "epoch": 0.12742857142857142, "grad_norm": 0.002956404583528638, "kl": 0.007343321107327938, "learning_rate": 2.2755102040816325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2007 }, { "completion_length": 175.21429443359375, "epoch": 0.1274920634920635, "grad_norm": 0.002701869932934642, "kl": 0.006801513023674488, "learning_rate": 2.276643990929705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2008 }, { "completion_length": 151.1428680419922, "epoch": 0.12755555555555556, "grad_norm": 0.003318927949294448, "kl": 0.008923497051000595, "learning_rate": 2.2777777777777776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2009 }, { "completion_length": 146.7857208251953, "epoch": 0.12761904761904763, "grad_norm": 0.003502469277009368, "kl": 0.009373544715344906, "learning_rate": 2.2789115646258502e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2010 }, { "completion_length": 146.85714721679688, "epoch": 0.12768253968253968, "grad_norm": 0.0037375320680439472, "kl": 0.008869272656738758, "learning_rate": 2.280045351473923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2011 }, { "completion_length": 159.0, "epoch": 0.12774603174603175, "grad_norm": 0.00288658426143229, "kl": 0.007039633113890886, "learning_rate": 2.2811791383219953e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2012 }, { "completion_length": 183.85714721679688, "epoch": 0.12780952380952382, "grad_norm": 0.0024699631612747908, "kl": 0.006791410036385059, "learning_rate": 2.2823129251700678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2013 }, { "completion_length": 169.5, "epoch": 0.12787301587301586, "grad_norm": 0.0026745873037725687, "kl": 0.006739049218595028, "learning_rate": 2.2834467120181407e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2014 }, { "completion_length": 171.85714721679688, "epoch": 0.12793650793650793, "grad_norm": 1.007311463356018, "kl": 0.007868499495089054, "learning_rate": 2.2845804988662132e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2015 }, { "completion_length": 179.2857208251953, "epoch": 0.128, "grad_norm": 0.0022520620841532946, "kl": 0.00572010176256299, "learning_rate": 2.2857142857142855e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2016 }, { "completion_length": 162.0, "epoch": 0.12806349206349207, "grad_norm": 0.0025585079565644264, "kl": 0.006257930770516396, "learning_rate": 2.286848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2017 }, { "completion_length": 178.50001525878906, "epoch": 0.12812698412698412, "grad_norm": 0.002737833419814706, "kl": 0.006785405334085226, "learning_rate": 2.287981859410431e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2018 }, { "completion_length": 172.42857360839844, "epoch": 0.1281904761904762, "grad_norm": 0.0026732587721198797, "kl": 0.006315441802144051, "learning_rate": 2.2891156462585032e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2019 }, { "completion_length": 144.2857208251953, "epoch": 0.12825396825396826, "grad_norm": 0.0031811802182346582, "kl": 0.007119523826986551, "learning_rate": 2.2902494331065757e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2020 }, { "completion_length": 166.57144165039062, "epoch": 0.12831746031746033, "grad_norm": 0.0019043725915253162, "kl": 0.004389412701129913, "learning_rate": 2.2913832199546485e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2021 }, { "completion_length": 137.92857360839844, "epoch": 0.12838095238095237, "grad_norm": 0.0027528132777661085, "kl": 0.006570545025169849, "learning_rate": 2.292517006802721e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2022 }, { "completion_length": 187.7857208251953, "epoch": 0.12844444444444444, "grad_norm": 0.00241075549274683, "kl": 0.0053262789733707905, "learning_rate": 2.2936507936507934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2023 }, { "completion_length": 159.2857208251953, "epoch": 0.12850793650793652, "grad_norm": 0.0024645670782774687, "kl": 0.005830584093928337, "learning_rate": 2.2947845804988662e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2024 }, { "completion_length": 167.6428680419922, "epoch": 0.12857142857142856, "grad_norm": 0.0026883219834417105, "kl": 0.005683952942490578, "learning_rate": 2.2959183673469388e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2025 }, { "completion_length": 149.21429443359375, "epoch": 0.12863492063492063, "grad_norm": 0.003088991856202483, "kl": 0.007099071517586708, "learning_rate": 2.2970521541950113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2026 }, { "completion_length": 165.1428680419922, "epoch": 0.1286984126984127, "grad_norm": 0.0025685050059109926, "kl": 0.00607061292976141, "learning_rate": 2.2981859410430836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2027 }, { "completion_length": 157.2857208251953, "epoch": 0.12876190476190477, "grad_norm": 0.0023957849480211735, "kl": 0.0051651690155267715, "learning_rate": 2.2993197278911564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2028 }, { "completion_length": 171.71429443359375, "epoch": 0.12882539682539682, "grad_norm": 0.0022601548116654158, "kl": 0.00499927019700408, "learning_rate": 2.300453514739229e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2029 }, { "completion_length": 148.5, "epoch": 0.1288888888888889, "grad_norm": 0.002426153514534235, "kl": 0.0051292069256305695, "learning_rate": 2.3015873015873013e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2030 }, { "completion_length": 172.07144165039062, "epoch": 0.12895238095238096, "grad_norm": 0.0024441066198050976, "kl": 0.005472308024764061, "learning_rate": 2.302721088435374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2031 }, { "completion_length": 146.2857208251953, "epoch": 0.12901587301587303, "grad_norm": 0.002343851840123534, "kl": 0.004978193901479244, "learning_rate": 2.3038548752834466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2032 }, { "completion_length": 130.92857360839844, "epoch": 0.12907936507936507, "grad_norm": 0.0026486243586987257, "kl": 0.005611542146652937, "learning_rate": 2.3049886621315192e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2033 }, { "completion_length": 171.71429443359375, "epoch": 0.12914285714285714, "grad_norm": 0.0021921214647591114, "kl": 0.00481557659804821, "learning_rate": 2.3061224489795917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2034 }, { "completion_length": 148.7857208251953, "epoch": 0.1292063492063492, "grad_norm": 0.002644544467329979, "kl": 0.00573549373075366, "learning_rate": 2.3072562358276643e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2035 }, { "completion_length": 158.5, "epoch": 0.12926984126984126, "grad_norm": 0.002184369368478656, "kl": 0.0046186791732907295, "learning_rate": 2.3083900226757369e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2036 }, { "completion_length": 131.35714721679688, "epoch": 0.12933333333333333, "grad_norm": 0.003747862298041582, "kl": 0.007878773845732212, "learning_rate": 2.3095238095238097e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2037 }, { "completion_length": 128.57144165039062, "epoch": 0.1293968253968254, "grad_norm": 0.002960233250632882, "kl": 0.0061088125221431255, "learning_rate": 2.310657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2038 }, { "completion_length": 144.2857208251953, "epoch": 0.12946031746031747, "grad_norm": 0.0028086912352591753, "kl": 0.0056298417039215565, "learning_rate": 2.3117913832199545e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2039 }, { "completion_length": 169.71429443359375, "epoch": 0.1295238095238095, "grad_norm": 0.0021055133547633886, "kl": 0.00466823810711503, "learning_rate": 2.312925170068027e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2040 }, { "completion_length": 167.57144165039062, "epoch": 0.12958730158730158, "grad_norm": 0.0024665172677487135, "kl": 0.0054656099528074265, "learning_rate": 2.3140589569160996e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2041 }, { "completion_length": 183.2857208251953, "epoch": 0.12965079365079366, "grad_norm": 0.0022288518957793713, "kl": 0.00488389004021883, "learning_rate": 2.3151927437641722e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2042 }, { "completion_length": 131.92857360839844, "epoch": 0.12971428571428573, "grad_norm": 0.002987206680700183, "kl": 0.006356360856443644, "learning_rate": 2.3163265306122447e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2043 }, { "completion_length": 165.1428680419922, "epoch": 0.12977777777777777, "grad_norm": 0.0022879960015416145, "kl": 0.004843647126108408, "learning_rate": 2.3174603174603176e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2044 }, { "completion_length": 196.00001525878906, "epoch": 0.12984126984126984, "grad_norm": 0.0022109949495643377, "kl": 0.0050477078184485435, "learning_rate": 2.3185941043083898e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2045 }, { "completion_length": 138.71429443359375, "epoch": 0.1299047619047619, "grad_norm": 1.3100069761276245, "kl": 0.006845171097666025, "learning_rate": 2.3197278911564624e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2046 }, { "completion_length": 160.0, "epoch": 0.12996825396825396, "grad_norm": 0.0026333543937653303, "kl": 0.005698499735444784, "learning_rate": 2.320861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2047 }, { "completion_length": 154.0, "epoch": 0.13003174603174603, "grad_norm": 0.0027785503771156073, "kl": 0.005877593066543341, "learning_rate": 2.3219954648526078e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2048 }, { "completion_length": 163.07144165039062, "epoch": 0.1300952380952381, "grad_norm": 0.0020288650412112474, "kl": 0.004445758648216724, "learning_rate": 2.32312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2049 }, { "completion_length": 139.85714721679688, "epoch": 0.13015873015873017, "grad_norm": 0.0033525992184877396, "kl": 0.007522072643041611, "learning_rate": 2.3242630385487526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2050 }, { "completion_length": 161.85714721679688, "epoch": 0.1302222222222222, "grad_norm": 0.002634747652336955, "kl": 0.006052591372281313, "learning_rate": 2.3253968253968254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2051 }, { "completion_length": 109.50000762939453, "epoch": 0.13028571428571428, "grad_norm": 1.1421421766281128, "kl": 0.007954793982207775, "learning_rate": 2.3265306122448977e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2052 }, { "completion_length": 184.00001525878906, "epoch": 0.13034920634920635, "grad_norm": 0.0022494362201541662, "kl": 0.005263415165245533, "learning_rate": 2.3276643990929703e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2053 }, { "completion_length": 144.71429443359375, "epoch": 0.13041269841269842, "grad_norm": 0.0027188663370907307, "kl": 0.0068306042812764645, "learning_rate": 2.328798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2054 }, { "completion_length": 135.71429443359375, "epoch": 0.13047619047619047, "grad_norm": 0.0030803694389760494, "kl": 0.007492963224649429, "learning_rate": 2.3299319727891157e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2055 }, { "completion_length": 111.50000762939453, "epoch": 0.13053968253968254, "grad_norm": 0.003938782960176468, "kl": 0.009012831375002861, "learning_rate": 2.331065759637188e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2056 }, { "completion_length": 167.42857360839844, "epoch": 0.1306031746031746, "grad_norm": 0.0028934874571859837, "kl": 0.006802927702665329, "learning_rate": 2.3321995464852605e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2057 }, { "completion_length": 124.71429443359375, "epoch": 0.13066666666666665, "grad_norm": 0.003599154995754361, "kl": 0.008935212157666683, "learning_rate": 2.3333333333333333e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2058 }, { "completion_length": 125.92857360839844, "epoch": 0.13073015873015872, "grad_norm": 0.00447734072804451, "kl": 0.010355809703469276, "learning_rate": 2.334467120181406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2059 }, { "completion_length": 155.5, "epoch": 0.1307936507936508, "grad_norm": 0.003125687362626195, "kl": 0.007185628637671471, "learning_rate": 2.3356009070294782e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2060 }, { "completion_length": 167.57144165039062, "epoch": 0.13085714285714287, "grad_norm": 0.002374628558754921, "kl": 0.006021188572049141, "learning_rate": 2.336734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2061 }, { "completion_length": 171.50001525878906, "epoch": 0.1309206349206349, "grad_norm": 0.002754447516053915, "kl": 0.006747112143784761, "learning_rate": 2.3378684807256235e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2062 }, { "completion_length": 132.92857360839844, "epoch": 0.13098412698412698, "grad_norm": 0.0031999838538467884, "kl": 0.007702664937824011, "learning_rate": 2.339002267573696e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2063 }, { "completion_length": 164.57144165039062, "epoch": 0.13104761904761905, "grad_norm": 0.003403155133128166, "kl": 0.007986203767359257, "learning_rate": 2.3401360544217686e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2064 }, { "completion_length": 161.71429443359375, "epoch": 0.13111111111111112, "grad_norm": 0.003330773673951626, "kl": 0.008435117080807686, "learning_rate": 2.3412698412698412e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2065 }, { "completion_length": 152.92857360839844, "epoch": 0.13117460317460317, "grad_norm": 0.0030870966147631407, "kl": 0.0075261653400957584, "learning_rate": 2.3424036281179138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2066 }, { "completion_length": 146.1428680419922, "epoch": 0.13123809523809524, "grad_norm": 0.003719672095030546, "kl": 0.0077729797922074795, "learning_rate": 2.343537414965986e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2067 }, { "completion_length": 141.57144165039062, "epoch": 0.1313015873015873, "grad_norm": 0.004209328908473253, "kl": 0.011437508277595043, "learning_rate": 2.3446712018140589e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2068 }, { "completion_length": 163.92857360839844, "epoch": 0.13136507936507935, "grad_norm": 0.0028813346289098263, "kl": 0.006842706818133593, "learning_rate": 2.3458049886621314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2069 }, { "completion_length": 180.21429443359375, "epoch": 0.13142857142857142, "grad_norm": 0.0025714219082146883, "kl": 0.006259190384298563, "learning_rate": 2.346938775510204e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2070 }, { "completion_length": 145.21429443359375, "epoch": 0.1314920634920635, "grad_norm": 0.0032255700789391994, "kl": 0.008413138799369335, "learning_rate": 2.3480725623582765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2071 }, { "completion_length": 181.92857360839844, "epoch": 0.13155555555555556, "grad_norm": 0.00285530393011868, "kl": 0.007604769431054592, "learning_rate": 2.349206349206349e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2072 }, { "completion_length": 159.7857208251953, "epoch": 0.1316190476190476, "grad_norm": 0.003165091387927532, "kl": 0.007674572058022022, "learning_rate": 2.3503401360544216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2073 }, { "completion_length": 160.85714721679688, "epoch": 0.13168253968253968, "grad_norm": 0.0031658674124628305, "kl": 0.0077925147488713264, "learning_rate": 2.3514739229024945e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2074 }, { "completion_length": 166.85714721679688, "epoch": 0.13174603174603175, "grad_norm": 0.003827680367976427, "kl": 0.009153450839221478, "learning_rate": 2.3526077097505667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2075 }, { "completion_length": 130.1428680419922, "epoch": 0.13180952380952382, "grad_norm": 0.003506170818582177, "kl": 0.008335357531905174, "learning_rate": 2.3537414965986393e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2076 }, { "completion_length": 150.6428680419922, "epoch": 0.13187301587301586, "grad_norm": 0.0031995559111237526, "kl": 0.00797827448695898, "learning_rate": 2.354875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2077 }, { "completion_length": 145.2857208251953, "epoch": 0.13193650793650794, "grad_norm": 0.0021257312037050724, "kl": 0.005855896044522524, "learning_rate": 2.3560090702947844e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2078 }, { "completion_length": 140.35714721679688, "epoch": 0.132, "grad_norm": 0.0038018175400793552, "kl": 0.009994182735681534, "learning_rate": 2.357142857142857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2079 }, { "completion_length": 154.71429443359375, "epoch": 0.13206349206349208, "grad_norm": 0.0031976138707250357, "kl": 0.00756865506991744, "learning_rate": 2.3582766439909295e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2080 }, { "completion_length": 186.57144165039062, "epoch": 0.13212698412698412, "grad_norm": 0.002785485005006194, "kl": 0.007289862260222435, "learning_rate": 2.3594104308390023e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2081 }, { "completion_length": 148.0, "epoch": 0.1321904761904762, "grad_norm": 0.0038554484490305185, "kl": 0.00896244402974844, "learning_rate": 2.3605442176870746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2082 }, { "completion_length": 152.1428680419922, "epoch": 0.13225396825396826, "grad_norm": 0.002815631218254566, "kl": 0.007753326091915369, "learning_rate": 2.3616780045351472e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2083 }, { "completion_length": 137.6428680419922, "epoch": 0.1323174603174603, "grad_norm": 0.003963704686611891, "kl": 0.00974284764379263, "learning_rate": 2.36281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2084 }, { "completion_length": 160.21429443359375, "epoch": 0.13238095238095238, "grad_norm": 0.0035099119413644075, "kl": 0.008471888490021229, "learning_rate": 2.3639455782312926e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2085 }, { "completion_length": 155.35714721679688, "epoch": 0.13244444444444445, "grad_norm": 0.0032609114423394203, "kl": 0.00827525183558464, "learning_rate": 2.3650793650793648e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2086 }, { "completion_length": 204.6428680419922, "epoch": 0.13250793650793652, "grad_norm": 0.0025157604832202196, "kl": 0.006458136718720198, "learning_rate": 2.3662131519274377e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2087 }, { "completion_length": 181.6428680419922, "epoch": 0.13257142857142856, "grad_norm": 0.0022513961885124445, "kl": 0.005834874697029591, "learning_rate": 2.3673469387755102e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2088 }, { "completion_length": 187.42857360839844, "epoch": 0.13263492063492063, "grad_norm": 0.0027119535952806473, "kl": 0.007170674856752157, "learning_rate": 2.3684807256235825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2089 }, { "completion_length": 172.7857208251953, "epoch": 0.1326984126984127, "grad_norm": 0.00300028920173645, "kl": 0.007752855308353901, "learning_rate": 2.369614512471655e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2090 }, { "completion_length": 151.0, "epoch": 0.13276190476190478, "grad_norm": 0.0029621045105159283, "kl": 0.00744063314050436, "learning_rate": 2.370748299319728e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2091 }, { "completion_length": 159.85714721679688, "epoch": 0.13282539682539682, "grad_norm": 0.0038220686838030815, "kl": 0.008385227993130684, "learning_rate": 2.3718820861678004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2092 }, { "completion_length": 159.7857208251953, "epoch": 0.1328888888888889, "grad_norm": 0.003690136130899191, "kl": 0.008829502388834953, "learning_rate": 2.3730158730158727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2093 }, { "completion_length": 200.2857208251953, "epoch": 0.13295238095238096, "grad_norm": 0.00230710138566792, "kl": 0.005551973823457956, "learning_rate": 2.3741496598639455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2094 }, { "completion_length": 176.57144165039062, "epoch": 0.133015873015873, "grad_norm": 0.0028057717718183994, "kl": 0.0070509291253983974, "learning_rate": 2.375283446712018e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2095 }, { "completion_length": 196.6428680419922, "epoch": 0.13307936507936508, "grad_norm": 0.0022559736389666796, "kl": 0.005312361754477024, "learning_rate": 2.3764172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2096 }, { "completion_length": 160.21429443359375, "epoch": 0.13314285714285715, "grad_norm": 0.0023611995857208967, "kl": 0.006291588768362999, "learning_rate": 2.3775510204081632e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2097 }, { "completion_length": 162.0, "epoch": 0.13320634920634922, "grad_norm": 0.002746800659224391, "kl": 0.006742722354829311, "learning_rate": 2.3786848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2098 }, { "completion_length": 144.57144165039062, "epoch": 0.13326984126984126, "grad_norm": 0.004363781772553921, "kl": 0.00984653364866972, "learning_rate": 2.3798185941043083e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2099 }, { "completion_length": 172.35714721679688, "epoch": 0.13333333333333333, "grad_norm": 0.0027449012268334627, "kl": 0.007089303340762854, "learning_rate": 2.3809523809523806e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2100 }, { "completion_length": 148.0, "epoch": 0.1333968253968254, "grad_norm": 0.0031398863065987825, "kl": 0.006837308872491121, "learning_rate": 2.3820861678004534e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2101 }, { "completion_length": 167.0, "epoch": 0.13346031746031747, "grad_norm": 0.003205385059118271, "kl": 0.008010548539459705, "learning_rate": 2.383219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2102 }, { "completion_length": 163.42857360839844, "epoch": 0.13352380952380952, "grad_norm": 0.0026204893365502357, "kl": 0.00687475735321641, "learning_rate": 2.384353741496599e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2103 }, { "completion_length": 129.2857208251953, "epoch": 0.1335873015873016, "grad_norm": 0.003736534621566534, "kl": 0.009131073951721191, "learning_rate": 2.385487528344671e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2104 }, { "completion_length": 193.07144165039062, "epoch": 0.13365079365079366, "grad_norm": 0.0022616751957684755, "kl": 0.005684587638825178, "learning_rate": 2.3866213151927434e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2105 }, { "completion_length": 181.92857360839844, "epoch": 0.1337142857142857, "grad_norm": 0.003033753950148821, "kl": 0.006725921295583248, "learning_rate": 2.387755102040816e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2106 }, { "completion_length": 152.42857360839844, "epoch": 0.13377777777777777, "grad_norm": 0.00359356589615345, "kl": 0.008592412807047367, "learning_rate": 2.388888888888889e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2107 }, { "completion_length": 157.07144165039062, "epoch": 0.13384126984126984, "grad_norm": 0.002829794306308031, "kl": 0.007240277715027332, "learning_rate": 2.3900226757369613e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2108 }, { "completion_length": 166.6428680419922, "epoch": 0.13390476190476192, "grad_norm": 0.002776965033262968, "kl": 0.0062656099908053875, "learning_rate": 2.391156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2109 }, { "completion_length": 142.6428680419922, "epoch": 0.13396825396825396, "grad_norm": 0.00396818108856678, "kl": 0.01010932307690382, "learning_rate": 2.3922902494331064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2110 }, { "completion_length": 162.92857360839844, "epoch": 0.13403174603174603, "grad_norm": 0.002453564666211605, "kl": 0.006097929086536169, "learning_rate": 2.3934240362811787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2111 }, { "completion_length": 186.21429443359375, "epoch": 0.1340952380952381, "grad_norm": 0.002339878585189581, "kl": 0.005802210886031389, "learning_rate": 2.3945578231292515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2112 }, { "completion_length": 159.21429443359375, "epoch": 0.13415873015873017, "grad_norm": 0.003983033820986748, "kl": 0.009414535015821457, "learning_rate": 2.3956916099773243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2113 }, { "completion_length": 145.85714721679688, "epoch": 0.13422222222222221, "grad_norm": 0.0038082741666585207, "kl": 0.007685259450227022, "learning_rate": 2.3968253968253966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2114 }, { "completion_length": 157.6428680419922, "epoch": 0.13428571428571429, "grad_norm": 0.0027086578775197268, "kl": 0.006545638665556908, "learning_rate": 2.397959183673469e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2115 }, { "completion_length": 147.5, "epoch": 0.13434920634920636, "grad_norm": 0.003251435002312064, "kl": 0.007784516084939241, "learning_rate": 2.399092970521542e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2116 }, { "completion_length": 183.2857208251953, "epoch": 0.1344126984126984, "grad_norm": 1.016916275024414, "kl": 0.005769586656242609, "learning_rate": 2.4002267573696146e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2117 }, { "completion_length": 154.07144165039062, "epoch": 0.13447619047619047, "grad_norm": 0.002983781509101391, "kl": 0.007237852085381746, "learning_rate": 2.401360544217687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2118 }, { "completion_length": 191.6428680419922, "epoch": 0.13453968253968254, "grad_norm": 0.002081010490655899, "kl": 0.005235027056187391, "learning_rate": 2.4024943310657597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2119 }, { "completion_length": 159.7857208251953, "epoch": 0.1346031746031746, "grad_norm": 0.0030042864382267, "kl": 0.007013824302703142, "learning_rate": 2.403628117913832e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2120 }, { "completion_length": 177.2857208251953, "epoch": 0.13466666666666666, "grad_norm": 0.002809693804010749, "kl": 0.007430708035826683, "learning_rate": 2.404761904761905e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2121 }, { "completion_length": 190.71429443359375, "epoch": 0.13473015873015873, "grad_norm": 0.0024359251838177443, "kl": 0.005814149510115385, "learning_rate": 2.4058956916099776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2122 }, { "completion_length": 142.42857360839844, "epoch": 0.1347936507936508, "grad_norm": 0.0035582343116402626, "kl": 0.009204301051795483, "learning_rate": 2.40702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2123 }, { "completion_length": 198.57144165039062, "epoch": 0.13485714285714287, "grad_norm": 1.2524387836456299, "kl": 0.006422300823032856, "learning_rate": 2.408163265306122e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2124 }, { "completion_length": 144.42857360839844, "epoch": 0.1349206349206349, "grad_norm": 0.004217406734824181, "kl": 0.010374169796705246, "learning_rate": 2.409297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2125 }, { "completion_length": 162.0, "epoch": 0.13498412698412698, "grad_norm": 0.0033703057561069727, "kl": 0.008154314011335373, "learning_rate": 2.4104308390022673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2126 }, { "completion_length": 173.85714721679688, "epoch": 0.13504761904761906, "grad_norm": 0.003576416987925768, "kl": 0.007783306762576103, "learning_rate": 2.41156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2127 }, { "completion_length": 137.57144165039062, "epoch": 0.1351111111111111, "grad_norm": 0.004005177412182093, "kl": 0.009108079597353935, "learning_rate": 2.4126984126984124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2128 }, { "completion_length": 196.42857360839844, "epoch": 0.13517460317460317, "grad_norm": 0.003345221048220992, "kl": 0.007708623073995113, "learning_rate": 2.413832199546485e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2129 }, { "completion_length": 179.2857208251953, "epoch": 0.13523809523809524, "grad_norm": 0.0039855316281318665, "kl": 0.008484980091452599, "learning_rate": 2.4149659863945575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2130 }, { "completion_length": 159.42857360839844, "epoch": 0.1353015873015873, "grad_norm": 0.003422958543524146, "kl": 0.008449053391814232, "learning_rate": 2.4160997732426303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2131 }, { "completion_length": 151.1428680419922, "epoch": 0.13536507936507935, "grad_norm": 0.0037292384076863527, "kl": 0.008587147109210491, "learning_rate": 2.417233560090703e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2132 }, { "completion_length": 163.07144165039062, "epoch": 0.13542857142857143, "grad_norm": 0.005847140680998564, "kl": 0.011925526894629002, "learning_rate": 2.4183673469387754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2133 }, { "completion_length": 189.85714721679688, "epoch": 0.1354920634920635, "grad_norm": 0.0032857635524123907, "kl": 0.008298731409013271, "learning_rate": 2.4195011337868477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2134 }, { "completion_length": 170.07144165039062, "epoch": 0.13555555555555557, "grad_norm": 0.003834731411188841, "kl": 0.009556755423545837, "learning_rate": 2.4206349206349205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2135 }, { "completion_length": 167.5, "epoch": 0.1356190476190476, "grad_norm": 0.004305968061089516, "kl": 0.010035393759608269, "learning_rate": 2.4217687074829934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2136 }, { "completion_length": 131.0, "epoch": 0.13568253968253968, "grad_norm": 0.005959737580269575, "kl": 0.014530048705637455, "learning_rate": 2.4229024943310657e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2137 }, { "completion_length": 185.6428680419922, "epoch": 0.13574603174603175, "grad_norm": 0.003667130134999752, "kl": 0.00913812406361103, "learning_rate": 2.424036281179138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2138 }, { "completion_length": 179.7857208251953, "epoch": 0.1358095238095238, "grad_norm": 0.0036690677516162395, "kl": 0.007843991741538048, "learning_rate": 2.425170068027211e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2139 }, { "completion_length": 145.1428680419922, "epoch": 0.13587301587301587, "grad_norm": 0.0049580964259803295, "kl": 0.011574865318834782, "learning_rate": 2.4263038548752836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2140 }, { "completion_length": 149.7857208251953, "epoch": 0.13593650793650794, "grad_norm": 0.004799487069249153, "kl": 0.01234080083668232, "learning_rate": 2.427437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2141 }, { "completion_length": 145.0, "epoch": 0.136, "grad_norm": 0.003516780212521553, "kl": 0.008477806113660336, "learning_rate": 2.4285714285714287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2142 }, { "completion_length": 181.07144165039062, "epoch": 0.13606349206349205, "grad_norm": 0.0035469001159071922, "kl": 0.009036649949848652, "learning_rate": 2.429705215419501e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2143 }, { "completion_length": 175.1428680419922, "epoch": 0.13612698412698412, "grad_norm": 0.0032953894697129726, "kl": 0.007629295811057091, "learning_rate": 2.430839002267574e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2144 }, { "completion_length": 222.6428680419922, "epoch": 0.1361904761904762, "grad_norm": 0.005243384744971991, "kl": 0.009672107174992561, "learning_rate": 2.431972789115646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2145 }, { "completion_length": 166.1428680419922, "epoch": 0.13625396825396827, "grad_norm": 0.00397404283285141, "kl": 0.008929227478802204, "learning_rate": 2.433106575963719e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2146 }, { "completion_length": 134.7857208251953, "epoch": 0.1363174603174603, "grad_norm": 0.007174638099968433, "kl": 0.014319011941552162, "learning_rate": 2.434240362811791e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2147 }, { "completion_length": 136.35714721679688, "epoch": 0.13638095238095238, "grad_norm": 0.005530364345759153, "kl": 0.011830856092274189, "learning_rate": 2.4353741496598635e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2148 }, { "completion_length": 177.1428680419922, "epoch": 0.13644444444444445, "grad_norm": 0.005314079578965902, "kl": 0.01169966347515583, "learning_rate": 2.4365079365079363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2149 }, { "completion_length": 161.2857208251953, "epoch": 0.1365079365079365, "grad_norm": 0.003664096351712942, "kl": 0.009138062596321106, "learning_rate": 2.437641723356009e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2150 }, { "completion_length": 158.5, "epoch": 0.13657142857142857, "grad_norm": 0.00426020473241806, "kl": 0.010180090554058552, "learning_rate": 2.4387755102040814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2151 }, { "completion_length": 143.92857360839844, "epoch": 0.13663492063492064, "grad_norm": 0.7625777125358582, "kl": 0.013011516071856022, "learning_rate": 2.439909297052154e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2152 }, { "completion_length": 163.1428680419922, "epoch": 0.1366984126984127, "grad_norm": 0.003688153112307191, "kl": 0.009569546207785606, "learning_rate": 2.4410430839002265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2153 }, { "completion_length": 159.57144165039062, "epoch": 0.13676190476190475, "grad_norm": 0.004605982918292284, "kl": 0.011012976057827473, "learning_rate": 2.4421768707482993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2154 }, { "completion_length": 157.0, "epoch": 0.13682539682539682, "grad_norm": 0.004757957998663187, "kl": 0.011823069304227829, "learning_rate": 2.4433106575963716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2155 }, { "completion_length": 192.57144165039062, "epoch": 0.1368888888888889, "grad_norm": 0.004163598641753197, "kl": 0.009591174311935902, "learning_rate": 2.4444444444444445e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2156 }, { "completion_length": 155.21429443359375, "epoch": 0.13695238095238096, "grad_norm": 0.004531318787485361, "kl": 0.010592697188258171, "learning_rate": 2.445578231292517e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2157 }, { "completion_length": 137.5, "epoch": 0.137015873015873, "grad_norm": 0.004339046310633421, "kl": 0.011031883768737316, "learning_rate": 2.4467120181405896e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2158 }, { "completion_length": 153.5, "epoch": 0.13707936507936508, "grad_norm": 0.004966236650943756, "kl": 0.012766453437507153, "learning_rate": 2.447845804988662e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2159 }, { "completion_length": 166.42857360839844, "epoch": 0.13714285714285715, "grad_norm": 1.4792306423187256, "kl": 0.012006337754428387, "learning_rate": 2.4489795918367347e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2160 }, { "completion_length": 171.92857360839844, "epoch": 0.1372063492063492, "grad_norm": 0.0046182358637452126, "kl": 0.010737315751612186, "learning_rate": 2.450113378684807e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2161 }, { "completion_length": 183.50001525878906, "epoch": 0.13726984126984126, "grad_norm": 0.0030763475224375725, "kl": 0.00718866428360343, "learning_rate": 2.45124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2162 }, { "completion_length": 172.57144165039062, "epoch": 0.13733333333333334, "grad_norm": 1.0102187395095825, "kl": 0.012451354414224625, "learning_rate": 2.452380952380952e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2163 }, { "completion_length": 145.57144165039062, "epoch": 0.1373968253968254, "grad_norm": 0.005217868834733963, "kl": 0.010998654179275036, "learning_rate": 2.453514739229025e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2164 }, { "completion_length": 161.21429443359375, "epoch": 0.13746031746031745, "grad_norm": 0.0035938862711191177, "kl": 0.008520875126123428, "learning_rate": 2.454648526077097e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2165 }, { "completion_length": 161.0, "epoch": 0.13752380952380952, "grad_norm": 0.0037870800588279963, "kl": 0.009220287203788757, "learning_rate": 2.45578231292517e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2166 }, { "completion_length": 182.21429443359375, "epoch": 0.1375873015873016, "grad_norm": 0.0036709948908537626, "kl": 0.008125375024974346, "learning_rate": 2.4569160997732423e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2167 }, { "completion_length": 197.71429443359375, "epoch": 0.13765079365079366, "grad_norm": 0.00326165440492332, "kl": 0.008925315923988819, "learning_rate": 2.458049886621315e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2168 }, { "completion_length": 128.57144165039062, "epoch": 0.1377142857142857, "grad_norm": 0.004267251119017601, "kl": 0.010560985654592514, "learning_rate": 2.459183673469388e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2169 }, { "completion_length": 126.00000762939453, "epoch": 0.13777777777777778, "grad_norm": 1.523288607597351, "kl": 0.011657718569040298, "learning_rate": 2.46031746031746e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2170 }, { "completion_length": 180.85714721679688, "epoch": 0.13784126984126985, "grad_norm": 0.003943490795791149, "kl": 0.008924057707190514, "learning_rate": 2.4614512471655325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2171 }, { "completion_length": 188.2857208251953, "epoch": 0.1379047619047619, "grad_norm": 0.0033636821899563074, "kl": 0.009214206598699093, "learning_rate": 2.4625850340136053e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2172 }, { "completion_length": 163.0, "epoch": 0.13796825396825396, "grad_norm": 0.0029545670840889215, "kl": 0.0074914745055139065, "learning_rate": 2.463718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2173 }, { "completion_length": 124.85714721679688, "epoch": 0.13803174603174603, "grad_norm": 0.004678108263760805, "kl": 0.012415640987455845, "learning_rate": 2.4648526077097504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2174 }, { "completion_length": 156.6428680419922, "epoch": 0.1380952380952381, "grad_norm": 0.003365207463502884, "kl": 0.008674496784806252, "learning_rate": 2.4659863945578227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2175 }, { "completion_length": 181.92857360839844, "epoch": 0.13815873015873015, "grad_norm": 0.00323109095916152, "kl": 0.007155494298785925, "learning_rate": 2.4671201814058955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2176 }, { "completion_length": 191.6428680419922, "epoch": 0.13822222222222222, "grad_norm": 0.00304139475338161, "kl": 0.006966029293835163, "learning_rate": 2.4682539682539684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2177 }, { "completion_length": 214.35714721679688, "epoch": 0.1382857142857143, "grad_norm": 0.002395759802311659, "kl": 0.006287791300565004, "learning_rate": 2.4693877551020407e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2178 }, { "completion_length": 165.57144165039062, "epoch": 0.13834920634920636, "grad_norm": 0.0026661527808755636, "kl": 0.007713871076703072, "learning_rate": 2.4705215419501135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2179 }, { "completion_length": 147.71429443359375, "epoch": 0.1384126984126984, "grad_norm": 0.0030681283678859472, "kl": 0.008089812472462654, "learning_rate": 2.471655328798186e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2180 }, { "completion_length": 126.92857360839844, "epoch": 0.13847619047619047, "grad_norm": 0.003602262120693922, "kl": 0.009163563139736652, "learning_rate": 2.4727891156462586e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2181 }, { "completion_length": 143.2857208251953, "epoch": 0.13853968253968255, "grad_norm": 0.0028456361033022404, "kl": 0.007542133796960115, "learning_rate": 2.473922902494331e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2182 }, { "completion_length": 158.71429443359375, "epoch": 0.13860317460317462, "grad_norm": 0.002801987575367093, "kl": 0.007590069435536861, "learning_rate": 2.4750566893424037e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2183 }, { "completion_length": 137.0, "epoch": 0.13866666666666666, "grad_norm": 0.003613850101828575, "kl": 0.008474886417388916, "learning_rate": 2.476190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2184 }, { "completion_length": 177.6428680419922, "epoch": 0.13873015873015873, "grad_norm": 0.002366813598200679, "kl": 0.007048092782497406, "learning_rate": 2.4773242630385483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2185 }, { "completion_length": 159.6428680419922, "epoch": 0.1387936507936508, "grad_norm": 0.0021262101363390684, "kl": 0.006247290875762701, "learning_rate": 2.478458049886621e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2186 }, { "completion_length": 168.85714721679688, "epoch": 0.13885714285714285, "grad_norm": 0.00331766321323812, "kl": 0.007906596176326275, "learning_rate": 2.479591836734694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2187 }, { "completion_length": 208.7857208251953, "epoch": 0.13892063492063492, "grad_norm": 0.002169834915548563, "kl": 0.0058449977077543736, "learning_rate": 2.480725623582766e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2188 }, { "completion_length": 183.7857208251953, "epoch": 0.138984126984127, "grad_norm": 0.00240725907497108, "kl": 0.006480940617620945, "learning_rate": 2.481859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2189 }, { "completion_length": 163.21429443359375, "epoch": 0.13904761904761906, "grad_norm": 0.002190281404182315, "kl": 0.0060181752778589725, "learning_rate": 2.4829931972789113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2190 }, { "completion_length": 212.85714721679688, "epoch": 0.1391111111111111, "grad_norm": 0.0018620771588757634, "kl": 0.005299816373735666, "learning_rate": 2.484126984126984e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2191 }, { "completion_length": 152.57144165039062, "epoch": 0.13917460317460317, "grad_norm": 0.002254292368888855, "kl": 0.006087929476052523, "learning_rate": 2.485260770975057e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2192 }, { "completion_length": 179.71429443359375, "epoch": 0.13923809523809524, "grad_norm": 0.0020860154181718826, "kl": 0.005869451444596052, "learning_rate": 2.486394557823129e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2193 }, { "completion_length": 154.6428680419922, "epoch": 0.13930158730158732, "grad_norm": 0.003260132158175111, "kl": 0.008412596769630909, "learning_rate": 2.4875283446712015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2194 }, { "completion_length": 131.6428680419922, "epoch": 0.13936507936507936, "grad_norm": 0.002870785305276513, "kl": 0.008035929873585701, "learning_rate": 2.4886621315192743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2195 }, { "completion_length": 150.2857208251953, "epoch": 0.13942857142857143, "grad_norm": 0.0027950394432991743, "kl": 0.007415970787405968, "learning_rate": 2.4897959183673466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2196 }, { "completion_length": 128.35714721679688, "epoch": 0.1394920634920635, "grad_norm": 0.0036833833437412977, "kl": 0.009802213869988918, "learning_rate": 2.4909297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2197 }, { "completion_length": 166.07144165039062, "epoch": 0.13955555555555554, "grad_norm": 0.0024960495065897703, "kl": 0.006666787434369326, "learning_rate": 2.492063492063492e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2198 }, { "completion_length": 174.42857360839844, "epoch": 0.13961904761904761, "grad_norm": 0.0024323295801877975, "kl": 0.0064588068053126335, "learning_rate": 2.4931972789115646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2199 }, { "completion_length": 162.07144165039062, "epoch": 0.13968253968253969, "grad_norm": 0.002816100837662816, "kl": 0.00789635069668293, "learning_rate": 2.494331065759637e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2200 }, { "completion_length": 139.1428680419922, "epoch": 0.13974603174603176, "grad_norm": 0.002604590728878975, "kl": 0.007003406528383493, "learning_rate": 2.4954648526077097e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2201 }, { "completion_length": 176.00001525878906, "epoch": 0.1398095238095238, "grad_norm": 0.002314991317689419, "kl": 0.00621447479352355, "learning_rate": 2.4965986394557825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2202 }, { "completion_length": 168.85714721679688, "epoch": 0.13987301587301587, "grad_norm": 0.0028082230128347874, "kl": 0.007162159308791161, "learning_rate": 2.497732426303855e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2203 }, { "completion_length": 140.57144165039062, "epoch": 0.13993650793650794, "grad_norm": 0.002962408820167184, "kl": 0.00809743907302618, "learning_rate": 2.498866213151927e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2204 }, { "completion_length": 124.50000762939453, "epoch": 0.14, "grad_norm": 0.003479809733107686, "kl": 0.008980492129921913, "learning_rate": 2.5e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2205 }, { "completion_length": 173.00001525878906, "epoch": 0.14006349206349206, "grad_norm": 0.0024012476205825806, "kl": 0.0075914254412055016, "learning_rate": 2.5011337868480727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2206 }, { "completion_length": 213.07144165039062, "epoch": 0.14012698412698413, "grad_norm": 0.0019439065363258123, "kl": 0.005729804281145334, "learning_rate": 2.502267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2207 }, { "completion_length": 191.35714721679688, "epoch": 0.1401904761904762, "grad_norm": 0.002316012280061841, "kl": 0.005842764861881733, "learning_rate": 2.503401360544218e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2208 }, { "completion_length": 185.07144165039062, "epoch": 0.14025396825396824, "grad_norm": 0.0024656751193106174, "kl": 0.006584705784916878, "learning_rate": 2.50453514739229e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2209 }, { "completion_length": 178.00001525878906, "epoch": 0.1403174603174603, "grad_norm": 0.002247173571959138, "kl": 0.006541875656694174, "learning_rate": 2.5056689342403624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2210 }, { "completion_length": 184.6428680419922, "epoch": 0.14038095238095238, "grad_norm": 0.0027100478764623404, "kl": 0.007436135783791542, "learning_rate": 2.506802721088435e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2211 }, { "completion_length": 179.7857208251953, "epoch": 0.14044444444444446, "grad_norm": 0.002108218614012003, "kl": 0.005861013196408749, "learning_rate": 2.507936507936508e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2212 }, { "completion_length": 189.07144165039062, "epoch": 0.1405079365079365, "grad_norm": 0.0019724222365766764, "kl": 0.005913525354117155, "learning_rate": 2.5090702947845803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2213 }, { "completion_length": 155.1428680419922, "epoch": 0.14057142857142857, "grad_norm": 0.0025789057835936546, "kl": 0.0066888039000332355, "learning_rate": 2.510204081632653e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2214 }, { "completion_length": 136.1428680419922, "epoch": 0.14063492063492064, "grad_norm": 0.0028355142567306757, "kl": 0.007214201148599386, "learning_rate": 2.511337868480726e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2215 }, { "completion_length": 189.92857360839844, "epoch": 0.1406984126984127, "grad_norm": 0.0023697291035205126, "kl": 0.006466034334152937, "learning_rate": 2.512471655328798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2216 }, { "completion_length": 180.6428680419922, "epoch": 0.14076190476190475, "grad_norm": 0.0023638370912522078, "kl": 0.0066190012730658054, "learning_rate": 2.5136054421768705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2217 }, { "completion_length": 178.92857360839844, "epoch": 0.14082539682539683, "grad_norm": 0.0021129054948687553, "kl": 0.006052998825907707, "learning_rate": 2.514739229024943e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2218 }, { "completion_length": 163.1428680419922, "epoch": 0.1408888888888889, "grad_norm": 0.0025043138302862644, "kl": 0.006517445668578148, "learning_rate": 2.5158730158730156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2219 }, { "completion_length": 149.71429443359375, "epoch": 0.14095238095238094, "grad_norm": 0.002438293071463704, "kl": 0.006656246725469828, "learning_rate": 2.5170068027210885e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2220 }, { "completion_length": 160.85714721679688, "epoch": 0.141015873015873, "grad_norm": 0.0021410565823316574, "kl": 0.006109118927270174, "learning_rate": 2.518140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2221 }, { "completion_length": 153.57144165039062, "epoch": 0.14107936507936508, "grad_norm": 0.002091789385303855, "kl": 0.005685729440301657, "learning_rate": 2.5192743764172336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2222 }, { "completion_length": 185.85714721679688, "epoch": 0.14114285714285715, "grad_norm": 0.001812087488360703, "kl": 0.00572943827137351, "learning_rate": 2.5204081632653064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2223 }, { "completion_length": 153.5, "epoch": 0.1412063492063492, "grad_norm": 0.0026017248164862394, "kl": 0.006911394186317921, "learning_rate": 2.521541950113378e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2224 }, { "completion_length": 178.00001525878906, "epoch": 0.14126984126984127, "grad_norm": 0.0019474789733067155, "kl": 0.005279378034174442, "learning_rate": 2.522675736961451e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2225 }, { "completion_length": 177.00001525878906, "epoch": 0.14133333333333334, "grad_norm": 0.002688764361664653, "kl": 0.006571738049387932, "learning_rate": 2.523809523809524e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2226 }, { "completion_length": 178.71429443359375, "epoch": 0.1413968253968254, "grad_norm": 0.0023972205817699432, "kl": 0.006592873018234968, "learning_rate": 2.524943310657596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2227 }, { "completion_length": 173.21429443359375, "epoch": 0.14146031746031745, "grad_norm": 0.002178426133468747, "kl": 0.006331904791295528, "learning_rate": 2.526077097505669e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2228 }, { "completion_length": 149.71429443359375, "epoch": 0.14152380952380952, "grad_norm": 0.0026867161504924297, "kl": 0.007500781212002039, "learning_rate": 2.5272108843537417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2229 }, { "completion_length": 170.42857360839844, "epoch": 0.1415873015873016, "grad_norm": 0.0026724045164883137, "kl": 0.007118257228285074, "learning_rate": 2.528344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2230 }, { "completion_length": 173.42857360839844, "epoch": 0.14165079365079364, "grad_norm": 0.0032580539118498564, "kl": 0.007893293164670467, "learning_rate": 2.5294784580498863e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2231 }, { "completion_length": 153.71429443359375, "epoch": 0.1417142857142857, "grad_norm": 0.0025822375901043415, "kl": 0.006982261780649424, "learning_rate": 2.530612244897959e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2232 }, { "completion_length": 184.85714721679688, "epoch": 0.14177777777777778, "grad_norm": 0.0019634473137557507, "kl": 0.005548928864300251, "learning_rate": 2.5317460317460314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2233 }, { "completion_length": 127.71429443359375, "epoch": 0.14184126984126985, "grad_norm": 0.0037805766332894564, "kl": 0.009335111826658249, "learning_rate": 2.532879818594104e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2234 }, { "completion_length": 141.21429443359375, "epoch": 0.1419047619047619, "grad_norm": 0.0027705130632966757, "kl": 0.006824006792157888, "learning_rate": 2.534013605442177e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2235 }, { "completion_length": 182.2857208251953, "epoch": 0.14196825396825397, "grad_norm": 0.0022440445609390736, "kl": 0.005575733724981546, "learning_rate": 2.5351473922902493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2236 }, { "completion_length": 158.42857360839844, "epoch": 0.14203174603174604, "grad_norm": 0.0034655958879739046, "kl": 0.007807284593582153, "learning_rate": 2.536281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2237 }, { "completion_length": 145.5, "epoch": 0.1420952380952381, "grad_norm": 0.0024971573147922754, "kl": 0.006146538536995649, "learning_rate": 2.5374149659863945e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2238 }, { "completion_length": 179.42857360839844, "epoch": 0.14215873015873015, "grad_norm": 0.0023505801800638437, "kl": 0.0060905818827450275, "learning_rate": 2.538548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2239 }, { "completion_length": 137.0, "epoch": 0.14222222222222222, "grad_norm": 0.0026000801008194685, "kl": 0.006039170548319817, "learning_rate": 2.5396825396825396e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2240 }, { "completion_length": 166.0, "epoch": 0.1422857142857143, "grad_norm": 0.002118056872859597, "kl": 0.005546692293137312, "learning_rate": 2.540816326530612e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2241 }, { "completion_length": 177.71429443359375, "epoch": 0.14234920634920634, "grad_norm": 0.0029374752193689346, "kl": 0.006948333699256182, "learning_rate": 2.5419501133786847e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2242 }, { "completion_length": 161.42857360839844, "epoch": 0.1424126984126984, "grad_norm": 0.0025734787341207266, "kl": 0.00627157324925065, "learning_rate": 2.5430839002267575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2243 }, { "completion_length": 211.50001525878906, "epoch": 0.14247619047619048, "grad_norm": 0.0019056742312386632, "kl": 0.005105881951749325, "learning_rate": 2.54421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2244 }, { "completion_length": 205.85714721679688, "epoch": 0.14253968253968255, "grad_norm": 0.0018703113310039043, "kl": 0.004803039133548737, "learning_rate": 2.5453514739229026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2245 }, { "completion_length": 112.35714721679688, "epoch": 0.1426031746031746, "grad_norm": 0.003478169674053788, "kl": 0.007319792173802853, "learning_rate": 2.546485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2246 }, { "completion_length": 180.2857208251953, "epoch": 0.14266666666666666, "grad_norm": 0.002019450766965747, "kl": 0.005499341059476137, "learning_rate": 2.547619047619047e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2247 }, { "completion_length": 209.50001525878906, "epoch": 0.14273015873015873, "grad_norm": 0.001885410980321467, "kl": 0.005360885988920927, "learning_rate": 2.54875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2248 }, { "completion_length": 144.1428680419922, "epoch": 0.1427936507936508, "grad_norm": 0.0025310786440968513, "kl": 0.006363612599670887, "learning_rate": 2.549886621315193e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2249 }, { "completion_length": 174.07144165039062, "epoch": 0.14285714285714285, "grad_norm": 0.0018802398117259145, "kl": 0.004831219557672739, "learning_rate": 2.551020408163265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2250 }, { "completion_length": 180.1428680419922, "epoch": 0.14292063492063492, "grad_norm": 0.0017701569013297558, "kl": 0.005040441639721394, "learning_rate": 2.552154195011338e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2251 }, { "completion_length": 193.07144165039062, "epoch": 0.142984126984127, "grad_norm": 0.002218342386186123, "kl": 0.005491132382303476, "learning_rate": 2.553287981859411e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2252 }, { "completion_length": 199.07144165039062, "epoch": 0.14304761904761903, "grad_norm": 1.1948802471160889, "kl": 0.006052558310329914, "learning_rate": 2.5544217687074825e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2253 }, { "completion_length": 184.85714721679688, "epoch": 0.1431111111111111, "grad_norm": 0.0024615779984742403, "kl": 0.007066378369927406, "learning_rate": 2.5555555555555553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2254 }, { "completion_length": 187.00001525878906, "epoch": 0.14317460317460318, "grad_norm": 0.0024360939860343933, "kl": 0.005866256542503834, "learning_rate": 2.556689342403628e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2255 }, { "completion_length": 206.2857208251953, "epoch": 0.14323809523809525, "grad_norm": 0.0022044712677598, "kl": 0.0058080279268324375, "learning_rate": 2.5578231292517004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2256 }, { "completion_length": 182.35714721679688, "epoch": 0.1433015873015873, "grad_norm": 0.002376507269218564, "kl": 0.005723678041249514, "learning_rate": 2.558956916099773e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2257 }, { "completion_length": 198.35714721679688, "epoch": 0.14336507936507936, "grad_norm": 0.0021727446001023054, "kl": 0.005104317329823971, "learning_rate": 2.5600907029478455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2258 }, { "completion_length": 195.57144165039062, "epoch": 0.14342857142857143, "grad_norm": 0.002645151922479272, "kl": 0.006620663683861494, "learning_rate": 2.5612244897959184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2259 }, { "completion_length": 156.21429443359375, "epoch": 0.1434920634920635, "grad_norm": 0.003401227993890643, "kl": 0.0077452268451452255, "learning_rate": 2.562358276643991e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2260 }, { "completion_length": 176.7857208251953, "epoch": 0.14355555555555555, "grad_norm": 0.004031984135508537, "kl": 0.0060578021220862865, "learning_rate": 2.563492063492063e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2261 }, { "completion_length": 135.57144165039062, "epoch": 0.14361904761904762, "grad_norm": 0.003535531461238861, "kl": 0.007011073175817728, "learning_rate": 2.564625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2262 }, { "completion_length": 183.2857208251953, "epoch": 0.1436825396825397, "grad_norm": 0.0027161913458257914, "kl": 0.006080520339310169, "learning_rate": 2.5657596371882086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2263 }, { "completion_length": 174.92857360839844, "epoch": 0.14374603174603173, "grad_norm": 0.0030431938357651234, "kl": 0.006622613873332739, "learning_rate": 2.566893424036281e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2264 }, { "completion_length": 182.07144165039062, "epoch": 0.1438095238095238, "grad_norm": 0.003154071746394038, "kl": 0.0065927328541874886, "learning_rate": 2.5680272108843537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2265 }, { "completion_length": 183.00001525878906, "epoch": 0.14387301587301587, "grad_norm": 0.0027641006745398045, "kl": 0.006480913143604994, "learning_rate": 2.5691609977324265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2266 }, { "completion_length": 139.35714721679688, "epoch": 0.14393650793650795, "grad_norm": 0.005828215274959803, "kl": 0.00816610362380743, "learning_rate": 2.570294784580499e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2267 }, { "completion_length": 165.07144165039062, "epoch": 0.144, "grad_norm": 0.0029627971816807985, "kl": 0.008040040731430054, "learning_rate": 2.571428571428571e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2268 }, { "completion_length": 137.35714721679688, "epoch": 0.14406349206349206, "grad_norm": 0.0038605944719165564, "kl": 0.00889604538679123, "learning_rate": 2.572562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2269 }, { "completion_length": 176.07144165039062, "epoch": 0.14412698412698413, "grad_norm": 0.0035830573178827763, "kl": 0.007559123449027538, "learning_rate": 2.573696145124716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2270 }, { "completion_length": 156.21429443359375, "epoch": 0.1441904761904762, "grad_norm": 0.0027014168445020914, "kl": 0.00683442410081625, "learning_rate": 2.574829931972789e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2271 }, { "completion_length": 174.85714721679688, "epoch": 0.14425396825396825, "grad_norm": 0.0027813261840492487, "kl": 0.0060731046833097935, "learning_rate": 2.575963718820862e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2272 }, { "completion_length": 156.21429443359375, "epoch": 0.14431746031746032, "grad_norm": 0.002472753170877695, "kl": 0.006060993764549494, "learning_rate": 2.577097505668934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2273 }, { "completion_length": 196.71429443359375, "epoch": 0.1443809523809524, "grad_norm": 0.002245298819616437, "kl": 0.006038513965904713, "learning_rate": 2.578231292517007e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2274 }, { "completion_length": 141.6428680419922, "epoch": 0.14444444444444443, "grad_norm": 0.0031649658922106028, "kl": 0.006636889185756445, "learning_rate": 2.57936507936508e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2275 }, { "completion_length": 133.35714721679688, "epoch": 0.1445079365079365, "grad_norm": 0.003569404361769557, "kl": 0.00901806727051735, "learning_rate": 2.5804988662131515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2276 }, { "completion_length": 153.42857360839844, "epoch": 0.14457142857142857, "grad_norm": 0.003002364654093981, "kl": 0.007858428172767162, "learning_rate": 2.5816326530612243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2277 }, { "completion_length": 158.2857208251953, "epoch": 0.14463492063492064, "grad_norm": 0.0029702186584472656, "kl": 0.007759088184684515, "learning_rate": 2.5827664399092966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2278 }, { "completion_length": 209.00001525878906, "epoch": 0.1446984126984127, "grad_norm": 0.0027393829077482224, "kl": 0.006577354855835438, "learning_rate": 2.5839002267573694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2279 }, { "completion_length": 168.21429443359375, "epoch": 0.14476190476190476, "grad_norm": 0.0037155074533075094, "kl": 0.009211636148393154, "learning_rate": 2.5850340136054423e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2280 }, { "completion_length": 158.0, "epoch": 0.14482539682539683, "grad_norm": 0.8302011489868164, "kl": 0.008792394772171974, "learning_rate": 2.5861678004535146e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2281 }, { "completion_length": 188.6428680419922, "epoch": 0.1448888888888889, "grad_norm": 0.003241268452256918, "kl": 0.007095506880432367, "learning_rate": 2.5873015873015874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2282 }, { "completion_length": 125.21429443359375, "epoch": 0.14495238095238094, "grad_norm": 0.00509154936298728, "kl": 0.011378946714103222, "learning_rate": 2.5884353741496597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2283 }, { "completion_length": 145.2857208251953, "epoch": 0.14501587301587301, "grad_norm": 0.003373517422005534, "kl": 0.008118601515889168, "learning_rate": 2.589569160997732e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2284 }, { "completion_length": 151.42857360839844, "epoch": 0.14507936507936509, "grad_norm": 0.0033431982155889273, "kl": 0.007562499493360519, "learning_rate": 2.590702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2285 }, { "completion_length": 189.1428680419922, "epoch": 0.14514285714285713, "grad_norm": 0.0025101089850068092, "kl": 0.00589692173525691, "learning_rate": 2.5918367346938776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2286 }, { "completion_length": 166.7857208251953, "epoch": 0.1452063492063492, "grad_norm": 0.002699401928111911, "kl": 0.006358809303492308, "learning_rate": 2.59297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2287 }, { "completion_length": 159.7857208251953, "epoch": 0.14526984126984127, "grad_norm": 0.0035096430219709873, "kl": 0.007402971852570772, "learning_rate": 2.5941043083900227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2288 }, { "completion_length": 211.00001525878906, "epoch": 0.14533333333333334, "grad_norm": 0.0019019399769604206, "kl": 0.004595688544213772, "learning_rate": 2.5952380952380955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2289 }, { "completion_length": 137.07144165039062, "epoch": 0.14539682539682539, "grad_norm": 0.004353267606347799, "kl": 0.008185130544006824, "learning_rate": 2.5963718820861673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2290 }, { "completion_length": 182.71429443359375, "epoch": 0.14546031746031746, "grad_norm": 0.0032117306254804134, "kl": 0.007835089229047298, "learning_rate": 2.59750566893424e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2291 }, { "completion_length": 162.35714721679688, "epoch": 0.14552380952380953, "grad_norm": 0.0028575106989592314, "kl": 0.005784822627902031, "learning_rate": 2.598639455782313e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2292 }, { "completion_length": 166.85714721679688, "epoch": 0.1455873015873016, "grad_norm": 0.003644453128799796, "kl": 0.009014066308736801, "learning_rate": 2.599773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2293 }, { "completion_length": 146.2857208251953, "epoch": 0.14565079365079364, "grad_norm": 1.316135287284851, "kl": 0.004812242928892374, "learning_rate": 2.600907029478458e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2294 }, { "completion_length": 185.6428680419922, "epoch": 0.1457142857142857, "grad_norm": 0.002906897570937872, "kl": 0.0070557487197220325, "learning_rate": 2.602040816326531e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2295 }, { "completion_length": 184.85714721679688, "epoch": 0.14577777777777778, "grad_norm": 0.0032744593918323517, "kl": 0.0066157677210867405, "learning_rate": 2.603174603174603e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2296 }, { "completion_length": 146.5, "epoch": 0.14584126984126985, "grad_norm": 0.003905469086021185, "kl": 0.009208290837705135, "learning_rate": 2.604308390022676e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2297 }, { "completion_length": 176.35714721679688, "epoch": 0.1459047619047619, "grad_norm": 0.002704824786633253, "kl": 0.006926111876964569, "learning_rate": 2.6054421768707477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2298 }, { "completion_length": 132.57144165039062, "epoch": 0.14596825396825397, "grad_norm": 0.005231579300016165, "kl": 0.012393112294375896, "learning_rate": 2.6065759637188205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2299 }, { "completion_length": 140.92857360839844, "epoch": 0.14603174603174604, "grad_norm": 1.1871308088302612, "kl": 0.01423837523907423, "learning_rate": 2.6077097505668934e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2300 }, { "completion_length": 129.57144165039062, "epoch": 0.14609523809523808, "grad_norm": 0.003962670918554068, "kl": 0.009354058653116226, "learning_rate": 2.6088435374149656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2301 }, { "completion_length": 157.6428680419922, "epoch": 0.14615873015873015, "grad_norm": 0.004842285066843033, "kl": 0.010160809382796288, "learning_rate": 2.6099773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2302 }, { "completion_length": 190.92857360839844, "epoch": 0.14622222222222223, "grad_norm": 0.002982529578730464, "kl": 0.008015627972781658, "learning_rate": 2.6111111111111113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2303 }, { "completion_length": 143.57144165039062, "epoch": 0.1462857142857143, "grad_norm": 0.004256884101778269, "kl": 0.01218881644308567, "learning_rate": 2.6122448979591836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2304 }, { "completion_length": 129.6428680419922, "epoch": 0.14634920634920634, "grad_norm": 0.0063829123973846436, "kl": 0.015204062685370445, "learning_rate": 2.613378684807256e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2305 }, { "completion_length": 183.92857360839844, "epoch": 0.1464126984126984, "grad_norm": 0.0042648944072425365, "kl": 0.010048473253846169, "learning_rate": 2.6145124716553287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2306 }, { "completion_length": 175.42857360839844, "epoch": 0.14647619047619048, "grad_norm": 0.004359028302133083, "kl": 0.013159947469830513, "learning_rate": 2.615646258503401e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2307 }, { "completion_length": 152.42857360839844, "epoch": 0.14653968253968255, "grad_norm": 0.004679317586123943, "kl": 0.013058631680905819, "learning_rate": 2.616780045351474e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2308 }, { "completion_length": 165.21429443359375, "epoch": 0.1466031746031746, "grad_norm": 0.9661431908607483, "kl": 0.008675238117575645, "learning_rate": 2.6179138321995466e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2309 }, { "completion_length": 169.5, "epoch": 0.14666666666666667, "grad_norm": 0.004837579093873501, "kl": 0.012940770946443081, "learning_rate": 2.619047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2310 }, { "completion_length": 150.0, "epoch": 0.14673015873015874, "grad_norm": 0.005470718257129192, "kl": 0.01362310629338026, "learning_rate": 2.6201814058956917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2311 }, { "completion_length": 148.6428680419922, "epoch": 0.14679365079365078, "grad_norm": 0.00414683623239398, "kl": 0.010873007588088512, "learning_rate": 2.621315192743764e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2312 }, { "completion_length": 153.0, "epoch": 0.14685714285714285, "grad_norm": 0.00639977864921093, "kl": 0.01523186732083559, "learning_rate": 2.6224489795918363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2313 }, { "completion_length": 157.42857360839844, "epoch": 0.14692063492063492, "grad_norm": 0.0040805465541779995, "kl": 0.012651289813220501, "learning_rate": 2.623582766439909e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2314 }, { "completion_length": 185.1428680419922, "epoch": 0.146984126984127, "grad_norm": 0.004847276955842972, "kl": 0.012970111332833767, "learning_rate": 2.624716553287982e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2315 }, { "completion_length": 164.92857360839844, "epoch": 0.14704761904761904, "grad_norm": 0.004554383922368288, "kl": 0.013543893583118916, "learning_rate": 2.625850340136054e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2316 }, { "completion_length": 161.5, "epoch": 0.1471111111111111, "grad_norm": 0.006406910251826048, "kl": 0.016547011211514473, "learning_rate": 2.626984126984127e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2317 }, { "completion_length": 143.21429443359375, "epoch": 0.14717460317460318, "grad_norm": 0.006752071902155876, "kl": 0.018379373475909233, "learning_rate": 2.6281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2318 }, { "completion_length": 187.35714721679688, "epoch": 0.14723809523809525, "grad_norm": 0.0038896447513252497, "kl": 0.012442125007510185, "learning_rate": 2.629251700680272e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2319 }, { "completion_length": 166.85714721679688, "epoch": 0.1473015873015873, "grad_norm": 0.005315846763551235, "kl": 0.014188203029334545, "learning_rate": 2.6303854875283444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2320 }, { "completion_length": 193.07144165039062, "epoch": 0.14736507936507937, "grad_norm": 0.0043915691785514355, "kl": 0.012866402044892311, "learning_rate": 2.631519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2321 }, { "completion_length": 179.85714721679688, "epoch": 0.14742857142857144, "grad_norm": 1.113214135169983, "kl": 0.013956960290670395, "learning_rate": 2.6326530612244896e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2322 }, { "completion_length": 172.50001525878906, "epoch": 0.14749206349206348, "grad_norm": 0.006482269149273634, "kl": 0.0163203664124012, "learning_rate": 2.6337868480725624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2323 }, { "completion_length": 201.7857208251953, "epoch": 0.14755555555555555, "grad_norm": 0.003834869246929884, "kl": 0.01232079602777958, "learning_rate": 2.6349206349206347e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2324 }, { "completion_length": 182.57144165039062, "epoch": 0.14761904761904762, "grad_norm": 0.005230438895523548, "kl": 0.01384531520307064, "learning_rate": 2.6360544217687075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2325 }, { "completion_length": 126.28572082519531, "epoch": 0.1476825396825397, "grad_norm": 0.006575240753591061, "kl": 0.01676764152944088, "learning_rate": 2.6371882086167803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2326 }, { "completion_length": 162.5, "epoch": 0.14774603174603174, "grad_norm": 0.004365749191492796, "kl": 0.011215727776288986, "learning_rate": 2.638321995464852e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2327 }, { "completion_length": 187.07144165039062, "epoch": 0.1478095238095238, "grad_norm": 0.005447923671454191, "kl": 0.01576516591012478, "learning_rate": 2.639455782312925e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2328 }, { "completion_length": 186.92857360839844, "epoch": 0.14787301587301588, "grad_norm": 0.0054377899505198, "kl": 0.013503153808414936, "learning_rate": 2.6405895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2329 }, { "completion_length": 176.92857360839844, "epoch": 0.14793650793650795, "grad_norm": 0.0034677330404520035, "kl": 0.010903699323534966, "learning_rate": 2.64172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2330 }, { "completion_length": 159.2857208251953, "epoch": 0.148, "grad_norm": 0.006635057739913464, "kl": 0.01652834750711918, "learning_rate": 2.642857142857143e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2331 }, { "completion_length": 151.07144165039062, "epoch": 0.14806349206349206, "grad_norm": 0.007279406767338514, "kl": 0.01778131164610386, "learning_rate": 2.6439909297052156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2332 }, { "completion_length": 168.92857360839844, "epoch": 0.14812698412698413, "grad_norm": 0.00405737804248929, "kl": 0.013431347906589508, "learning_rate": 2.645124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2333 }, { "completion_length": 182.00001525878906, "epoch": 0.14819047619047618, "grad_norm": 0.003797519952058792, "kl": 0.010136301629245281, "learning_rate": 2.646258503401361e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2334 }, { "completion_length": 179.50001525878906, "epoch": 0.14825396825396825, "grad_norm": 0.005288620945066214, "kl": 0.012834439054131508, "learning_rate": 2.647392290249433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2335 }, { "completion_length": 181.1428680419922, "epoch": 0.14831746031746032, "grad_norm": 0.003909940831363201, "kl": 0.011027553118765354, "learning_rate": 2.6485260770975053e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2336 }, { "completion_length": 164.35714721679688, "epoch": 0.1483809523809524, "grad_norm": 0.004634086042642593, "kl": 0.012589653953909874, "learning_rate": 2.649659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2337 }, { "completion_length": 137.92857360839844, "epoch": 0.14844444444444443, "grad_norm": 0.006450233049690723, "kl": 0.01832343451678753, "learning_rate": 2.6507936507936504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2338 }, { "completion_length": 214.21429443359375, "epoch": 0.1485079365079365, "grad_norm": 0.005116727668792009, "kl": 0.01294463686645031, "learning_rate": 2.651927437641723e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2339 }, { "completion_length": 154.85714721679688, "epoch": 0.14857142857142858, "grad_norm": 0.005217795260250568, "kl": 0.01566605269908905, "learning_rate": 2.653061224489796e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2340 }, { "completion_length": 178.00001525878906, "epoch": 0.14863492063492065, "grad_norm": 0.005053485743701458, "kl": 0.012862225994467735, "learning_rate": 2.6541950113378684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2341 }, { "completion_length": 132.5, "epoch": 0.1486984126984127, "grad_norm": 0.008933745324611664, "kl": 0.021833615377545357, "learning_rate": 2.6553287981859406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2342 }, { "completion_length": 155.35714721679688, "epoch": 0.14876190476190476, "grad_norm": 0.004828005563467741, "kl": 0.012850603088736534, "learning_rate": 2.6564625850340135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2343 }, { "completion_length": 147.0, "epoch": 0.14882539682539683, "grad_norm": 0.006967287044972181, "kl": 0.014663215726613998, "learning_rate": 2.657596371882086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2344 }, { "completion_length": 151.57144165039062, "epoch": 0.14888888888888888, "grad_norm": 0.005295697599649429, "kl": 0.013773813843727112, "learning_rate": 2.6587301587301586e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2345 }, { "completion_length": 173.85714721679688, "epoch": 0.14895238095238095, "grad_norm": 0.005169282667338848, "kl": 0.013396074064075947, "learning_rate": 2.6598639455782314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2346 }, { "completion_length": 158.6428680419922, "epoch": 0.14901587301587302, "grad_norm": 0.004741716664284468, "kl": 0.01207976695150137, "learning_rate": 2.6609977324263037e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2347 }, { "completion_length": 156.21429443359375, "epoch": 0.1490793650793651, "grad_norm": 0.0029658381827175617, "kl": 0.009633410722017288, "learning_rate": 2.6621315192743765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2348 }, { "completion_length": 191.85714721679688, "epoch": 0.14914285714285713, "grad_norm": 0.007398068904876709, "kl": 0.015235740691423416, "learning_rate": 2.663265306122449e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2349 }, { "completion_length": 178.07144165039062, "epoch": 0.1492063492063492, "grad_norm": 0.004807936027646065, "kl": 0.012133887968957424, "learning_rate": 2.664399092970521e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2350 }, { "completion_length": 138.42857360839844, "epoch": 0.14926984126984127, "grad_norm": 0.0050357310101389885, "kl": 0.012520146556198597, "learning_rate": 2.665532879818594e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2351 }, { "completion_length": 196.1428680419922, "epoch": 0.14933333333333335, "grad_norm": 0.007032010238617659, "kl": 0.013868981972336769, "learning_rate": 2.6666666666666667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2352 }, { "completion_length": 187.85714721679688, "epoch": 0.1493968253968254, "grad_norm": 0.00387749751098454, "kl": 0.009926149621605873, "learning_rate": 2.667800453514739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2353 }, { "completion_length": 141.35714721679688, "epoch": 0.14946031746031746, "grad_norm": 0.004358986858278513, "kl": 0.0104827294126153, "learning_rate": 2.668934240362812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2354 }, { "completion_length": 186.85714721679688, "epoch": 0.14952380952380953, "grad_norm": 0.004022386856377125, "kl": 0.010446653701364994, "learning_rate": 2.6700680272108847e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2355 }, { "completion_length": 167.1428680419922, "epoch": 0.14958730158730157, "grad_norm": 0.004591368138790131, "kl": 0.011682217009365559, "learning_rate": 2.671201814058957e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2356 }, { "completion_length": 153.6428680419922, "epoch": 0.14965079365079365, "grad_norm": 0.004866491537541151, "kl": 0.013898584060370922, "learning_rate": 2.672335600907029e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2357 }, { "completion_length": 187.85714721679688, "epoch": 0.14971428571428572, "grad_norm": 0.003139063948765397, "kl": 0.009394544176757336, "learning_rate": 2.6734693877551015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2358 }, { "completion_length": 146.1428680419922, "epoch": 0.1497777777777778, "grad_norm": 0.004438489209860563, "kl": 0.011201051995158195, "learning_rate": 2.6746031746031743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2359 }, { "completion_length": 153.21429443359375, "epoch": 0.14984126984126983, "grad_norm": 0.00489411037415266, "kl": 0.011502227745950222, "learning_rate": 2.675736961451247e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2360 }, { "completion_length": 152.71429443359375, "epoch": 0.1499047619047619, "grad_norm": 0.005482051987200975, "kl": 0.013714013621211052, "learning_rate": 2.6768707482993194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2361 }, { "completion_length": 149.7857208251953, "epoch": 0.14996825396825397, "grad_norm": 0.005121493712067604, "kl": 0.014840834774076939, "learning_rate": 2.6780045351473923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2362 }, { "completion_length": 178.50001525878906, "epoch": 0.15003174603174604, "grad_norm": 0.005155620165169239, "kl": 0.01129890326410532, "learning_rate": 2.679138321995465e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2363 }, { "completion_length": 167.57144165039062, "epoch": 0.1500952380952381, "grad_norm": 0.0035846808459609747, "kl": 0.010008014738559723, "learning_rate": 2.680272108843537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2364 }, { "completion_length": 151.0, "epoch": 0.15015873015873016, "grad_norm": 0.005415631923824549, "kl": 0.013855477795004845, "learning_rate": 2.6814058956916097e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2365 }, { "completion_length": 145.42857360839844, "epoch": 0.15022222222222223, "grad_norm": 0.004457668401300907, "kl": 0.01099228486418724, "learning_rate": 2.6825396825396825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2366 }, { "completion_length": 142.6428680419922, "epoch": 0.15028571428571427, "grad_norm": 0.004815880209207535, "kl": 0.011723424308001995, "learning_rate": 2.683673469387755e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2367 }, { "completion_length": 143.7857208251953, "epoch": 0.15034920634920634, "grad_norm": 0.00477974209934473, "kl": 0.013450244441628456, "learning_rate": 2.6848072562358276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2368 }, { "completion_length": 169.1428680419922, "epoch": 0.15041269841269841, "grad_norm": 0.0035454779863357544, "kl": 0.009272786788642406, "learning_rate": 2.6859410430839004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2369 }, { "completion_length": 156.85714721679688, "epoch": 0.15047619047619049, "grad_norm": 0.0043618567287921906, "kl": 0.012653210200369358, "learning_rate": 2.6870748299319727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2370 }, { "completion_length": 172.1428680419922, "epoch": 0.15053968253968253, "grad_norm": 0.003966831602156162, "kl": 0.010423986241221428, "learning_rate": 2.688208616780045e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2371 }, { "completion_length": 185.92857360839844, "epoch": 0.1506031746031746, "grad_norm": 0.0036642765626311302, "kl": 0.010727493092417717, "learning_rate": 2.689342403628118e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2372 }, { "completion_length": 181.7857208251953, "epoch": 0.15066666666666667, "grad_norm": 0.003918939735740423, "kl": 0.011313443072140217, "learning_rate": 2.69047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2373 }, { "completion_length": 109.42857360839844, "epoch": 0.15073015873015874, "grad_norm": 0.0050733657553792, "kl": 0.012599540874361992, "learning_rate": 2.691609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2374 }, { "completion_length": 154.7857208251953, "epoch": 0.15079365079365079, "grad_norm": 0.0037443467881530523, "kl": 0.010103734210133553, "learning_rate": 2.692743764172336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2375 }, { "completion_length": 170.07144165039062, "epoch": 0.15085714285714286, "grad_norm": 0.005393062252551317, "kl": 0.011474679224193096, "learning_rate": 2.693877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2376 }, { "completion_length": 154.35714721679688, "epoch": 0.15092063492063493, "grad_norm": 0.0035570559557527304, "kl": 0.010338190011680126, "learning_rate": 2.695011337868481e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2377 }, { "completion_length": 190.42857360839844, "epoch": 0.15098412698412697, "grad_norm": 0.004309026058763266, "kl": 0.011260231956839561, "learning_rate": 2.6961451247165537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2378 }, { "completion_length": 171.35714721679688, "epoch": 0.15104761904761904, "grad_norm": 0.004474744200706482, "kl": 0.00971465278416872, "learning_rate": 2.6972789115646254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2379 }, { "completion_length": 164.35714721679688, "epoch": 0.1511111111111111, "grad_norm": 0.007307226303964853, "kl": 0.0126788429915905, "learning_rate": 2.698412698412698e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2380 }, { "completion_length": 163.7857208251953, "epoch": 0.15117460317460318, "grad_norm": 0.004685711581259966, "kl": 0.009191400371491909, "learning_rate": 2.6995464852607705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2381 }, { "completion_length": 173.2857208251953, "epoch": 0.15123809523809523, "grad_norm": 0.003514317562803626, "kl": 0.009296699427068233, "learning_rate": 2.7006802721088434e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2382 }, { "completion_length": 130.21429443359375, "epoch": 0.1513015873015873, "grad_norm": 0.004069405607879162, "kl": 0.010415494441986084, "learning_rate": 2.701814058956916e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2383 }, { "completion_length": 184.6428680419922, "epoch": 0.15136507936507937, "grad_norm": 0.004398734774440527, "kl": 0.010206182487308979, "learning_rate": 2.7029478458049885e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2384 }, { "completion_length": 151.35714721679688, "epoch": 0.15142857142857144, "grad_norm": 0.004342271946370602, "kl": 0.011791499331593513, "learning_rate": 2.7040816326530613e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2385 }, { "completion_length": 177.07144165039062, "epoch": 0.15149206349206348, "grad_norm": 0.005287575535476208, "kl": 0.012358138337731361, "learning_rate": 2.7052154195011336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2386 }, { "completion_length": 186.00001525878906, "epoch": 0.15155555555555555, "grad_norm": 0.004180122632533312, "kl": 0.009530236013233662, "learning_rate": 2.706349206349206e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2387 }, { "completion_length": 152.6428680419922, "epoch": 0.15161904761904763, "grad_norm": 0.0045565394684672356, "kl": 0.010624452494084835, "learning_rate": 2.7074829931972787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2388 }, { "completion_length": 184.35714721679688, "epoch": 0.15168253968253967, "grad_norm": 0.00342568545602262, "kl": 0.00897405855357647, "learning_rate": 2.7086167800453515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2389 }, { "completion_length": 159.2857208251953, "epoch": 0.15174603174603174, "grad_norm": 0.002947222674265504, "kl": 0.00835342612117529, "learning_rate": 2.709750566893424e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2390 }, { "completion_length": 152.07144165039062, "epoch": 0.1518095238095238, "grad_norm": 0.004744121339172125, "kl": 0.010634907521307468, "learning_rate": 2.7108843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2391 }, { "completion_length": 183.92857360839844, "epoch": 0.15187301587301588, "grad_norm": 0.003108453471213579, "kl": 0.008427275344729424, "learning_rate": 2.7120181405895694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2392 }, { "completion_length": 172.07144165039062, "epoch": 0.15193650793650793, "grad_norm": 0.0032224177848547697, "kl": 0.007774039171636105, "learning_rate": 2.7131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2393 }, { "completion_length": 147.1428680419922, "epoch": 0.152, "grad_norm": 0.004549226723611355, "kl": 0.009647725149989128, "learning_rate": 2.714285714285714e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2394 }, { "completion_length": 165.92857360839844, "epoch": 0.15206349206349207, "grad_norm": 0.0026859724894165993, "kl": 0.007802658714354038, "learning_rate": 2.715419501133787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2395 }, { "completion_length": 175.71429443359375, "epoch": 0.15212698412698414, "grad_norm": 0.003918017260730267, "kl": 0.009586915373802185, "learning_rate": 2.716553287981859e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2396 }, { "completion_length": 157.7857208251953, "epoch": 0.15219047619047618, "grad_norm": 0.00505383126437664, "kl": 0.011232702061533928, "learning_rate": 2.717687074829932e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2397 }, { "completion_length": 192.57144165039062, "epoch": 0.15225396825396825, "grad_norm": 0.0040195900946855545, "kl": 0.00886115338653326, "learning_rate": 2.718820861678005e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2398 }, { "completion_length": 146.42857360839844, "epoch": 0.15231746031746032, "grad_norm": 0.003887333907186985, "kl": 0.010878713801503181, "learning_rate": 2.719954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2399 }, { "completion_length": 150.1428680419922, "epoch": 0.1523809523809524, "grad_norm": 0.004574884660542011, "kl": 0.0107565401121974, "learning_rate": 2.72108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2400 }, { "completion_length": 131.71429443359375, "epoch": 0.15244444444444444, "grad_norm": 0.0038587090093642473, "kl": 0.009392892941832542, "learning_rate": 2.7222222222222216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2401 }, { "completion_length": 128.0, "epoch": 0.1525079365079365, "grad_norm": 0.006711745634675026, "kl": 0.01361133623868227, "learning_rate": 2.7233560090702944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2402 }, { "completion_length": 159.1428680419922, "epoch": 0.15257142857142858, "grad_norm": 0.0032459315843880177, "kl": 0.008282708935439587, "learning_rate": 2.7244897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2403 }, { "completion_length": 184.42857360839844, "epoch": 0.15263492063492062, "grad_norm": 0.004450263921171427, "kl": 0.009381371550261974, "learning_rate": 2.7256235827664396e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2404 }, { "completion_length": 169.35714721679688, "epoch": 0.1526984126984127, "grad_norm": 0.004230231046676636, "kl": 0.008966663852334023, "learning_rate": 2.7267573696145124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2405 }, { "completion_length": 170.92857360839844, "epoch": 0.15276190476190477, "grad_norm": 0.004042289685457945, "kl": 0.009247871115803719, "learning_rate": 2.727891156462585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2406 }, { "completion_length": 156.71429443359375, "epoch": 0.15282539682539684, "grad_norm": 0.00391186960041523, "kl": 0.010495931841433048, "learning_rate": 2.7290249433106575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2407 }, { "completion_length": 147.42857360839844, "epoch": 0.15288888888888888, "grad_norm": 0.0037659485824406147, "kl": 0.011275717988610268, "learning_rate": 2.73015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2408 }, { "completion_length": 144.21429443359375, "epoch": 0.15295238095238095, "grad_norm": 0.002884955843910575, "kl": 0.007765968330204487, "learning_rate": 2.7312925170068026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2409 }, { "completion_length": 194.00001525878906, "epoch": 0.15301587301587302, "grad_norm": 0.0026271427050232887, "kl": 0.006927114445716143, "learning_rate": 2.732426303854875e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2410 }, { "completion_length": 190.7857208251953, "epoch": 0.1530793650793651, "grad_norm": 0.003959627356380224, "kl": 0.010842366144061089, "learning_rate": 2.7335600907029477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2411 }, { "completion_length": 145.7857208251953, "epoch": 0.15314285714285714, "grad_norm": 0.0030698960181325674, "kl": 0.008364150300621986, "learning_rate": 2.7346938775510205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2412 }, { "completion_length": 150.71429443359375, "epoch": 0.1532063492063492, "grad_norm": 0.003915862645953894, "kl": 0.00884622149169445, "learning_rate": 2.735827664399093e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2413 }, { "completion_length": 162.21429443359375, "epoch": 0.15326984126984128, "grad_norm": 0.004783354699611664, "kl": 0.011067100800573826, "learning_rate": 2.7369614512471656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2414 }, { "completion_length": 174.42857360839844, "epoch": 0.15333333333333332, "grad_norm": 0.0033150839153677225, "kl": 0.008956394158303738, "learning_rate": 2.7380952380952385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2415 }, { "completion_length": 176.7857208251953, "epoch": 0.1533968253968254, "grad_norm": 0.002751594875007868, "kl": 0.008174058049917221, "learning_rate": 2.73922902494331e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2416 }, { "completion_length": 173.2857208251953, "epoch": 0.15346031746031746, "grad_norm": 0.002899925457313657, "kl": 0.00819961354136467, "learning_rate": 2.740362811791383e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2417 }, { "completion_length": 190.07144165039062, "epoch": 0.15352380952380953, "grad_norm": 0.002854537684470415, "kl": 0.0072759767062962055, "learning_rate": 2.741496598639456e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2418 }, { "completion_length": 151.07144165039062, "epoch": 0.15358730158730158, "grad_norm": 0.005131436977535486, "kl": 0.011151856742799282, "learning_rate": 2.742630385487528e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2419 }, { "completion_length": 136.71429443359375, "epoch": 0.15365079365079365, "grad_norm": 0.0028808622155338526, "kl": 0.008278383873403072, "learning_rate": 2.743764172335601e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2420 }, { "completion_length": 121.85714721679688, "epoch": 0.15371428571428572, "grad_norm": 0.0031761466525495052, "kl": 0.009364803321659565, "learning_rate": 2.744897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2421 }, { "completion_length": 163.71429443359375, "epoch": 0.1537777777777778, "grad_norm": 0.003888138569891453, "kl": 0.008747858926653862, "learning_rate": 2.746031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2422 }, { "completion_length": 178.71429443359375, "epoch": 0.15384126984126983, "grad_norm": 0.002695742528885603, "kl": 0.007867773063480854, "learning_rate": 2.7471655328798184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2423 }, { "completion_length": 155.0, "epoch": 0.1539047619047619, "grad_norm": 0.0029716119170188904, "kl": 0.008173378184437752, "learning_rate": 2.7482993197278906e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2424 }, { "completion_length": 148.42857360839844, "epoch": 0.15396825396825398, "grad_norm": 0.003050324972718954, "kl": 0.008442666381597519, "learning_rate": 2.7494331065759635e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2425 }, { "completion_length": 195.7857208251953, "epoch": 0.15403174603174602, "grad_norm": 0.003683156566694379, "kl": 0.007446860428899527, "learning_rate": 2.7505668934240363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2426 }, { "completion_length": 166.85714721679688, "epoch": 0.1540952380952381, "grad_norm": 0.937964141368866, "kl": 0.007390350569039583, "learning_rate": 2.7517006802721086e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2427 }, { "completion_length": 176.92857360839844, "epoch": 0.15415873015873016, "grad_norm": 0.0025257838424295187, "kl": 0.006936222314834595, "learning_rate": 2.7528344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2428 }, { "completion_length": 176.7857208251953, "epoch": 0.15422222222222223, "grad_norm": 0.003018006682395935, "kl": 0.00823668297380209, "learning_rate": 2.753968253968254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2429 }, { "completion_length": 151.5, "epoch": 0.15428571428571428, "grad_norm": 0.0025653764605522156, "kl": 0.007662788964807987, "learning_rate": 2.755102040816326e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2430 }, { "completion_length": 183.7857208251953, "epoch": 0.15434920634920635, "grad_norm": 0.0033155973069369793, "kl": 0.007424443028867245, "learning_rate": 2.756235827664399e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2431 }, { "completion_length": 134.7857208251953, "epoch": 0.15441269841269842, "grad_norm": 0.003672773251309991, "kl": 0.008905033580958843, "learning_rate": 2.7573696145124716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2432 }, { "completion_length": 167.0, "epoch": 0.1544761904761905, "grad_norm": 0.002728097839280963, "kl": 0.007073008920997381, "learning_rate": 2.758503401360544e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2433 }, { "completion_length": 140.57144165039062, "epoch": 0.15453968253968253, "grad_norm": 0.004451224580407143, "kl": 0.008596895262598991, "learning_rate": 2.7596371882086167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2434 }, { "completion_length": 179.85714721679688, "epoch": 0.1546031746031746, "grad_norm": 0.0031024343334138393, "kl": 0.006905061192810535, "learning_rate": 2.7607709750566895e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2435 }, { "completion_length": 134.6428680419922, "epoch": 0.15466666666666667, "grad_norm": 0.004372937139123678, "kl": 0.010555136948823929, "learning_rate": 2.761904761904762e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2436 }, { "completion_length": 175.85714721679688, "epoch": 0.15473015873015872, "grad_norm": 0.0033144818153232336, "kl": 0.006920197047293186, "learning_rate": 2.7630385487528346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2437 }, { "completion_length": 133.7857208251953, "epoch": 0.1547936507936508, "grad_norm": 0.004192468710243702, "kl": 0.00802616961300373, "learning_rate": 2.764172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2438 }, { "completion_length": 203.71429443359375, "epoch": 0.15485714285714286, "grad_norm": 0.002019245643168688, "kl": 0.004792198073118925, "learning_rate": 2.765306122448979e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2439 }, { "completion_length": 167.71429443359375, "epoch": 0.15492063492063493, "grad_norm": 0.004358573816716671, "kl": 0.009290631860494614, "learning_rate": 2.766439909297052e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2440 }, { "completion_length": 171.42857360839844, "epoch": 0.15498412698412697, "grad_norm": 0.0023689039517194033, "kl": 0.005842695478349924, "learning_rate": 2.7675736961451243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2441 }, { "completion_length": 141.85714721679688, "epoch": 0.15504761904761905, "grad_norm": 0.0028275111690163612, "kl": 0.007222416345030069, "learning_rate": 2.768707482993197e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2442 }, { "completion_length": 202.07144165039062, "epoch": 0.15511111111111112, "grad_norm": 0.002621843945235014, "kl": 0.006933889351785183, "learning_rate": 2.76984126984127e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2443 }, { "completion_length": 142.92857360839844, "epoch": 0.1551746031746032, "grad_norm": 0.002578500658273697, "kl": 0.005933510605245829, "learning_rate": 2.7709750566893423e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2444 }, { "completion_length": 162.07144165039062, "epoch": 0.15523809523809523, "grad_norm": 0.002077102893963456, "kl": 0.006031510420143604, "learning_rate": 2.7721088435374146e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2445 }, { "completion_length": 144.5, "epoch": 0.1553015873015873, "grad_norm": 0.0027220326010137796, "kl": 0.006817755289375782, "learning_rate": 2.7732426303854874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2446 }, { "completion_length": 175.7857208251953, "epoch": 0.15536507936507937, "grad_norm": 0.002721527824178338, "kl": 0.0063684070482850075, "learning_rate": 2.7743764172335597e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2447 }, { "completion_length": 188.6428680419922, "epoch": 0.15542857142857142, "grad_norm": 0.0035470519214868546, "kl": 0.0070010763593018055, "learning_rate": 2.7755102040816325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2448 }, { "completion_length": 194.71429443359375, "epoch": 0.1554920634920635, "grad_norm": 0.0022701374255120754, "kl": 0.006351314950734377, "learning_rate": 2.7766439909297053e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2449 }, { "completion_length": 170.21429443359375, "epoch": 0.15555555555555556, "grad_norm": 0.0025430875830352306, "kl": 0.005982378963381052, "learning_rate": 2.7777777777777776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2450 }, { "completion_length": 187.1428680419922, "epoch": 0.15561904761904763, "grad_norm": 0.926339864730835, "kl": 0.007621728349477053, "learning_rate": 2.7789115646258504e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2451 }, { "completion_length": 201.1428680419922, "epoch": 0.15568253968253967, "grad_norm": 0.0024949770886451006, "kl": 0.006141638848930597, "learning_rate": 2.780045351473923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2452 }, { "completion_length": 163.42857360839844, "epoch": 0.15574603174603174, "grad_norm": 0.0022893932182341814, "kl": 0.005706357304006815, "learning_rate": 2.781179138321995e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2453 }, { "completion_length": 220.00001525878906, "epoch": 0.15580952380952381, "grad_norm": 0.0015789901372045279, "kl": 0.004885650239884853, "learning_rate": 2.782312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2454 }, { "completion_length": 181.07144165039062, "epoch": 0.15587301587301589, "grad_norm": 0.002098841592669487, "kl": 0.005494619254022837, "learning_rate": 2.7834467120181406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2455 }, { "completion_length": 140.35714721679688, "epoch": 0.15593650793650793, "grad_norm": 0.0021418144460767508, "kl": 0.007054871879518032, "learning_rate": 2.784580498866213e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2456 }, { "completion_length": 183.1428680419922, "epoch": 0.156, "grad_norm": 0.0026130978949368, "kl": 0.007790219970047474, "learning_rate": 2.785714285714286e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2457 }, { "completion_length": 142.57144165039062, "epoch": 0.15606349206349207, "grad_norm": 0.002840806031599641, "kl": 0.007515740115195513, "learning_rate": 2.7868480725623586e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2458 }, { "completion_length": 126.35714721679688, "epoch": 0.15612698412698411, "grad_norm": 0.004019810818135738, "kl": 0.013222876004874706, "learning_rate": 2.787981859410431e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2459 }, { "completion_length": 140.85714721679688, "epoch": 0.15619047619047619, "grad_norm": 0.003033167216926813, "kl": 0.008846556767821312, "learning_rate": 2.789115646258503e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2460 }, { "completion_length": 201.50001525878906, "epoch": 0.15625396825396826, "grad_norm": 0.0028583488892763853, "kl": 0.007622935343533754, "learning_rate": 2.7902494331065754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2461 }, { "completion_length": 147.42857360839844, "epoch": 0.15631746031746033, "grad_norm": 0.8436303734779358, "kl": 0.00622532656416297, "learning_rate": 2.791383219954648e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2462 }, { "completion_length": 154.07144165039062, "epoch": 0.15638095238095237, "grad_norm": 0.0038380164187401533, "kl": 0.00935895275324583, "learning_rate": 2.792517006802721e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2463 }, { "completion_length": 170.7857208251953, "epoch": 0.15644444444444444, "grad_norm": 0.003028162056580186, "kl": 0.008134246803820133, "learning_rate": 2.7936507936507934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2464 }, { "completion_length": 220.21429443359375, "epoch": 0.1565079365079365, "grad_norm": 0.002531222766265273, "kl": 0.007095493841916323, "learning_rate": 2.794784580498866e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2465 }, { "completion_length": 160.1428680419922, "epoch": 0.15657142857142858, "grad_norm": 0.003233943833038211, "kl": 0.008908255957067013, "learning_rate": 2.795918367346939e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2466 }, { "completion_length": 189.92857360839844, "epoch": 0.15663492063492063, "grad_norm": 0.0034299837425351143, "kl": 0.008422989398241043, "learning_rate": 2.797052154195011e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2467 }, { "completion_length": 151.21429443359375, "epoch": 0.1566984126984127, "grad_norm": 0.002505540382117033, "kl": 0.007335870526731014, "learning_rate": 2.7981859410430836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2468 }, { "completion_length": 189.50001525878906, "epoch": 0.15676190476190477, "grad_norm": 0.003270256333053112, "kl": 0.00864855945110321, "learning_rate": 2.7993197278911564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2469 }, { "completion_length": 181.2857208251953, "epoch": 0.1568253968253968, "grad_norm": 0.0029304029885679483, "kl": 0.008365053683519363, "learning_rate": 2.8004535147392287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2470 }, { "completion_length": 139.21429443359375, "epoch": 0.15688888888888888, "grad_norm": 0.004036264028400183, "kl": 0.010511313565075397, "learning_rate": 2.8015873015873015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2471 }, { "completion_length": 157.71429443359375, "epoch": 0.15695238095238095, "grad_norm": 1.5165446996688843, "kl": 0.014375658705830574, "learning_rate": 2.8027210884353743e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2472 }, { "completion_length": 179.2857208251953, "epoch": 0.15701587301587303, "grad_norm": 0.004700474441051483, "kl": 0.01064651645720005, "learning_rate": 2.8038548752834466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2473 }, { "completion_length": 176.6428680419922, "epoch": 0.15707936507936507, "grad_norm": 0.005230159033089876, "kl": 0.011492978781461716, "learning_rate": 2.8049886621315194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2474 }, { "completion_length": 150.85714721679688, "epoch": 0.15714285714285714, "grad_norm": 1.1582120656967163, "kl": 0.011327440850436687, "learning_rate": 2.8061224489795917e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2475 }, { "completion_length": 154.1428680419922, "epoch": 0.1572063492063492, "grad_norm": 0.005187759641557932, "kl": 0.012752192094922066, "learning_rate": 2.807256235827664e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2476 }, { "completion_length": 149.92857360839844, "epoch": 0.15726984126984128, "grad_norm": 0.004638990852981806, "kl": 0.012115087360143661, "learning_rate": 2.808390022675737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2477 }, { "completion_length": 189.6428680419922, "epoch": 0.15733333333333333, "grad_norm": 0.004092576913535595, "kl": 0.009672265499830246, "learning_rate": 2.8095238095238096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2478 }, { "completion_length": 158.1428680419922, "epoch": 0.1573968253968254, "grad_norm": 0.0047821965999901295, "kl": 0.011472055688500404, "learning_rate": 2.810657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2479 }, { "completion_length": 165.71429443359375, "epoch": 0.15746031746031747, "grad_norm": 0.0038102648686617613, "kl": 0.00956682302057743, "learning_rate": 2.811791383219955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2480 }, { "completion_length": 155.5, "epoch": 0.1575238095238095, "grad_norm": 0.004306967835873365, "kl": 0.012910294346511364, "learning_rate": 2.8129251700680276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2481 }, { "completion_length": 171.07144165039062, "epoch": 0.15758730158730158, "grad_norm": 0.00436900882050395, "kl": 0.01306143682450056, "learning_rate": 2.8140589569160993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2482 }, { "completion_length": 210.50001525878906, "epoch": 0.15765079365079365, "grad_norm": 0.004274831153452396, "kl": 0.011588316410779953, "learning_rate": 2.815192743764172e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2483 }, { "completion_length": 140.57144165039062, "epoch": 0.15771428571428572, "grad_norm": 0.004930154420435429, "kl": 0.013278554193675518, "learning_rate": 2.8163265306122444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2484 }, { "completion_length": 139.21429443359375, "epoch": 0.15777777777777777, "grad_norm": 0.007493523880839348, "kl": 0.019707757979631424, "learning_rate": 2.8174603174603173e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2485 }, { "completion_length": 195.7857208251953, "epoch": 0.15784126984126984, "grad_norm": 0.004090686794370413, "kl": 0.011422396637499332, "learning_rate": 2.81859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2486 }, { "completion_length": 184.71429443359375, "epoch": 0.1579047619047619, "grad_norm": 0.00389122124761343, "kl": 0.010379644110798836, "learning_rate": 2.8197278911564624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2487 }, { "completion_length": 162.71429443359375, "epoch": 0.15796825396825398, "grad_norm": 0.004086132626980543, "kl": 0.009754326194524765, "learning_rate": 2.820861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2488 }, { "completion_length": 127.78572082519531, "epoch": 0.15803174603174602, "grad_norm": 0.007470878306776285, "kl": 0.021721061319112778, "learning_rate": 2.8219954648526075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2489 }, { "completion_length": 182.7857208251953, "epoch": 0.1580952380952381, "grad_norm": 0.006354070268571377, "kl": 0.012920535169541836, "learning_rate": 2.82312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2490 }, { "completion_length": 166.6428680419922, "epoch": 0.15815873015873017, "grad_norm": 0.006200915202498436, "kl": 0.015868838876485825, "learning_rate": 2.8242630385487526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2491 }, { "completion_length": 141.71429443359375, "epoch": 0.1582222222222222, "grad_norm": 0.007283309008926153, "kl": 0.01539452001452446, "learning_rate": 2.8253968253968254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2492 }, { "completion_length": 166.0, "epoch": 0.15828571428571428, "grad_norm": 0.005290540866553783, "kl": 0.012837463989853859, "learning_rate": 2.8265306122448977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2493 }, { "completion_length": 152.07144165039062, "epoch": 0.15834920634920635, "grad_norm": 0.005941423587501049, "kl": 0.016144776716828346, "learning_rate": 2.8276643990929705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2494 }, { "completion_length": 182.21429443359375, "epoch": 0.15841269841269842, "grad_norm": 0.005196216516196728, "kl": 0.014143818989396095, "learning_rate": 2.8287981859410433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2495 }, { "completion_length": 158.07144165039062, "epoch": 0.15847619047619046, "grad_norm": 0.010969175957143307, "kl": 0.019971609115600586, "learning_rate": 2.8299319727891156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2496 }, { "completion_length": 151.92857360839844, "epoch": 0.15853968253968254, "grad_norm": 0.007869396358728409, "kl": 0.01722247339785099, "learning_rate": 2.831065759637188e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2497 }, { "completion_length": 168.0, "epoch": 0.1586031746031746, "grad_norm": 0.005270804278552532, "kl": 0.014980632811784744, "learning_rate": 2.832199546485261e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2498 }, { "completion_length": 175.2857208251953, "epoch": 0.15866666666666668, "grad_norm": 0.00476026302203536, "kl": 0.012027869001030922, "learning_rate": 2.833333333333333e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2499 }, { "completion_length": 167.0, "epoch": 0.15873015873015872, "grad_norm": 0.006907178089022636, "kl": 0.0167450699955225, "learning_rate": 2.834467120181406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2500 }, { "completion_length": 148.42857360839844, "epoch": 0.1587936507936508, "grad_norm": 0.005955335684120655, "kl": 0.015039008110761642, "learning_rate": 2.8356009070294787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2501 }, { "completion_length": 169.6428680419922, "epoch": 0.15885714285714286, "grad_norm": 0.00518605625256896, "kl": 0.013295378535985947, "learning_rate": 2.836734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2502 }, { "completion_length": 205.92857360839844, "epoch": 0.15892063492063493, "grad_norm": 0.004862398840487003, "kl": 0.012936089187860489, "learning_rate": 2.837868480725624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2503 }, { "completion_length": 168.0, "epoch": 0.15898412698412698, "grad_norm": 0.005764266941696405, "kl": 0.01339890155941248, "learning_rate": 2.8390022675736955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2504 }, { "completion_length": 113.5714340209961, "epoch": 0.15904761904761905, "grad_norm": 0.009536752477288246, "kl": 0.022428883239626884, "learning_rate": 2.8401360544217684e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2505 }, { "completion_length": 148.92857360839844, "epoch": 0.15911111111111112, "grad_norm": 0.005397588945925236, "kl": 0.013196413405239582, "learning_rate": 2.841269841269841e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2506 }, { "completion_length": 161.71429443359375, "epoch": 0.15917460317460316, "grad_norm": 0.005100608803331852, "kl": 0.013212727382779121, "learning_rate": 2.8424036281179135e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2507 }, { "completion_length": 165.1428680419922, "epoch": 0.15923809523809523, "grad_norm": 0.005080381408333778, "kl": 0.012598900124430656, "learning_rate": 2.8435374149659863e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2508 }, { "completion_length": 168.07144165039062, "epoch": 0.1593015873015873, "grad_norm": 0.006345550063997507, "kl": 0.015322254039347172, "learning_rate": 2.844671201814059e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2509 }, { "completion_length": 128.2857208251953, "epoch": 0.15936507936507938, "grad_norm": 0.00720905652269721, "kl": 0.018503984436392784, "learning_rate": 2.8458049886621314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2510 }, { "completion_length": 145.21429443359375, "epoch": 0.15942857142857142, "grad_norm": 0.005724634509533644, "kl": 0.015638938173651695, "learning_rate": 2.846938775510204e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2511 }, { "completion_length": 190.92857360839844, "epoch": 0.1594920634920635, "grad_norm": 0.003499699058011174, "kl": 0.010476590134203434, "learning_rate": 2.8480725623582765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2512 }, { "completion_length": 208.21429443359375, "epoch": 0.15955555555555556, "grad_norm": 0.004175899084657431, "kl": 0.010182100348174572, "learning_rate": 2.849206349206349e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2513 }, { "completion_length": 145.92857360839844, "epoch": 0.15961904761904763, "grad_norm": 0.0057732341811060905, "kl": 0.01390150934457779, "learning_rate": 2.8503401360544216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2514 }, { "completion_length": 154.35714721679688, "epoch": 0.15968253968253968, "grad_norm": 0.0048589445650577545, "kl": 0.012585263699293137, "learning_rate": 2.8514739229024944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2515 }, { "completion_length": 180.42857360839844, "epoch": 0.15974603174603175, "grad_norm": 0.0061038946732878685, "kl": 0.014618615619838238, "learning_rate": 2.8526077097505667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2516 }, { "completion_length": 193.2857208251953, "epoch": 0.15980952380952382, "grad_norm": 0.003530892776325345, "kl": 0.009569290094077587, "learning_rate": 2.8537414965986395e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2517 }, { "completion_length": 176.57144165039062, "epoch": 0.15987301587301586, "grad_norm": 0.004403631202876568, "kl": 0.012481722980737686, "learning_rate": 2.8548752834467124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2518 }, { "completion_length": 136.57144165039062, "epoch": 0.15993650793650793, "grad_norm": 0.004771662876009941, "kl": 0.01785176806151867, "learning_rate": 2.856009070294784e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2519 }, { "completion_length": 193.7857208251953, "epoch": 0.16, "grad_norm": 0.0043449620716273785, "kl": 0.011070135049521923, "learning_rate": 2.857142857142857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2520 }, { "completion_length": 195.6428680419922, "epoch": 0.16006349206349207, "grad_norm": 0.0029407276306301355, "kl": 0.007885847240686417, "learning_rate": 2.85827664399093e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2521 }, { "completion_length": 176.71429443359375, "epoch": 0.16012698412698412, "grad_norm": 1.144533634185791, "kl": 0.012056292966008186, "learning_rate": 2.859410430839002e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2522 }, { "completion_length": 165.07144165039062, "epoch": 0.1601904761904762, "grad_norm": 0.004340966232120991, "kl": 0.011464199982583523, "learning_rate": 2.860544217687075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2523 }, { "completion_length": 170.07144165039062, "epoch": 0.16025396825396826, "grad_norm": 0.004361278843134642, "kl": 0.01142684556543827, "learning_rate": 2.861678004535147e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2524 }, { "completion_length": 144.35714721679688, "epoch": 0.16031746031746033, "grad_norm": 0.007980715483427048, "kl": 0.019042855128645897, "learning_rate": 2.86281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2525 }, { "completion_length": 156.1428680419922, "epoch": 0.16038095238095237, "grad_norm": 0.009088022634387016, "kl": 0.01790463924407959, "learning_rate": 2.8639455782312923e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2526 }, { "completion_length": 161.6428680419922, "epoch": 0.16044444444444445, "grad_norm": 0.005129717756062746, "kl": 0.014758173376321793, "learning_rate": 2.8650793650793646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2527 }, { "completion_length": 127.78572082519531, "epoch": 0.16050793650793652, "grad_norm": 1.3204278945922852, "kl": 0.013166249729692936, "learning_rate": 2.8662131519274374e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2528 }, { "completion_length": 148.71429443359375, "epoch": 0.16057142857142856, "grad_norm": 0.0059279510751366615, "kl": 0.014185250736773014, "learning_rate": 2.86734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2529 }, { "completion_length": 182.7857208251953, "epoch": 0.16063492063492063, "grad_norm": 0.004230824299156666, "kl": 0.011407499201595783, "learning_rate": 2.8684807256235825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2530 }, { "completion_length": 158.92857360839844, "epoch": 0.1606984126984127, "grad_norm": 0.00407894654199481, "kl": 0.009876396507024765, "learning_rate": 2.8696145124716553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2531 }, { "completion_length": 168.71429443359375, "epoch": 0.16076190476190477, "grad_norm": 0.0047871447168290615, "kl": 0.01270793005824089, "learning_rate": 2.870748299319728e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2532 }, { "completion_length": 178.1428680419922, "epoch": 0.16082539682539682, "grad_norm": 0.004276186693459749, "kl": 0.009887333028018475, "learning_rate": 2.8718820861678004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2533 }, { "completion_length": 189.7857208251953, "epoch": 0.1608888888888889, "grad_norm": 0.0043959105387330055, "kl": 0.012772130779922009, "learning_rate": 2.8730158730158727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2534 }, { "completion_length": 197.35714721679688, "epoch": 0.16095238095238096, "grad_norm": 0.0029079345986247063, "kl": 0.008213315159082413, "learning_rate": 2.8741496598639455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2535 }, { "completion_length": 149.42857360839844, "epoch": 0.16101587301587303, "grad_norm": 0.006857023574411869, "kl": 0.014451732859015465, "learning_rate": 2.875283446712018e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2536 }, { "completion_length": 123.35714721679688, "epoch": 0.16107936507936507, "grad_norm": 0.006342204287648201, "kl": 0.014531144872307777, "learning_rate": 2.8764172335600906e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2537 }, { "completion_length": 177.85714721679688, "epoch": 0.16114285714285714, "grad_norm": 0.003044905373826623, "kl": 0.008499100804328918, "learning_rate": 2.8775510204081634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2538 }, { "completion_length": 146.71429443359375, "epoch": 0.16120634920634921, "grad_norm": 0.003663947805762291, "kl": 0.009123561903834343, "learning_rate": 2.878684807256236e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2539 }, { "completion_length": 185.85714721679688, "epoch": 0.16126984126984126, "grad_norm": 0.003495515789836645, "kl": 0.009242947213351727, "learning_rate": 2.8798185941043086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2540 }, { "completion_length": 138.92857360839844, "epoch": 0.16133333333333333, "grad_norm": 0.9992606043815613, "kl": 0.01047720666974783, "learning_rate": 2.8809523809523803e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2541 }, { "completion_length": 143.7857208251953, "epoch": 0.1613968253968254, "grad_norm": 0.004574310965836048, "kl": 0.010670581832528114, "learning_rate": 2.882086167800453e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2542 }, { "completion_length": 173.6428680419922, "epoch": 0.16146031746031747, "grad_norm": 0.004321102052927017, "kl": 0.010175577364861965, "learning_rate": 2.883219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2543 }, { "completion_length": 172.85714721679688, "epoch": 0.16152380952380951, "grad_norm": 0.0033746748231351376, "kl": 0.009414403699338436, "learning_rate": 2.884353741496598e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2544 }, { "completion_length": 187.2857208251953, "epoch": 0.16158730158730158, "grad_norm": 0.004174704197794199, "kl": 0.010557129047811031, "learning_rate": 2.885487528344671e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2545 }, { "completion_length": 176.07144165039062, "epoch": 0.16165079365079366, "grad_norm": 0.005743754096329212, "kl": 0.011951755732297897, "learning_rate": 2.886621315192744e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2546 }, { "completion_length": 148.85714721679688, "epoch": 0.16171428571428573, "grad_norm": 0.005360489245504141, "kl": 0.014872007071971893, "learning_rate": 2.887755102040816e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2547 }, { "completion_length": 137.92857360839844, "epoch": 0.16177777777777777, "grad_norm": 0.0044631678611040115, "kl": 0.01131383515894413, "learning_rate": 2.8888888888888885e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2548 }, { "completion_length": 169.0, "epoch": 0.16184126984126984, "grad_norm": 0.003168208757415414, "kl": 0.009647432714700699, "learning_rate": 2.8900226757369613e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2549 }, { "completion_length": 183.21429443359375, "epoch": 0.1619047619047619, "grad_norm": 0.0030243319924920797, "kl": 0.007353528402745724, "learning_rate": 2.8911564625850336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2550 }, { "completion_length": 173.50001525878906, "epoch": 0.16196825396825396, "grad_norm": 0.004727032035589218, "kl": 0.011257758364081383, "learning_rate": 2.8922902494331064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2551 }, { "completion_length": 189.1428680419922, "epoch": 0.16203174603174603, "grad_norm": 0.004628859926015139, "kl": 0.010352392680943012, "learning_rate": 2.893424036281179e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2552 }, { "completion_length": 147.21429443359375, "epoch": 0.1620952380952381, "grad_norm": 0.006249860394746065, "kl": 0.01312271598726511, "learning_rate": 2.8945578231292515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2553 }, { "completion_length": 155.1428680419922, "epoch": 0.16215873015873017, "grad_norm": 0.005754463374614716, "kl": 0.012524492107331753, "learning_rate": 2.8956916099773243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2554 }, { "completion_length": 182.85714721679688, "epoch": 0.1622222222222222, "grad_norm": 0.004287314135581255, "kl": 0.009754667989909649, "learning_rate": 2.896825396825397e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2555 }, { "completion_length": 169.71429443359375, "epoch": 0.16228571428571428, "grad_norm": 0.0036828448064625263, "kl": 0.010078269056975842, "learning_rate": 2.897959183673469e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2556 }, { "completion_length": 176.21429443359375, "epoch": 0.16234920634920635, "grad_norm": 0.004276298452168703, "kl": 0.01071044523268938, "learning_rate": 2.8990929705215417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2557 }, { "completion_length": 145.42857360839844, "epoch": 0.16241269841269843, "grad_norm": 0.006575235165655613, "kl": 0.013771943747997284, "learning_rate": 2.9002267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2558 }, { "completion_length": 144.5, "epoch": 0.16247619047619047, "grad_norm": 0.006292229052633047, "kl": 0.012642939575016499, "learning_rate": 2.901360544217687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2559 }, { "completion_length": 191.6428680419922, "epoch": 0.16253968253968254, "grad_norm": 0.006101664621382952, "kl": 0.013441424816846848, "learning_rate": 2.9024943310657596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2560 }, { "completion_length": 137.42857360839844, "epoch": 0.1626031746031746, "grad_norm": 0.0061111957766115665, "kl": 0.015576936304569244, "learning_rate": 2.9036281179138325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2561 }, { "completion_length": 219.00001525878906, "epoch": 0.16266666666666665, "grad_norm": 0.8278862833976746, "kl": 0.008796993643045425, "learning_rate": 2.904761904761905e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2562 }, { "completion_length": 177.2857208251953, "epoch": 0.16273015873015872, "grad_norm": 0.005260159727185965, "kl": 0.012686762027442455, "learning_rate": 2.905895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2563 }, { "completion_length": 171.71429443359375, "epoch": 0.1627936507936508, "grad_norm": 0.006735906470566988, "kl": 0.01460498757660389, "learning_rate": 2.9070294784580493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2564 }, { "completion_length": 122.50000762939453, "epoch": 0.16285714285714287, "grad_norm": 0.007686985190957785, "kl": 0.01989186927676201, "learning_rate": 2.908163265306122e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2565 }, { "completion_length": 170.57144165039062, "epoch": 0.1629206349206349, "grad_norm": 0.022846922278404236, "kl": 0.03010108321905136, "learning_rate": 2.909297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2566 }, { "completion_length": 195.00001525878906, "epoch": 0.16298412698412698, "grad_norm": 0.005701570305973291, "kl": 0.012954470701515675, "learning_rate": 2.910430839002267e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2567 }, { "completion_length": 147.71429443359375, "epoch": 0.16304761904761905, "grad_norm": 0.009676119312644005, "kl": 0.018601614981889725, "learning_rate": 2.91156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2568 }, { "completion_length": 141.85714721679688, "epoch": 0.16311111111111112, "grad_norm": 0.0073505607433617115, "kl": 0.01762889325618744, "learning_rate": 2.912698412698413e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2569 }, { "completion_length": 168.1428680419922, "epoch": 0.16317460317460317, "grad_norm": 0.0075300633907318115, "kl": 0.015620460733771324, "learning_rate": 2.913832199546485e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2570 }, { "completion_length": 182.85714721679688, "epoch": 0.16323809523809524, "grad_norm": 0.00732151186093688, "kl": 0.015346971340477467, "learning_rate": 2.9149659863945575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2571 }, { "completion_length": 166.5, "epoch": 0.1633015873015873, "grad_norm": 0.006729744840413332, "kl": 0.01471942663192749, "learning_rate": 2.9160997732426303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2572 }, { "completion_length": 192.57144165039062, "epoch": 0.16336507936507935, "grad_norm": 0.0036128514911979437, "kl": 0.010107328183948994, "learning_rate": 2.9172335600907026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2573 }, { "completion_length": 165.1428680419922, "epoch": 0.16342857142857142, "grad_norm": 0.006915684789419174, "kl": 0.018071511760354042, "learning_rate": 2.9183673469387754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2574 }, { "completion_length": 143.92857360839844, "epoch": 0.1634920634920635, "grad_norm": 0.008529987186193466, "kl": 0.019904835149645805, "learning_rate": 2.919501133786848e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2575 }, { "completion_length": 158.21429443359375, "epoch": 0.16355555555555557, "grad_norm": 0.005473616067320108, "kl": 0.012771950103342533, "learning_rate": 2.9206349206349205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2576 }, { "completion_length": 156.2857208251953, "epoch": 0.1636190476190476, "grad_norm": 0.005369608290493488, "kl": 0.014072244055569172, "learning_rate": 2.9217687074829933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2577 }, { "completion_length": 124.64286041259766, "epoch": 0.16368253968253968, "grad_norm": 0.007752115838229656, "kl": 0.018592502921819687, "learning_rate": 2.9229024943310656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2578 }, { "completion_length": 160.85714721679688, "epoch": 0.16374603174603175, "grad_norm": 0.008839850313961506, "kl": 0.018864566460251808, "learning_rate": 2.924036281179138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2579 }, { "completion_length": 155.2857208251953, "epoch": 0.16380952380952382, "grad_norm": 0.9855409860610962, "kl": 0.019848087802529335, "learning_rate": 2.925170068027211e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2580 }, { "completion_length": 111.5714340209961, "epoch": 0.16387301587301586, "grad_norm": 0.011074461042881012, "kl": 0.025213118642568588, "learning_rate": 2.9263038548752836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2581 }, { "completion_length": 146.21429443359375, "epoch": 0.16393650793650794, "grad_norm": 0.013796920888125896, "kl": 0.02516474947333336, "learning_rate": 2.927437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2582 }, { "completion_length": 166.85714721679688, "epoch": 0.164, "grad_norm": 0.0067809089086949825, "kl": 0.01566964015364647, "learning_rate": 2.9285714285714287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2583 }, { "completion_length": 161.5, "epoch": 0.16406349206349205, "grad_norm": 0.006654613185673952, "kl": 0.01818276382982731, "learning_rate": 2.929705215419501e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2584 }, { "completion_length": 184.7857208251953, "epoch": 0.16412698412698412, "grad_norm": 0.0048810504376888275, "kl": 0.01285354234278202, "learning_rate": 2.930839002267573e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2585 }, { "completion_length": 162.07144165039062, "epoch": 0.1641904761904762, "grad_norm": 0.007067425642162561, "kl": 0.015109626576304436, "learning_rate": 2.931972789115646e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2586 }, { "completion_length": 153.2857208251953, "epoch": 0.16425396825396826, "grad_norm": 0.0073658437468111515, "kl": 0.01626644842326641, "learning_rate": 2.9331065759637184e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2587 }, { "completion_length": 156.35714721679688, "epoch": 0.1643174603174603, "grad_norm": 0.005699957255274057, "kl": 0.01380081195384264, "learning_rate": 2.934240362811791e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2588 }, { "completion_length": 151.57144165039062, "epoch": 0.16438095238095238, "grad_norm": 0.008062641136348248, "kl": 0.01614467054605484, "learning_rate": 2.935374149659864e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2589 }, { "completion_length": 182.85714721679688, "epoch": 0.16444444444444445, "grad_norm": 0.005340513773262501, "kl": 0.013255695812404156, "learning_rate": 2.9365079365079363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2590 }, { "completion_length": 183.7857208251953, "epoch": 0.16450793650793652, "grad_norm": 0.006157460156828165, "kl": 0.01405271701514721, "learning_rate": 2.937641723356009e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2591 }, { "completion_length": 127.64286041259766, "epoch": 0.16457142857142856, "grad_norm": 0.005763099063187838, "kl": 0.0132792629301548, "learning_rate": 2.938775510204082e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2592 }, { "completion_length": 219.1428680419922, "epoch": 0.16463492063492063, "grad_norm": 0.003861556760966778, "kl": 0.009692630730569363, "learning_rate": 2.9399092970521537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2593 }, { "completion_length": 178.35714721679688, "epoch": 0.1646984126984127, "grad_norm": 0.006494323723018169, "kl": 0.014957671985030174, "learning_rate": 2.9410430839002265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2594 }, { "completion_length": 168.85714721679688, "epoch": 0.16476190476190475, "grad_norm": 0.005362838506698608, "kl": 0.012540881521999836, "learning_rate": 2.9421768707482993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2595 }, { "completion_length": 209.92857360839844, "epoch": 0.16482539682539682, "grad_norm": 0.004776429384946823, "kl": 0.011363985016942024, "learning_rate": 2.9433106575963716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2596 }, { "completion_length": 155.6428680419922, "epoch": 0.1648888888888889, "grad_norm": 0.010508417151868343, "kl": 0.01818324439227581, "learning_rate": 2.9444444444444444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2597 }, { "completion_length": 164.21429443359375, "epoch": 0.16495238095238096, "grad_norm": 0.006722734309732914, "kl": 0.013325407169759274, "learning_rate": 2.945578231292517e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2598 }, { "completion_length": 189.92857360839844, "epoch": 0.165015873015873, "grad_norm": 0.004988372791558504, "kl": 0.012529578059911728, "learning_rate": 2.9467120181405895e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2599 }, { "completion_length": 164.42857360839844, "epoch": 0.16507936507936508, "grad_norm": 0.005784476641565561, "kl": 0.01454908400774002, "learning_rate": 2.947845804988662e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2600 }, { "completion_length": 183.07144165039062, "epoch": 0.16514285714285715, "grad_norm": 0.005107516422867775, "kl": 0.01240130327641964, "learning_rate": 2.9489795918367346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2601 }, { "completion_length": 151.35714721679688, "epoch": 0.16520634920634922, "grad_norm": 0.006738363299518824, "kl": 0.015778621658682823, "learning_rate": 2.950113378684807e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2602 }, { "completion_length": 152.92857360839844, "epoch": 0.16526984126984126, "grad_norm": 0.007333248388022184, "kl": 0.015785519033670425, "learning_rate": 2.95124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2603 }, { "completion_length": 180.1428680419922, "epoch": 0.16533333333333333, "grad_norm": 0.006891077850013971, "kl": 0.012912155129015446, "learning_rate": 2.952380952380952e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2604 }, { "completion_length": 162.42857360839844, "epoch": 0.1653968253968254, "grad_norm": 0.010691022500395775, "kl": 0.018751632422208786, "learning_rate": 2.953514739229025e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2605 }, { "completion_length": 158.85714721679688, "epoch": 0.16546031746031745, "grad_norm": 0.005171972326934338, "kl": 0.013636098243296146, "learning_rate": 2.9546485260770977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2606 }, { "completion_length": 136.5, "epoch": 0.16552380952380952, "grad_norm": 0.010656501166522503, "kl": 0.017214186489582062, "learning_rate": 2.9557823129251694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2607 }, { "completion_length": 168.85714721679688, "epoch": 0.1655873015873016, "grad_norm": 0.005199051462113857, "kl": 0.013560253195464611, "learning_rate": 2.956916099773242e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2608 }, { "completion_length": 152.7857208251953, "epoch": 0.16565079365079366, "grad_norm": 0.00896392296999693, "kl": 0.01651361770927906, "learning_rate": 2.958049886621315e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2609 }, { "completion_length": 156.0, "epoch": 0.1657142857142857, "grad_norm": 0.004507048055529594, "kl": 0.011166817508637905, "learning_rate": 2.9591836734693874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2610 }, { "completion_length": 164.42857360839844, "epoch": 0.16577777777777777, "grad_norm": 0.004456795286387205, "kl": 0.01200143713504076, "learning_rate": 2.96031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2611 }, { "completion_length": 151.0, "epoch": 0.16584126984126984, "grad_norm": 0.004560511093586683, "kl": 0.011600865051150322, "learning_rate": 2.961451247165533e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2612 }, { "completion_length": 181.85714721679688, "epoch": 0.16590476190476192, "grad_norm": 0.8316294550895691, "kl": 0.009576169773936272, "learning_rate": 2.9625850340136053e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2613 }, { "completion_length": 205.50001525878906, "epoch": 0.16596825396825396, "grad_norm": 0.0041705225594341755, "kl": 0.011247938498854637, "learning_rate": 2.963718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2614 }, { "completion_length": 178.50001525878906, "epoch": 0.16603174603174603, "grad_norm": 0.6312456130981445, "kl": 0.009401822462677956, "learning_rate": 2.9648526077097504e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2615 }, { "completion_length": 159.57144165039062, "epoch": 0.1660952380952381, "grad_norm": 0.005162945948541164, "kl": 0.012063086964190006, "learning_rate": 2.9659863945578227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2616 }, { "completion_length": 177.6428680419922, "epoch": 0.16615873015873017, "grad_norm": 0.007386797573417425, "kl": 0.01283213123679161, "learning_rate": 2.9671201814058955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2617 }, { "completion_length": 190.71429443359375, "epoch": 0.16622222222222222, "grad_norm": 0.003955415450036526, "kl": 0.009590287692844868, "learning_rate": 2.9682539682539683e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2618 }, { "completion_length": 138.92857360839844, "epoch": 0.1662857142857143, "grad_norm": 0.005472627002745867, "kl": 0.011953871697187424, "learning_rate": 2.9693877551020406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2619 }, { "completion_length": 154.0, "epoch": 0.16634920634920636, "grad_norm": 0.004582131747156382, "kl": 0.010693286545574665, "learning_rate": 2.9705215419501134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2620 }, { "completion_length": 187.50001525878906, "epoch": 0.1664126984126984, "grad_norm": 0.005641478579491377, "kl": 0.01291041262447834, "learning_rate": 2.9716553287981863e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2621 }, { "completion_length": 161.2857208251953, "epoch": 0.16647619047619047, "grad_norm": 0.005876719020307064, "kl": 0.010963535867631435, "learning_rate": 2.972789115646258e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2622 }, { "completion_length": 167.85714721679688, "epoch": 0.16653968253968254, "grad_norm": 0.005580723751336336, "kl": 0.010630938224494457, "learning_rate": 2.973922902494331e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2623 }, { "completion_length": 171.07144165039062, "epoch": 0.16660317460317461, "grad_norm": 0.004408146254718304, "kl": 0.011326557025313377, "learning_rate": 2.975056689342403e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2624 }, { "completion_length": 171.85714721679688, "epoch": 0.16666666666666666, "grad_norm": 0.006808430887758732, "kl": 0.013633651658892632, "learning_rate": 2.976190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2625 }, { "completion_length": 149.2857208251953, "epoch": 0.16673015873015873, "grad_norm": 0.006522388197481632, "kl": 0.0136649701744318, "learning_rate": 2.977324263038549e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2626 }, { "completion_length": 151.0, "epoch": 0.1667936507936508, "grad_norm": 0.007130773738026619, "kl": 0.014047292992472649, "learning_rate": 2.978458049886621e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2627 }, { "completion_length": 141.42857360839844, "epoch": 0.16685714285714287, "grad_norm": 0.007238809484988451, "kl": 0.017045075073838234, "learning_rate": 2.979591836734694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2628 }, { "completion_length": 157.21429443359375, "epoch": 0.1669206349206349, "grad_norm": 0.006220079492777586, "kl": 0.013770290650427341, "learning_rate": 2.9807256235827667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2629 }, { "completion_length": 127.85714721679688, "epoch": 0.16698412698412698, "grad_norm": 0.006154390051960945, "kl": 0.015629565343260765, "learning_rate": 2.9818594104308385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2630 }, { "completion_length": 177.00001525878906, "epoch": 0.16704761904761906, "grad_norm": 0.004973144270479679, "kl": 0.010781463235616684, "learning_rate": 2.9829931972789113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2631 }, { "completion_length": 129.92857360839844, "epoch": 0.1671111111111111, "grad_norm": 0.005508532281965017, "kl": 0.013174813240766525, "learning_rate": 2.984126984126984e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2632 }, { "completion_length": 127.0714340209961, "epoch": 0.16717460317460317, "grad_norm": 0.007830125279724598, "kl": 0.01621994376182556, "learning_rate": 2.9852607709750564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2633 }, { "completion_length": 170.92857360839844, "epoch": 0.16723809523809524, "grad_norm": 0.0060891867615282536, "kl": 0.012880195863544941, "learning_rate": 2.986394557823129e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2634 }, { "completion_length": 206.00001525878906, "epoch": 0.1673015873015873, "grad_norm": 0.004134963266551495, "kl": 0.009419403038918972, "learning_rate": 2.987528344671202e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2635 }, { "completion_length": 180.92857360839844, "epoch": 0.16736507936507936, "grad_norm": 0.0043551744893193245, "kl": 0.009310735389590263, "learning_rate": 2.9886621315192743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2636 }, { "completion_length": 134.85714721679688, "epoch": 0.16742857142857143, "grad_norm": 0.0052039735019207, "kl": 0.010496838018298149, "learning_rate": 2.9897959183673466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2637 }, { "completion_length": 158.71429443359375, "epoch": 0.1674920634920635, "grad_norm": 0.007037161383777857, "kl": 0.01521303690969944, "learning_rate": 2.9909297052154194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2638 }, { "completion_length": 158.35714721679688, "epoch": 0.16755555555555557, "grad_norm": 0.007092001382261515, "kl": 0.014354039914906025, "learning_rate": 2.9920634920634917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2639 }, { "completion_length": 164.57144165039062, "epoch": 0.1676190476190476, "grad_norm": 0.00464983144775033, "kl": 0.01098391693085432, "learning_rate": 2.9931972789115645e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2640 }, { "completion_length": 178.6428680419922, "epoch": 0.16768253968253968, "grad_norm": 0.005818421021103859, "kl": 0.010590909980237484, "learning_rate": 2.9943310657596374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2641 }, { "completion_length": 175.2857208251953, "epoch": 0.16774603174603175, "grad_norm": 0.005249812267720699, "kl": 0.010586732067167759, "learning_rate": 2.9954648526077096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2642 }, { "completion_length": 206.92857360839844, "epoch": 0.1678095238095238, "grad_norm": 0.0031355286482721567, "kl": 0.008230926468968391, "learning_rate": 2.9965986394557825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2643 }, { "completion_length": 194.2857208251953, "epoch": 0.16787301587301587, "grad_norm": 0.005376467946916819, "kl": 0.010051513090729713, "learning_rate": 2.997732426303854e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2644 }, { "completion_length": 150.71429443359375, "epoch": 0.16793650793650794, "grad_norm": 0.004430555738508701, "kl": 0.011476384475827217, "learning_rate": 2.998866213151927e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2645 }, { "completion_length": 159.57144165039062, "epoch": 0.168, "grad_norm": 0.003826187225058675, "kl": 0.009888524189591408, "learning_rate": 3e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2646 }, { "completion_length": 182.92857360839844, "epoch": 0.16806349206349205, "grad_norm": 0.003164019901305437, "kl": 0.008875643834471703, "learning_rate": 3.001133786848072e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2647 }, { "completion_length": 173.6428680419922, "epoch": 0.16812698412698412, "grad_norm": 0.005517400335520506, "kl": 0.010538904927670956, "learning_rate": 3.002267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2648 }, { "completion_length": 148.21429443359375, "epoch": 0.1681904761904762, "grad_norm": 0.004828088916838169, "kl": 0.009961014613509178, "learning_rate": 3.003401360544218e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2649 }, { "completion_length": 160.1428680419922, "epoch": 0.16825396825396827, "grad_norm": 0.005760341417044401, "kl": 0.012679592706263065, "learning_rate": 3.00453514739229e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2650 }, { "completion_length": 211.7857208251953, "epoch": 0.1683174603174603, "grad_norm": 0.003759572980925441, "kl": 0.008894257247447968, "learning_rate": 3.005668934240363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2651 }, { "completion_length": 177.2857208251953, "epoch": 0.16838095238095238, "grad_norm": 0.004131522960960865, "kl": 0.008852757513523102, "learning_rate": 3.006802721088435e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2652 }, { "completion_length": 170.07144165039062, "epoch": 0.16844444444444445, "grad_norm": 0.0035433818120509386, "kl": 0.008296024054288864, "learning_rate": 3.0079365079365075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2653 }, { "completion_length": 177.71429443359375, "epoch": 0.1685079365079365, "grad_norm": 0.005202563479542732, "kl": 0.009647726081311703, "learning_rate": 3.0090702947845803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2654 }, { "completion_length": 178.7857208251953, "epoch": 0.16857142857142857, "grad_norm": 0.003012152621522546, "kl": 0.00887385569512844, "learning_rate": 3.010204081632653e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2655 }, { "completion_length": 132.5, "epoch": 0.16863492063492064, "grad_norm": 0.0047341217286884785, "kl": 0.011187773197889328, "learning_rate": 3.0113378684807254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2656 }, { "completion_length": 128.2857208251953, "epoch": 0.1686984126984127, "grad_norm": 0.004951381590217352, "kl": 0.010157261975109577, "learning_rate": 3.012471655328798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2657 }, { "completion_length": 150.71429443359375, "epoch": 0.16876190476190475, "grad_norm": 0.004006025847047567, "kl": 0.009748004376888275, "learning_rate": 3.013605442176871e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2658 }, { "completion_length": 186.85714721679688, "epoch": 0.16882539682539682, "grad_norm": 0.0038833990693092346, "kl": 0.008741727098822594, "learning_rate": 3.014739229024943e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2659 }, { "completion_length": 172.00001525878906, "epoch": 0.1688888888888889, "grad_norm": 0.007437816355377436, "kl": 0.013510283082723618, "learning_rate": 3.0158730158730156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2660 }, { "completion_length": 170.21429443359375, "epoch": 0.16895238095238096, "grad_norm": 0.004857530351728201, "kl": 0.011769859120249748, "learning_rate": 3.0170068027210884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2661 }, { "completion_length": 181.1428680419922, "epoch": 0.169015873015873, "grad_norm": 0.0032330388203263283, "kl": 0.009450960904359818, "learning_rate": 3.018140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2662 }, { "completion_length": 133.0, "epoch": 0.16907936507936508, "grad_norm": 0.004241641145199537, "kl": 0.012070341035723686, "learning_rate": 3.0192743764172336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2663 }, { "completion_length": 178.21429443359375, "epoch": 0.16914285714285715, "grad_norm": 0.0033663210924714804, "kl": 0.008537939749658108, "learning_rate": 3.0204081632653064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2664 }, { "completion_length": 180.1428680419922, "epoch": 0.1692063492063492, "grad_norm": 0.0032944988925009966, "kl": 0.008856451138854027, "learning_rate": 3.0215419501133787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2665 }, { "completion_length": 150.1428680419922, "epoch": 0.16926984126984126, "grad_norm": 0.006301301997154951, "kl": 0.013645255006849766, "learning_rate": 3.022675736961451e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2666 }, { "completion_length": 158.35714721679688, "epoch": 0.16933333333333334, "grad_norm": 0.00549890985712409, "kl": 0.010356788523495197, "learning_rate": 3.023809523809523e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2667 }, { "completion_length": 204.50001525878906, "epoch": 0.1693968253968254, "grad_norm": 0.003356463275849819, "kl": 0.008515379391610622, "learning_rate": 3.024943310657596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2668 }, { "completion_length": 162.5, "epoch": 0.16946031746031745, "grad_norm": 0.004184204153716564, "kl": 0.00926054734736681, "learning_rate": 3.026077097505669e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2669 }, { "completion_length": 154.21429443359375, "epoch": 0.16952380952380952, "grad_norm": 0.0034629381261765957, "kl": 0.00827787071466446, "learning_rate": 3.027210884353741e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2670 }, { "completion_length": 153.57144165039062, "epoch": 0.1695873015873016, "grad_norm": 0.002948514884337783, "kl": 0.00808946043252945, "learning_rate": 3.028344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2671 }, { "completion_length": 163.42857360839844, "epoch": 0.16965079365079366, "grad_norm": 0.005756877828389406, "kl": 0.011104648001492023, "learning_rate": 3.029478458049887e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2672 }, { "completion_length": 145.42857360839844, "epoch": 0.1697142857142857, "grad_norm": 0.004211876541376114, "kl": 0.009882831946015358, "learning_rate": 3.030612244897959e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2673 }, { "completion_length": 198.7857208251953, "epoch": 0.16977777777777778, "grad_norm": 0.004281829576939344, "kl": 0.008112326264381409, "learning_rate": 3.0317460317460314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2674 }, { "completion_length": 139.2857208251953, "epoch": 0.16984126984126985, "grad_norm": 0.004264074843376875, "kl": 0.010126529261469841, "learning_rate": 3.032879818594104e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2675 }, { "completion_length": 140.2857208251953, "epoch": 0.1699047619047619, "grad_norm": 0.0036444077268242836, "kl": 0.00964280404150486, "learning_rate": 3.0340136054421765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2676 }, { "completion_length": 177.92857360839844, "epoch": 0.16996825396825396, "grad_norm": 0.003943473566323519, "kl": 0.008225500583648682, "learning_rate": 3.0351473922902493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2677 }, { "completion_length": 178.85714721679688, "epoch": 0.17003174603174603, "grad_norm": 0.0033019469119608402, "kl": 0.007816605269908905, "learning_rate": 3.036281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2678 }, { "completion_length": 170.7857208251953, "epoch": 0.1700952380952381, "grad_norm": 0.004886647220700979, "kl": 0.011849082075059414, "learning_rate": 3.0374149659863944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2679 }, { "completion_length": 184.57144165039062, "epoch": 0.17015873015873015, "grad_norm": 0.003374167252331972, "kl": 0.008013996295630932, "learning_rate": 3.038548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2680 }, { "completion_length": 147.7857208251953, "epoch": 0.17022222222222222, "grad_norm": 1.4784188270568848, "kl": 0.00933137908577919, "learning_rate": 3.0396825396825395e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 2681 }, { "completion_length": 161.85714721679688, "epoch": 0.1702857142857143, "grad_norm": 0.0031699971295893192, "kl": 0.0081680528819561, "learning_rate": 3.040816326530612e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2682 }, { "completion_length": 127.42857360839844, "epoch": 0.17034920634920636, "grad_norm": 0.003818029770627618, "kl": 0.009205479174852371, "learning_rate": 3.0419501133786846e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2683 }, { "completion_length": 188.35714721679688, "epoch": 0.1704126984126984, "grad_norm": 0.0035711312666535378, "kl": 0.007602004334330559, "learning_rate": 3.0430839002267575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2684 }, { "completion_length": 210.1428680419922, "epoch": 0.17047619047619048, "grad_norm": 0.00316626881249249, "kl": 0.00772553076967597, "learning_rate": 3.04421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2685 }, { "completion_length": 188.35714721679688, "epoch": 0.17053968253968255, "grad_norm": 0.003962690941989422, "kl": 0.0082173440605402, "learning_rate": 3.0453514739229026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2686 }, { "completion_length": 151.35714721679688, "epoch": 0.1706031746031746, "grad_norm": 0.006076129153370857, "kl": 0.012261495925486088, "learning_rate": 3.046485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2687 }, { "completion_length": 157.1428680419922, "epoch": 0.17066666666666666, "grad_norm": 0.004573802929371595, "kl": 0.010837501846253872, "learning_rate": 3.0476190476190477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2688 }, { "completion_length": 148.42857360839844, "epoch": 0.17073015873015873, "grad_norm": 0.004076038021594286, "kl": 0.008984265848994255, "learning_rate": 3.04875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2689 }, { "completion_length": 175.6428680419922, "epoch": 0.1707936507936508, "grad_norm": 0.003275996306911111, "kl": 0.008561643771827221, "learning_rate": 3.049886621315192e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2690 }, { "completion_length": 172.35714721679688, "epoch": 0.17085714285714285, "grad_norm": 0.0025603165850043297, "kl": 0.007492739707231522, "learning_rate": 3.051020408163265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2691 }, { "completion_length": 161.07144165039062, "epoch": 0.17092063492063492, "grad_norm": 0.0036388887092471123, "kl": 0.010610440745949745, "learning_rate": 3.052154195011338e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2692 }, { "completion_length": 199.2857208251953, "epoch": 0.170984126984127, "grad_norm": 0.003150409087538719, "kl": 0.008268688805401325, "learning_rate": 3.05328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2693 }, { "completion_length": 127.35714721679688, "epoch": 0.17104761904761906, "grad_norm": 0.0038907460402697325, "kl": 0.009072455577552319, "learning_rate": 3.054421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2694 }, { "completion_length": 203.21429443359375, "epoch": 0.1711111111111111, "grad_norm": 0.0019658771343529224, "kl": 0.006360872648656368, "learning_rate": 3.055555555555556e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2695 }, { "completion_length": 198.71429443359375, "epoch": 0.17117460317460317, "grad_norm": 0.0032264343462884426, "kl": 0.007666078861802816, "learning_rate": 3.0566893424036276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2696 }, { "completion_length": 148.2857208251953, "epoch": 0.17123809523809524, "grad_norm": 0.0033800513483583927, "kl": 0.009922980330884457, "learning_rate": 3.0578231292517004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2697 }, { "completion_length": 167.1428680419922, "epoch": 0.1713015873015873, "grad_norm": 0.003340506460517645, "kl": 0.008790655992925167, "learning_rate": 3.058956916099773e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2698 }, { "completion_length": 195.00001525878906, "epoch": 0.17136507936507936, "grad_norm": 0.0028583083767443895, "kl": 0.006860272493213415, "learning_rate": 3.0600907029478455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2699 }, { "completion_length": 166.21429443359375, "epoch": 0.17142857142857143, "grad_norm": 0.0026058503426611423, "kl": 0.00789375975728035, "learning_rate": 3.0612244897959183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2700 }, { "completion_length": 154.7857208251953, "epoch": 0.1714920634920635, "grad_norm": 0.004213318228721619, "kl": 0.00891057588160038, "learning_rate": 3.062358276643991e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2701 }, { "completion_length": 158.1428680419922, "epoch": 0.17155555555555554, "grad_norm": 0.002567915478721261, "kl": 0.007670023012906313, "learning_rate": 3.0634920634920634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2702 }, { "completion_length": 172.1428680419922, "epoch": 0.17161904761904762, "grad_norm": 0.002878794213756919, "kl": 0.007608675863593817, "learning_rate": 3.064625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2703 }, { "completion_length": 206.35714721679688, "epoch": 0.1716825396825397, "grad_norm": 0.002768582431599498, "kl": 0.007079532369971275, "learning_rate": 3.0657596371882086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2704 }, { "completion_length": 150.35714721679688, "epoch": 0.17174603174603176, "grad_norm": 0.0036613112315535545, "kl": 0.007904799655079842, "learning_rate": 3.066893424036281e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2705 }, { "completion_length": 158.07144165039062, "epoch": 0.1718095238095238, "grad_norm": 0.0027868335600942373, "kl": 0.007870814763009548, "learning_rate": 3.0680272108843537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2706 }, { "completion_length": 180.07144165039062, "epoch": 0.17187301587301587, "grad_norm": 0.0027667987160384655, "kl": 0.008284691721200943, "learning_rate": 3.069160997732426e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2707 }, { "completion_length": 148.21429443359375, "epoch": 0.17193650793650794, "grad_norm": 0.004355899058282375, "kl": 0.009975342079997063, "learning_rate": 3.070294784580499e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2708 }, { "completion_length": 159.6428680419922, "epoch": 0.172, "grad_norm": 0.0035064334515482187, "kl": 0.008850852958858013, "learning_rate": 3.0714285714285716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2709 }, { "completion_length": 144.71429443359375, "epoch": 0.17206349206349206, "grad_norm": 0.0031410325318574905, "kl": 0.0070786066353321075, "learning_rate": 3.072562358276644e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 2710 }, { "completion_length": 131.07144165039062, "epoch": 0.17212698412698413, "grad_norm": 0.005172989331185818, "kl": 0.011495748534798622, "learning_rate": 3.073696145124716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2711 }, { "completion_length": 178.2857208251953, "epoch": 0.1721904761904762, "grad_norm": 0.0026261615566909313, "kl": 0.006961731240153313, "learning_rate": 3.074829931972789e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2712 }, { "completion_length": 195.57144165039062, "epoch": 0.17225396825396824, "grad_norm": 0.0036733560264110565, "kl": 0.007733347360044718, "learning_rate": 3.0759637188208613e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2713 }, { "completion_length": 156.71429443359375, "epoch": 0.1723174603174603, "grad_norm": 0.0022994596511125565, "kl": 0.006574793718755245, "learning_rate": 3.077097505668934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2714 }, { "completion_length": 154.35714721679688, "epoch": 0.17238095238095238, "grad_norm": 0.007294307928532362, "kl": 0.012331944890320301, "learning_rate": 3.078231292517007e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2715 }, { "completion_length": 149.7857208251953, "epoch": 0.17244444444444446, "grad_norm": 0.0037675725761801004, "kl": 0.009522506967186928, "learning_rate": 3.079365079365079e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2716 }, { "completion_length": 162.21429443359375, "epoch": 0.1725079365079365, "grad_norm": 0.003030631225556135, "kl": 0.008688762784004211, "learning_rate": 3.080498866213152e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2717 }, { "completion_length": 170.35714721679688, "epoch": 0.17257142857142857, "grad_norm": 0.0033011336345225573, "kl": 0.007663450203835964, "learning_rate": 3.0816326530612243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2718 }, { "completion_length": 132.57144165039062, "epoch": 0.17263492063492064, "grad_norm": 0.004216620698571205, "kl": 0.010350558906793594, "learning_rate": 3.0827664399092966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2719 }, { "completion_length": 150.7857208251953, "epoch": 0.1726984126984127, "grad_norm": 0.002827664837241173, "kl": 0.008063587360084057, "learning_rate": 3.0839002267573694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2720 }, { "completion_length": 167.7857208251953, "epoch": 0.17276190476190476, "grad_norm": 0.003306514583528042, "kl": 0.009519900195300579, "learning_rate": 3.085034013605442e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2721 }, { "completion_length": 163.71429443359375, "epoch": 0.17282539682539683, "grad_norm": 0.0025621887762099504, "kl": 0.0069678002037107944, "learning_rate": 3.0861678004535145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2722 }, { "completion_length": 173.57144165039062, "epoch": 0.1728888888888889, "grad_norm": 0.0030327471904456615, "kl": 0.0074440850876271725, "learning_rate": 3.0873015873015874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2723 }, { "completion_length": 176.35714721679688, "epoch": 0.17295238095238094, "grad_norm": 0.002901265164837241, "kl": 0.007195749785751104, "learning_rate": 3.08843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2724 }, { "completion_length": 155.92857360839844, "epoch": 0.173015873015873, "grad_norm": 0.002964972285553813, "kl": 0.007429227698594332, "learning_rate": 3.089569160997732e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2725 }, { "completion_length": 166.42857360839844, "epoch": 0.17307936507936508, "grad_norm": 0.0024000448174774647, "kl": 0.007335728500038385, "learning_rate": 3.090702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2726 }, { "completion_length": 172.57144165039062, "epoch": 0.17314285714285715, "grad_norm": 0.002333929529413581, "kl": 0.006554779130965471, "learning_rate": 3.091836734693877e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2727 }, { "completion_length": 204.35714721679688, "epoch": 0.1732063492063492, "grad_norm": 0.003925842698663473, "kl": 0.009588446468114853, "learning_rate": 3.09297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2728 }, { "completion_length": 162.71429443359375, "epoch": 0.17326984126984127, "grad_norm": 0.0025604714173823595, "kl": 0.0074756755493581295, "learning_rate": 3.0941043083900227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2729 }, { "completion_length": 188.00001525878906, "epoch": 0.17333333333333334, "grad_norm": 0.002901271916925907, "kl": 0.0075613390654325485, "learning_rate": 3.095238095238095e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2730 }, { "completion_length": 143.57144165039062, "epoch": 0.1733968253968254, "grad_norm": 0.005541389808058739, "kl": 0.012109441682696342, "learning_rate": 3.096371882086168e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2731 }, { "completion_length": 183.00001525878906, "epoch": 0.17346031746031745, "grad_norm": 0.002900712424889207, "kl": 0.007985527627170086, "learning_rate": 3.0975056689342406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2732 }, { "completion_length": 142.42857360839844, "epoch": 0.17352380952380952, "grad_norm": 0.0023717223666608334, "kl": 0.006560888607054949, "learning_rate": 3.0986394557823124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2733 }, { "completion_length": 174.92857360839844, "epoch": 0.1735873015873016, "grad_norm": 0.0028798128478229046, "kl": 0.00722519401460886, "learning_rate": 3.099773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2734 }, { "completion_length": 172.71429443359375, "epoch": 0.17365079365079364, "grad_norm": 0.0037415092810988426, "kl": 0.010255283676087856, "learning_rate": 3.100907029478458e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2735 }, { "completion_length": 163.21429443359375, "epoch": 0.1737142857142857, "grad_norm": 0.002158086746931076, "kl": 0.006170045118778944, "learning_rate": 3.1020408163265303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2736 }, { "completion_length": 187.92857360839844, "epoch": 0.17377777777777778, "grad_norm": 0.002647924004122615, "kl": 0.007473458535969257, "learning_rate": 3.103174603174603e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2737 }, { "completion_length": 143.07144165039062, "epoch": 0.17384126984126985, "grad_norm": 0.003728864947333932, "kl": 0.010395526885986328, "learning_rate": 3.104308390022676e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2738 }, { "completion_length": 140.5, "epoch": 0.1739047619047619, "grad_norm": 0.00256896554492414, "kl": 0.006868413649499416, "learning_rate": 3.105442176870748e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2739 }, { "completion_length": 169.71429443359375, "epoch": 0.17396825396825397, "grad_norm": 0.002646489068865776, "kl": 0.006550350692123175, "learning_rate": 3.1065759637188205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2740 }, { "completion_length": 196.1428680419922, "epoch": 0.17403174603174604, "grad_norm": 0.684147834777832, "kl": 0.007329348474740982, "learning_rate": 3.1077097505668933e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2741 }, { "completion_length": 206.6428680419922, "epoch": 0.1740952380952381, "grad_norm": 0.001834062859416008, "kl": 0.005492504220455885, "learning_rate": 3.1088435374149656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2742 }, { "completion_length": 134.2857208251953, "epoch": 0.17415873015873015, "grad_norm": 0.0030686401296406984, "kl": 0.008689923211932182, "learning_rate": 3.1099773242630384e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2743 }, { "completion_length": 179.85714721679688, "epoch": 0.17422222222222222, "grad_norm": 0.0024229411501437426, "kl": 0.007672902196645737, "learning_rate": 3.111111111111111e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2744 }, { "completion_length": 165.0, "epoch": 0.1742857142857143, "grad_norm": 0.004396264441311359, "kl": 0.008738099597394466, "learning_rate": 3.1122448979591836e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2745 }, { "completion_length": 159.42857360839844, "epoch": 0.17434920634920634, "grad_norm": 0.0024249847047030926, "kl": 0.007203731685876846, "learning_rate": 3.1133786848072564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2746 }, { "completion_length": 160.71429443359375, "epoch": 0.1744126984126984, "grad_norm": 0.002616515848785639, "kl": 0.007116646971553564, "learning_rate": 3.114512471655329e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2747 }, { "completion_length": 153.21429443359375, "epoch": 0.17447619047619048, "grad_norm": 0.0026293133851140738, "kl": 0.007504502777010202, "learning_rate": 3.115646258503401e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2748 }, { "completion_length": 155.71429443359375, "epoch": 0.17453968253968255, "grad_norm": 0.0026586679741740227, "kl": 0.007479768246412277, "learning_rate": 3.116780045351474e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2749 }, { "completion_length": 186.6428680419922, "epoch": 0.1746031746031746, "grad_norm": 0.0022985711693763733, "kl": 0.0064835576340556145, "learning_rate": 3.117913832199546e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2750 }, { "completion_length": 141.7857208251953, "epoch": 0.17466666666666666, "grad_norm": 0.002125643892213702, "kl": 0.008121559396386147, "learning_rate": 3.119047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2751 }, { "completion_length": 138.42857360839844, "epoch": 0.17473015873015874, "grad_norm": 0.0030113670509308577, "kl": 0.008410252630710602, "learning_rate": 3.1201814058956917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2752 }, { "completion_length": 178.85714721679688, "epoch": 0.1747936507936508, "grad_norm": 0.0026121949777007103, "kl": 0.006896849721670151, "learning_rate": 3.121315192743764e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2753 }, { "completion_length": 208.57144165039062, "epoch": 0.17485714285714285, "grad_norm": 0.0015220671193674207, "kl": 0.005177660845220089, "learning_rate": 3.122448979591837e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2754 }, { "completion_length": 181.07144165039062, "epoch": 0.17492063492063492, "grad_norm": 0.002216907450929284, "kl": 0.005608541890978813, "learning_rate": 3.123582766439909e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2755 }, { "completion_length": 161.7857208251953, "epoch": 0.174984126984127, "grad_norm": 0.002074446063488722, "kl": 0.005581497680395842, "learning_rate": 3.1247165532879814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2756 }, { "completion_length": 195.42857360839844, "epoch": 0.17504761904761904, "grad_norm": 0.0017718578455969691, "kl": 0.005713738966733217, "learning_rate": 3.125850340136054e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2757 }, { "completion_length": 194.71429443359375, "epoch": 0.1751111111111111, "grad_norm": 0.0017224421026185155, "kl": 0.005530369933694601, "learning_rate": 3.126984126984127e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2758 }, { "completion_length": 154.0, "epoch": 0.17517460317460318, "grad_norm": 0.0016650805482640862, "kl": 0.005260644480586052, "learning_rate": 3.1281179138321993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2759 }, { "completion_length": 171.21429443359375, "epoch": 0.17523809523809525, "grad_norm": 0.002438819734379649, "kl": 0.00653369864448905, "learning_rate": 3.129251700680272e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2760 }, { "completion_length": 147.6428680419922, "epoch": 0.1753015873015873, "grad_norm": 0.0020437065977603197, "kl": 0.006766598671674728, "learning_rate": 3.130385487528345e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2761 }, { "completion_length": 177.42857360839844, "epoch": 0.17536507936507936, "grad_norm": 0.002148648491129279, "kl": 0.005960961803793907, "learning_rate": 3.1315192743764167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2762 }, { "completion_length": 150.6428680419922, "epoch": 0.17542857142857143, "grad_norm": 1.8709772825241089, "kl": 0.009147661738097668, "learning_rate": 3.1326530612244895e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2763 }, { "completion_length": 190.07144165039062, "epoch": 0.1754920634920635, "grad_norm": 0.0022230653557926416, "kl": 0.007092934101819992, "learning_rate": 3.1337868480725624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2764 }, { "completion_length": 163.5, "epoch": 0.17555555555555555, "grad_norm": 0.0034957383759319782, "kl": 0.00753059284761548, "learning_rate": 3.1349206349206346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2765 }, { "completion_length": 181.6428680419922, "epoch": 0.17561904761904762, "grad_norm": 0.003149154130369425, "kl": 0.008482078090310097, "learning_rate": 3.1360544217687075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2766 }, { "completion_length": 157.7857208251953, "epoch": 0.1756825396825397, "grad_norm": 0.0022842020262032747, "kl": 0.006487720180302858, "learning_rate": 3.1371882086167803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2767 }, { "completion_length": 152.5, "epoch": 0.17574603174603173, "grad_norm": 0.0026176166720688343, "kl": 0.007837203331291676, "learning_rate": 3.1383219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2768 }, { "completion_length": 158.85714721679688, "epoch": 0.1758095238095238, "grad_norm": 0.0019338973797857761, "kl": 0.00626843748614192, "learning_rate": 3.1394557823129254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2769 }, { "completion_length": 173.7857208251953, "epoch": 0.17587301587301588, "grad_norm": 0.00203559547662735, "kl": 0.006449601612985134, "learning_rate": 3.140589569160997e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2770 }, { "completion_length": 200.57144165039062, "epoch": 0.17593650793650795, "grad_norm": 0.011354477144777775, "kl": 0.010839114896953106, "learning_rate": 3.14172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2771 }, { "completion_length": 178.2857208251953, "epoch": 0.176, "grad_norm": 0.0015560979954898357, "kl": 0.004979549441486597, "learning_rate": 3.142857142857143e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2772 }, { "completion_length": 151.07144165039062, "epoch": 0.17606349206349206, "grad_norm": 0.002852032659575343, "kl": 0.007700009737163782, "learning_rate": 3.143990929705215e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2773 }, { "completion_length": 158.92857360839844, "epoch": 0.17612698412698413, "grad_norm": 0.002846925053745508, "kl": 0.008029202930629253, "learning_rate": 3.145124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2774 }, { "completion_length": 163.7857208251953, "epoch": 0.1761904761904762, "grad_norm": 0.0015417574904859066, "kl": 0.005359954200685024, "learning_rate": 3.1462585034013607e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2775 }, { "completion_length": 157.21429443359375, "epoch": 0.17625396825396825, "grad_norm": 1.3066540956497192, "kl": 0.011164623312652111, "learning_rate": 3.147392290249433e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2776 }, { "completion_length": 139.6428680419922, "epoch": 0.17631746031746032, "grad_norm": 0.0021975894924253225, "kl": 0.006617252249270678, "learning_rate": 3.1485260770975053e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2777 }, { "completion_length": 169.07144165039062, "epoch": 0.1763809523809524, "grad_norm": 0.0034591308794915676, "kl": 0.009360229596495628, "learning_rate": 3.149659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2778 }, { "completion_length": 173.07144165039062, "epoch": 0.17644444444444443, "grad_norm": 0.002344265580177307, "kl": 0.005895416252315044, "learning_rate": 3.1507936507936504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2779 }, { "completion_length": 167.85714721679688, "epoch": 0.1765079365079365, "grad_norm": 0.004605285357683897, "kl": 0.008535600267350674, "learning_rate": 3.151927437641723e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2780 }, { "completion_length": 162.1428680419922, "epoch": 0.17657142857142857, "grad_norm": 0.0019132044399157166, "kl": 0.005823696963489056, "learning_rate": 3.153061224489796e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2781 }, { "completion_length": 166.92857360839844, "epoch": 0.17663492063492064, "grad_norm": 0.0027490672655403614, "kl": 0.00812390074133873, "learning_rate": 3.1541950113378683e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2782 }, { "completion_length": 171.6428680419922, "epoch": 0.1766984126984127, "grad_norm": 0.0018704195972532034, "kl": 0.006239497568458319, "learning_rate": 3.155328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2783 }, { "completion_length": 152.5, "epoch": 0.17676190476190476, "grad_norm": 0.003306251484900713, "kl": 0.007448541931807995, "learning_rate": 3.1564625850340134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2784 }, { "completion_length": 170.42857360839844, "epoch": 0.17682539682539683, "grad_norm": 0.0026670314837247133, "kl": 0.007035367656499147, "learning_rate": 3.1575963718820857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2785 }, { "completion_length": 147.71429443359375, "epoch": 0.1768888888888889, "grad_norm": 0.0025178235955536366, "kl": 0.0068393065594136715, "learning_rate": 3.1587301587301586e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2786 }, { "completion_length": 184.35714721679688, "epoch": 0.17695238095238094, "grad_norm": 0.002307786839082837, "kl": 0.007399542257189751, "learning_rate": 3.159863945578231e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2787 }, { "completion_length": 183.57144165039062, "epoch": 0.17701587301587302, "grad_norm": 0.0023610475473105907, "kl": 0.006564988289028406, "learning_rate": 3.1609977324263037e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2788 }, { "completion_length": 147.57144165039062, "epoch": 0.1770793650793651, "grad_norm": 0.0023376266472041607, "kl": 0.006616110447794199, "learning_rate": 3.1621315192743765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2789 }, { "completion_length": 181.42857360839844, "epoch": 0.17714285714285713, "grad_norm": 0.006092622876167297, "kl": 0.010166550055146217, "learning_rate": 3.163265306122449e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2790 }, { "completion_length": 168.71429443359375, "epoch": 0.1772063492063492, "grad_norm": 0.0014305714285001159, "kl": 0.004325552377849817, "learning_rate": 3.1643990929705216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2791 }, { "completion_length": 125.78572082519531, "epoch": 0.17726984126984127, "grad_norm": 0.004981372505426407, "kl": 0.009483183734118938, "learning_rate": 3.165532879818594e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2792 }, { "completion_length": 139.92857360839844, "epoch": 0.17733333333333334, "grad_norm": 0.002590347081422806, "kl": 0.0059608458541333675, "learning_rate": 3.166666666666666e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2793 }, { "completion_length": 139.6428680419922, "epoch": 0.17739682539682539, "grad_norm": 0.002878224477171898, "kl": 0.008017522282898426, "learning_rate": 3.167800453514739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2794 }, { "completion_length": 155.07144165039062, "epoch": 0.17746031746031746, "grad_norm": 0.0024496258702129126, "kl": 0.007968513295054436, "learning_rate": 3.168934240362812e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2795 }, { "completion_length": 133.5, "epoch": 0.17752380952380953, "grad_norm": 1.516259789466858, "kl": 0.009461412206292152, "learning_rate": 3.170068027210884e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2796 }, { "completion_length": 138.92857360839844, "epoch": 0.1775873015873016, "grad_norm": 0.002607228234410286, "kl": 0.008245250210165977, "learning_rate": 3.171201814058957e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2797 }, { "completion_length": 186.42857360839844, "epoch": 0.17765079365079364, "grad_norm": 0.0017336508026346564, "kl": 0.0066362605430185795, "learning_rate": 3.17233560090703e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2798 }, { "completion_length": 167.2857208251953, "epoch": 0.1777142857142857, "grad_norm": 0.0020367654506117105, "kl": 0.006942091044038534, "learning_rate": 3.1734693877551015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2799 }, { "completion_length": 153.2857208251953, "epoch": 0.17777777777777778, "grad_norm": 0.003454189980402589, "kl": 0.008685188367962837, "learning_rate": 3.1746031746031743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2800 }, { "completion_length": 216.1428680419922, "epoch": 0.17784126984126983, "grad_norm": 0.004796729423105717, "kl": 0.009551119059324265, "learning_rate": 3.175736961451247e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2801 }, { "completion_length": 153.1428680419922, "epoch": 0.1779047619047619, "grad_norm": 0.004473987501114607, "kl": 0.011325249448418617, "learning_rate": 3.1768707482993194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2802 }, { "completion_length": 164.07144165039062, "epoch": 0.17796825396825397, "grad_norm": 0.0029980631079524755, "kl": 0.007729513570666313, "learning_rate": 3.178004535147392e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2803 }, { "completion_length": 182.07144165039062, "epoch": 0.17803174603174604, "grad_norm": 0.003753709839656949, "kl": 0.009081135503947735, "learning_rate": 3.179138321995465e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2804 }, { "completion_length": 157.07144165039062, "epoch": 0.17809523809523808, "grad_norm": 0.00325739081017673, "kl": 0.008592450991272926, "learning_rate": 3.1802721088435374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2805 }, { "completion_length": 157.6428680419922, "epoch": 0.17815873015873016, "grad_norm": 0.002915875753387809, "kl": 0.00945357233285904, "learning_rate": 3.18140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2806 }, { "completion_length": 151.35714721679688, "epoch": 0.17822222222222223, "grad_norm": 0.0035789895337074995, "kl": 0.010247481986880302, "learning_rate": 3.182539682539682e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2807 }, { "completion_length": 146.42857360839844, "epoch": 0.1782857142857143, "grad_norm": 0.0049986387602984905, "kl": 0.014011822640895844, "learning_rate": 3.183673469387755e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2808 }, { "completion_length": 141.92857360839844, "epoch": 0.17834920634920634, "grad_norm": 0.0034259362146258354, "kl": 0.009875732474029064, "learning_rate": 3.1848072562358276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2809 }, { "completion_length": 162.42857360839844, "epoch": 0.1784126984126984, "grad_norm": 0.0036426193546503782, "kl": 0.009919981472194195, "learning_rate": 3.1859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2810 }, { "completion_length": 172.1428680419922, "epoch": 0.17847619047619048, "grad_norm": 0.002622442552819848, "kl": 0.007473251316696405, "learning_rate": 3.1870748299319727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2811 }, { "completion_length": 171.7857208251953, "epoch": 0.17853968253968253, "grad_norm": 0.0036684474907815456, "kl": 0.010050848126411438, "learning_rate": 3.1882086167800455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2812 }, { "completion_length": 154.71429443359375, "epoch": 0.1786031746031746, "grad_norm": 0.0047467779368162155, "kl": 0.013743267394602299, "learning_rate": 3.189342403628118e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2813 }, { "completion_length": 118.78572082519531, "epoch": 0.17866666666666667, "grad_norm": 0.005985833238810301, "kl": 0.013208780437707901, "learning_rate": 3.19047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2814 }, { "completion_length": 156.57144165039062, "epoch": 0.17873015873015874, "grad_norm": 0.011424395255744457, "kl": 0.014851164072751999, "learning_rate": 3.191609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2815 }, { "completion_length": 198.85714721679688, "epoch": 0.17879365079365078, "grad_norm": 0.0032265472691506147, "kl": 0.00913984701037407, "learning_rate": 3.192743764172335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2816 }, { "completion_length": 168.35714721679688, "epoch": 0.17885714285714285, "grad_norm": 0.00483226403594017, "kl": 0.012242470867931843, "learning_rate": 3.193877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2817 }, { "completion_length": 159.7857208251953, "epoch": 0.17892063492063492, "grad_norm": 0.004605913534760475, "kl": 0.01318311970680952, "learning_rate": 3.195011337868481e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2818 }, { "completion_length": 150.35714721679688, "epoch": 0.178984126984127, "grad_norm": 0.0030682808719575405, "kl": 0.011281672865152359, "learning_rate": 3.196145124716553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2819 }, { "completion_length": 158.57144165039062, "epoch": 0.17904761904761904, "grad_norm": 0.0035693065728992224, "kl": 0.010232964530587196, "learning_rate": 3.197278911564626e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2820 }, { "completion_length": 196.1428680419922, "epoch": 0.1791111111111111, "grad_norm": 0.003571723122149706, "kl": 0.010383229702711105, "learning_rate": 3.198412698412698e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2821 }, { "completion_length": 156.71429443359375, "epoch": 0.17917460317460318, "grad_norm": 0.0034872200340032578, "kl": 0.008993093855679035, "learning_rate": 3.1995464852607705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2822 }, { "completion_length": 172.85714721679688, "epoch": 0.17923809523809525, "grad_norm": 0.0034081246703863144, "kl": 0.008814751170575619, "learning_rate": 3.2006802721088433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2823 }, { "completion_length": 169.0, "epoch": 0.1793015873015873, "grad_norm": 0.007359639275819063, "kl": 0.016074558719992638, "learning_rate": 3.201814058956916e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2824 }, { "completion_length": 168.6428680419922, "epoch": 0.17936507936507937, "grad_norm": 0.0033169209491461515, "kl": 0.00909382477402687, "learning_rate": 3.2029478458049884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2825 }, { "completion_length": 168.0, "epoch": 0.17942857142857144, "grad_norm": 0.0036731003783643246, "kl": 0.009944331832230091, "learning_rate": 3.204081632653061e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2826 }, { "completion_length": 148.6428680419922, "epoch": 0.17949206349206348, "grad_norm": 0.00383990490809083, "kl": 0.011854467913508415, "learning_rate": 3.205215419501134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2827 }, { "completion_length": 212.21429443359375, "epoch": 0.17955555555555555, "grad_norm": 0.0024807145819067955, "kl": 0.007718092296272516, "learning_rate": 3.2063492063492064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2828 }, { "completion_length": 156.6428680419922, "epoch": 0.17961904761904762, "grad_norm": 0.0036246725358068943, "kl": 0.010031621903181076, "learning_rate": 3.2074829931972787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2829 }, { "completion_length": 133.7857208251953, "epoch": 0.1796825396825397, "grad_norm": 0.004969700239598751, "kl": 0.013033527880907059, "learning_rate": 3.208616780045351e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2830 }, { "completion_length": 160.5, "epoch": 0.17974603174603174, "grad_norm": 0.0036035114899277687, "kl": 0.00970599427819252, "learning_rate": 3.209750566893424e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2831 }, { "completion_length": 114.0714340209961, "epoch": 0.1798095238095238, "grad_norm": 0.00481334188953042, "kl": 0.014635342173278332, "learning_rate": 3.2108843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2832 }, { "completion_length": 157.1428680419922, "epoch": 0.17987301587301588, "grad_norm": 0.0043105026707053185, "kl": 0.012022367678582668, "learning_rate": 3.212018140589569e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2833 }, { "completion_length": 125.50000762939453, "epoch": 0.17993650793650795, "grad_norm": 0.006094392854720354, "kl": 0.01705484464764595, "learning_rate": 3.2131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2834 }, { "completion_length": 172.2857208251953, "epoch": 0.18, "grad_norm": 0.00585573073476553, "kl": 0.015924464911222458, "learning_rate": 3.2142857142857145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2835 }, { "completion_length": 155.35714721679688, "epoch": 0.18006349206349206, "grad_norm": 0.0025609955191612244, "kl": 0.007739392574876547, "learning_rate": 3.2154195011337863e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2836 }, { "completion_length": 172.50001525878906, "epoch": 0.18012698412698414, "grad_norm": 0.0031449359375983477, "kl": 0.008775061927735806, "learning_rate": 3.216553287981859e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2837 }, { "completion_length": 194.42857360839844, "epoch": 0.18019047619047618, "grad_norm": 0.003337870817631483, "kl": 0.008411437273025513, "learning_rate": 3.217687074829932e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2838 }, { "completion_length": 207.50001525878906, "epoch": 0.18025396825396825, "grad_norm": 0.0029578066896647215, "kl": 0.008270496502518654, "learning_rate": 3.218820861678004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2839 }, { "completion_length": 123.21429443359375, "epoch": 0.18031746031746032, "grad_norm": 0.00573744997382164, "kl": 0.014831595122814178, "learning_rate": 3.219954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2840 }, { "completion_length": 174.2857208251953, "epoch": 0.1803809523809524, "grad_norm": 0.004795794375240803, "kl": 0.012813241221010685, "learning_rate": 3.22108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2841 }, { "completion_length": 164.35714721679688, "epoch": 0.18044444444444444, "grad_norm": 0.0028668860904872417, "kl": 0.007869507186114788, "learning_rate": 3.222222222222222e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2842 }, { "completion_length": 172.7857208251953, "epoch": 0.1805079365079365, "grad_norm": 0.0031449440866708755, "kl": 0.009272373281419277, "learning_rate": 3.2233560090702944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2843 }, { "completion_length": 146.85714721679688, "epoch": 0.18057142857142858, "grad_norm": 0.004532573278993368, "kl": 0.009971020743250847, "learning_rate": 3.224489795918367e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2844 }, { "completion_length": 133.92857360839844, "epoch": 0.18063492063492065, "grad_norm": 0.0031701538246124983, "kl": 0.009534825570881367, "learning_rate": 3.2256235827664395e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2845 }, { "completion_length": 114.14286041259766, "epoch": 0.1806984126984127, "grad_norm": 0.005974077619612217, "kl": 0.013954557478427887, "learning_rate": 3.2267573696145124e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2846 }, { "completion_length": 182.42857360839844, "epoch": 0.18076190476190476, "grad_norm": 0.8210514783859253, "kl": 0.007874002680182457, "learning_rate": 3.227891156462585e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2847 }, { "completion_length": 143.42857360839844, "epoch": 0.18082539682539683, "grad_norm": 0.0037247606087476015, "kl": 0.009034342132508755, "learning_rate": 3.2290249433106575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2848 }, { "completion_length": 131.7857208251953, "epoch": 0.18088888888888888, "grad_norm": 0.005163985770195723, "kl": 0.015210948884487152, "learning_rate": 3.2301587301587303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2849 }, { "completion_length": 196.42857360839844, "epoch": 0.18095238095238095, "grad_norm": 0.0029522369150072336, "kl": 0.007749279960989952, "learning_rate": 3.2312925170068026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2850 }, { "completion_length": 165.7857208251953, "epoch": 0.18101587301587302, "grad_norm": 0.0036298970226198435, "kl": 0.010302454233169556, "learning_rate": 3.232426303854875e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2851 }, { "completion_length": 168.2857208251953, "epoch": 0.1810793650793651, "grad_norm": 0.003824725514277816, "kl": 0.010445589199662209, "learning_rate": 3.2335600907029477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2852 }, { "completion_length": 173.35714721679688, "epoch": 0.18114285714285713, "grad_norm": 0.004667832050472498, "kl": 0.010745651088654995, "learning_rate": 3.23469387755102e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2853 }, { "completion_length": 146.92857360839844, "epoch": 0.1812063492063492, "grad_norm": 0.004960658494383097, "kl": 0.01514404360204935, "learning_rate": 3.235827664399093e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2854 }, { "completion_length": 143.42857360839844, "epoch": 0.18126984126984128, "grad_norm": 0.0036461728159338236, "kl": 0.010562950745224953, "learning_rate": 3.2369614512471656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2855 }, { "completion_length": 159.21429443359375, "epoch": 0.18133333333333335, "grad_norm": 0.005006916355341673, "kl": 0.012297994457185268, "learning_rate": 3.238095238095238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2856 }, { "completion_length": 156.21429443359375, "epoch": 0.1813968253968254, "grad_norm": 0.004711472429335117, "kl": 0.014487686567008495, "learning_rate": 3.2392290249433107e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2857 }, { "completion_length": 190.07144165039062, "epoch": 0.18146031746031746, "grad_norm": 0.004817202687263489, "kl": 0.014374316670000553, "learning_rate": 3.240362811791383e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2858 }, { "completion_length": 187.42857360839844, "epoch": 0.18152380952380953, "grad_norm": 0.0035210626665502787, "kl": 0.009201417677104473, "learning_rate": 3.2414965986394553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2859 }, { "completion_length": 160.1428680419922, "epoch": 0.18158730158730158, "grad_norm": 0.006953984033316374, "kl": 0.018797509372234344, "learning_rate": 3.242630385487528e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2860 }, { "completion_length": 174.1428680419922, "epoch": 0.18165079365079365, "grad_norm": 0.004103287123143673, "kl": 0.010844985023140907, "learning_rate": 3.243764172335601e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2861 }, { "completion_length": 166.85714721679688, "epoch": 0.18171428571428572, "grad_norm": 0.00568073196336627, "kl": 0.015446086414158344, "learning_rate": 3.244897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2862 }, { "completion_length": 138.6428680419922, "epoch": 0.1817777777777778, "grad_norm": 0.008421200327575207, "kl": 0.02275875397026539, "learning_rate": 3.246031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2863 }, { "completion_length": 181.50001525878906, "epoch": 0.18184126984126983, "grad_norm": 0.00402427464723587, "kl": 0.011318814009428024, "learning_rate": 3.247165532879819e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2864 }, { "completion_length": 157.0, "epoch": 0.1819047619047619, "grad_norm": 1.0628548860549927, "kl": 0.01637968048453331, "learning_rate": 3.248299319727891e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2865 }, { "completion_length": 216.92857360839844, "epoch": 0.18196825396825397, "grad_norm": 0.004159466363489628, "kl": 0.011070078238844872, "learning_rate": 3.2494331065759634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2866 }, { "completion_length": 150.57144165039062, "epoch": 0.18203174603174604, "grad_norm": 0.0052779982797801495, "kl": 0.015192694962024689, "learning_rate": 3.250566893424036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2867 }, { "completion_length": 158.21429443359375, "epoch": 0.1820952380952381, "grad_norm": 0.009028611704707146, "kl": 0.0244750939309597, "learning_rate": 3.2517006802721086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2868 }, { "completion_length": 167.57144165039062, "epoch": 0.18215873015873016, "grad_norm": 0.005842612124979496, "kl": 0.01102933008223772, "learning_rate": 3.2528344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2869 }, { "completion_length": 185.92857360839844, "epoch": 0.18222222222222223, "grad_norm": 0.0038277108687907457, "kl": 0.010104595683515072, "learning_rate": 3.2539682539682537e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2870 }, { "completion_length": 174.07144165039062, "epoch": 0.18228571428571427, "grad_norm": 0.004176336340606213, "kl": 0.011308815330266953, "learning_rate": 3.2551020408163265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2871 }, { "completion_length": 167.1428680419922, "epoch": 0.18234920634920634, "grad_norm": 0.005684552248567343, "kl": 0.013023333624005318, "learning_rate": 3.2562358276643993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2872 }, { "completion_length": 157.42857360839844, "epoch": 0.18241269841269842, "grad_norm": 0.00392892537638545, "kl": 0.011640125885605812, "learning_rate": 3.257369614512471e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2873 }, { "completion_length": 202.57144165039062, "epoch": 0.1824761904761905, "grad_norm": 0.003051703330129385, "kl": 0.008037952706217766, "learning_rate": 3.258503401360544e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2874 }, { "completion_length": 200.1428680419922, "epoch": 0.18253968253968253, "grad_norm": 0.002548755845054984, "kl": 0.006741393823176622, "learning_rate": 3.2596371882086167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2875 }, { "completion_length": 139.71429443359375, "epoch": 0.1826031746031746, "grad_norm": 0.004217267967760563, "kl": 0.011442061513662338, "learning_rate": 3.260770975056689e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2876 }, { "completion_length": 151.42857360839844, "epoch": 0.18266666666666667, "grad_norm": 0.003546220948919654, "kl": 0.00903021078556776, "learning_rate": 3.261904761904762e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2877 }, { "completion_length": 141.6428680419922, "epoch": 0.18273015873015874, "grad_norm": 0.0063611725345253944, "kl": 0.013487284071743488, "learning_rate": 3.2630385487528346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2878 }, { "completion_length": 147.5, "epoch": 0.18279365079365079, "grad_norm": 0.005642799660563469, "kl": 0.012964651919901371, "learning_rate": 3.264172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2879 }, { "completion_length": 153.57144165039062, "epoch": 0.18285714285714286, "grad_norm": 1.9698784351348877, "kl": 0.013757919892668724, "learning_rate": 3.265306122448979e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2880 }, { "completion_length": 137.5, "epoch": 0.18292063492063493, "grad_norm": 0.007640885654836893, "kl": 0.01587647944688797, "learning_rate": 3.266439909297052e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2881 }, { "completion_length": 162.5, "epoch": 0.18298412698412697, "grad_norm": 0.00485018128529191, "kl": 0.010657576844096184, "learning_rate": 3.2675736961451243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2882 }, { "completion_length": 173.1428680419922, "epoch": 0.18304761904761904, "grad_norm": 0.0025220203679054976, "kl": 0.00694122351706028, "learning_rate": 3.268707482993197e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2883 }, { "completion_length": 184.85714721679688, "epoch": 0.1831111111111111, "grad_norm": 0.005178715568035841, "kl": 0.010831540450453758, "learning_rate": 3.26984126984127e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2884 }, { "completion_length": 127.64286041259766, "epoch": 0.18317460317460318, "grad_norm": 0.004008207470178604, "kl": 0.00926487147808075, "learning_rate": 3.270975056689342e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2885 }, { "completion_length": 188.57144165039062, "epoch": 0.18323809523809523, "grad_norm": 0.0030890158377587795, "kl": 0.007959803566336632, "learning_rate": 3.272108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2886 }, { "completion_length": 149.07144165039062, "epoch": 0.1833015873015873, "grad_norm": 0.004062964580953121, "kl": 0.009781192056834698, "learning_rate": 3.273242630385488e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2887 }, { "completion_length": 200.7857208251953, "epoch": 0.18336507936507937, "grad_norm": 0.002372465329244733, "kl": 0.007110225968062878, "learning_rate": 3.2743764172335596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2888 }, { "completion_length": 160.42857360839844, "epoch": 0.18342857142857144, "grad_norm": 0.0028083547949790955, "kl": 0.007202044129371643, "learning_rate": 3.2755102040816325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2889 }, { "completion_length": 116.64286041259766, "epoch": 0.18349206349206348, "grad_norm": 0.003740380285307765, "kl": 0.00869186781346798, "learning_rate": 3.276643990929705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2890 }, { "completion_length": 181.00001525878906, "epoch": 0.18355555555555556, "grad_norm": 0.0024617905728518963, "kl": 0.007925199344754219, "learning_rate": 3.2777777777777776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2891 }, { "completion_length": 181.2857208251953, "epoch": 0.18361904761904763, "grad_norm": 0.0022742589935660362, "kl": 0.0067085521295666695, "learning_rate": 3.2789115646258504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2892 }, { "completion_length": 175.50001525878906, "epoch": 0.18368253968253967, "grad_norm": 0.002266460796818137, "kl": 0.006358427926898003, "learning_rate": 3.2800453514739227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2893 }, { "completion_length": 175.42857360839844, "epoch": 0.18374603174603174, "grad_norm": 0.0020143610890954733, "kl": 0.005979794077575207, "learning_rate": 3.2811791383219955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2894 }, { "completion_length": 167.21429443359375, "epoch": 0.1838095238095238, "grad_norm": 0.0028473716229200363, "kl": 0.006793315056711435, "learning_rate": 3.282312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2895 }, { "completion_length": 139.7857208251953, "epoch": 0.18387301587301588, "grad_norm": 0.002267645439133048, "kl": 0.006298610474914312, "learning_rate": 3.28344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2896 }, { "completion_length": 215.07144165039062, "epoch": 0.18393650793650793, "grad_norm": 0.002214154927060008, "kl": 0.006169470027089119, "learning_rate": 3.284580498866213e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2897 }, { "completion_length": 160.7857208251953, "epoch": 0.184, "grad_norm": 0.002659679390490055, "kl": 0.007644101977348328, "learning_rate": 3.2857142857142857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2898 }, { "completion_length": 156.21429443359375, "epoch": 0.18406349206349207, "grad_norm": 0.0027838244568556547, "kl": 0.007399755530059338, "learning_rate": 3.286848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2899 }, { "completion_length": 193.21429443359375, "epoch": 0.18412698412698414, "grad_norm": 0.0017297285376116633, "kl": 0.004895126447081566, "learning_rate": 3.287981859410431e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2900 }, { "completion_length": 121.00000762939453, "epoch": 0.18419047619047618, "grad_norm": 0.003183036809787154, "kl": 0.009385315701365471, "learning_rate": 3.2891156462585036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2901 }, { "completion_length": 169.85714721679688, "epoch": 0.18425396825396825, "grad_norm": 0.0027395065408200026, "kl": 0.0072532715275883675, "learning_rate": 3.2902494331065754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2902 }, { "completion_length": 180.2857208251953, "epoch": 0.18431746031746032, "grad_norm": 0.0025079799816012383, "kl": 0.0066811637952923775, "learning_rate": 3.291383219954648e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2903 }, { "completion_length": 156.35714721679688, "epoch": 0.18438095238095237, "grad_norm": 0.0022317091934382915, "kl": 0.00661160983145237, "learning_rate": 3.292517006802721e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2904 }, { "completion_length": 167.7857208251953, "epoch": 0.18444444444444444, "grad_norm": 0.0023443931713700294, "kl": 0.006690348032861948, "learning_rate": 3.2936507936507933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2905 }, { "completion_length": 162.5, "epoch": 0.1845079365079365, "grad_norm": 0.0035749853122979403, "kl": 0.009678129106760025, "learning_rate": 3.294784580498866e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2906 }, { "completion_length": 141.6428680419922, "epoch": 0.18457142857142858, "grad_norm": 0.002141220262274146, "kl": 0.006326244678348303, "learning_rate": 3.295918367346939e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2907 }, { "completion_length": 158.5, "epoch": 0.18463492063492062, "grad_norm": 0.003102588700130582, "kl": 0.008145106956362724, "learning_rate": 3.297052154195011e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2908 }, { "completion_length": 147.35714721679688, "epoch": 0.1846984126984127, "grad_norm": 0.0026289725210517645, "kl": 0.006559650879353285, "learning_rate": 3.298185941043084e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2909 }, { "completion_length": 160.07144165039062, "epoch": 0.18476190476190477, "grad_norm": 0.0026646978221833706, "kl": 0.007814135402441025, "learning_rate": 3.299319727891156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2910 }, { "completion_length": 195.57144165039062, "epoch": 0.18482539682539684, "grad_norm": 0.0023478304501622915, "kl": 0.006394528318196535, "learning_rate": 3.3004535147392287e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2911 }, { "completion_length": 155.92857360839844, "epoch": 0.18488888888888888, "grad_norm": 0.002617008052766323, "kl": 0.006868643686175346, "learning_rate": 3.3015873015873015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2912 }, { "completion_length": 197.1428680419922, "epoch": 0.18495238095238095, "grad_norm": 0.0024610799737274647, "kl": 0.0070579564198851585, "learning_rate": 3.302721088435374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2913 }, { "completion_length": 163.0, "epoch": 0.18501587301587302, "grad_norm": 0.0024533672258257866, "kl": 0.006045156624168158, "learning_rate": 3.3038548752834466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2914 }, { "completion_length": 154.0, "epoch": 0.18507936507936507, "grad_norm": 0.002311705844476819, "kl": 0.006295262835919857, "learning_rate": 3.3049886621315194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2915 }, { "completion_length": 156.35714721679688, "epoch": 0.18514285714285714, "grad_norm": 0.0027870212215930223, "kl": 0.006675143726170063, "learning_rate": 3.3061224489795917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2916 }, { "completion_length": 167.7857208251953, "epoch": 0.1852063492063492, "grad_norm": 0.0024243562947958708, "kl": 0.006827410776168108, "learning_rate": 3.307256235827664e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2917 }, { "completion_length": 148.2857208251953, "epoch": 0.18526984126984128, "grad_norm": 0.0027017395477741957, "kl": 0.007352177053689957, "learning_rate": 3.308390022675737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2918 }, { "completion_length": 197.21429443359375, "epoch": 0.18533333333333332, "grad_norm": 0.0024729506112635136, "kl": 0.0071828775107860565, "learning_rate": 3.309523809523809e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2919 }, { "completion_length": 183.7857208251953, "epoch": 0.1853968253968254, "grad_norm": 0.0019553648307919502, "kl": 0.005327729973942041, "learning_rate": 3.310657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2920 }, { "completion_length": 161.0, "epoch": 0.18546031746031746, "grad_norm": 0.0034158292692154646, "kl": 0.008335168473422527, "learning_rate": 3.3117913832199547e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2921 }, { "completion_length": 177.42857360839844, "epoch": 0.18552380952380954, "grad_norm": 0.0022096855100244284, "kl": 0.006561604328453541, "learning_rate": 3.312925170068027e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2922 }, { "completion_length": 168.92857360839844, "epoch": 0.18558730158730158, "grad_norm": 0.002555588725954294, "kl": 0.0067041190341115, "learning_rate": 3.3140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2923 }, { "completion_length": 180.21429443359375, "epoch": 0.18565079365079365, "grad_norm": 0.002035310259088874, "kl": 0.005866129882633686, "learning_rate": 3.3151927437641727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2924 }, { "completion_length": 156.57144165039062, "epoch": 0.18571428571428572, "grad_norm": 0.0023578135296702385, "kl": 0.00688969437032938, "learning_rate": 3.3163265306122444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2925 }, { "completion_length": 174.92857360839844, "epoch": 0.18577777777777776, "grad_norm": 0.00307919317856431, "kl": 0.007738430052995682, "learning_rate": 3.317460317460317e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2926 }, { "completion_length": 155.71429443359375, "epoch": 0.18584126984126983, "grad_norm": 0.0032851537689566612, "kl": 0.010103915818035603, "learning_rate": 3.31859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2927 }, { "completion_length": 157.2857208251953, "epoch": 0.1859047619047619, "grad_norm": 0.0021099550649523735, "kl": 0.006089439615607262, "learning_rate": 3.3197278911564624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2928 }, { "completion_length": 183.71429443359375, "epoch": 0.18596825396825398, "grad_norm": 0.001962095033377409, "kl": 0.005912214983254671, "learning_rate": 3.320861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2929 }, { "completion_length": 194.42857360839844, "epoch": 0.18603174603174602, "grad_norm": 0.0018203422660008073, "kl": 0.004919139668345451, "learning_rate": 3.321995464852608e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2930 }, { "completion_length": 180.35714721679688, "epoch": 0.1860952380952381, "grad_norm": 0.002450644737109542, "kl": 0.0066740019246935844, "learning_rate": 3.3231292517006803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2931 }, { "completion_length": 193.92857360839844, "epoch": 0.18615873015873016, "grad_norm": 0.0015988406958058476, "kl": 0.0051811570301651955, "learning_rate": 3.3242630385487526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2932 }, { "completion_length": 138.07144165039062, "epoch": 0.18622222222222223, "grad_norm": 0.0030240658670663834, "kl": 0.007700883783400059, "learning_rate": 3.325396825396825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2933 }, { "completion_length": 165.35714721679688, "epoch": 0.18628571428571428, "grad_norm": 0.0019906682427972555, "kl": 0.006610692944377661, "learning_rate": 3.3265306122448977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2934 }, { "completion_length": 145.57144165039062, "epoch": 0.18634920634920635, "grad_norm": 0.002037447178736329, "kl": 0.005704623181372881, "learning_rate": 3.3276643990929705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2935 }, { "completion_length": 202.07144165039062, "epoch": 0.18641269841269842, "grad_norm": 0.0016855379799380898, "kl": 0.005187439266592264, "learning_rate": 3.328798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2936 }, { "completion_length": 195.00001525878906, "epoch": 0.1864761904761905, "grad_norm": 0.0016857702285051346, "kl": 0.006262539885938168, "learning_rate": 3.3299319727891156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2937 }, { "completion_length": 153.85714721679688, "epoch": 0.18653968253968253, "grad_norm": 0.0022622128017246723, "kl": 0.00655315862968564, "learning_rate": 3.3310657596371884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2938 }, { "completion_length": 196.50001525878906, "epoch": 0.1866031746031746, "grad_norm": 0.0018213661387562752, "kl": 0.0060937185771763325, "learning_rate": 3.33219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2939 }, { "completion_length": 153.0, "epoch": 0.18666666666666668, "grad_norm": 1.2020930051803589, "kl": 0.007411210797727108, "learning_rate": 3.333333333333333e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2940 }, { "completion_length": 173.2857208251953, "epoch": 0.18673015873015872, "grad_norm": 0.002681253245100379, "kl": 0.009951089508831501, "learning_rate": 3.334467120181406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2941 }, { "completion_length": 131.57144165039062, "epoch": 0.1867936507936508, "grad_norm": 0.00299834948964417, "kl": 0.008678942918777466, "learning_rate": 3.335600907029478e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2942 }, { "completion_length": 179.7857208251953, "epoch": 0.18685714285714286, "grad_norm": 0.9539927840232849, "kl": 0.008123712614178658, "learning_rate": 3.336734693877551e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2943 }, { "completion_length": 146.1428680419922, "epoch": 0.18692063492063493, "grad_norm": 0.0030420650728046894, "kl": 0.007438214495778084, "learning_rate": 3.337868480725624e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2944 }, { "completion_length": 167.7857208251953, "epoch": 0.18698412698412697, "grad_norm": 0.0038123021367937326, "kl": 0.008613074198365211, "learning_rate": 3.339002267573696e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2945 }, { "completion_length": 153.85714721679688, "epoch": 0.18704761904761905, "grad_norm": 0.0024130232632160187, "kl": 0.007200679741799831, "learning_rate": 3.340136054421769e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2946 }, { "completion_length": 165.5, "epoch": 0.18711111111111112, "grad_norm": 0.003180981148034334, "kl": 0.007648817263543606, "learning_rate": 3.341269841269841e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2947 }, { "completion_length": 154.85714721679688, "epoch": 0.1871746031746032, "grad_norm": 1.953026294708252, "kl": 0.008401786908507347, "learning_rate": 3.3424036281179134e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2948 }, { "completion_length": 162.85714721679688, "epoch": 0.18723809523809523, "grad_norm": 0.0020108106546103954, "kl": 0.005936493165791035, "learning_rate": 3.343537414965986e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2949 }, { "completion_length": 195.2857208251953, "epoch": 0.1873015873015873, "grad_norm": 0.002886682515963912, "kl": 0.007130871526896954, "learning_rate": 3.344671201814059e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2950 }, { "completion_length": 157.85714721679688, "epoch": 0.18736507936507937, "grad_norm": 0.0027508754283189774, "kl": 0.007496778853237629, "learning_rate": 3.3458049886621314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2951 }, { "completion_length": 141.1428680419922, "epoch": 0.18742857142857142, "grad_norm": 0.004040705505758524, "kl": 0.009936448186635971, "learning_rate": 3.346938775510204e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2952 }, { "completion_length": 158.71429443359375, "epoch": 0.1874920634920635, "grad_norm": 0.003183746011927724, "kl": 0.010179529897868633, "learning_rate": 3.3480725623582765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2953 }, { "completion_length": 149.92857360839844, "epoch": 0.18755555555555556, "grad_norm": 0.002966885222122073, "kl": 0.009575361385941505, "learning_rate": 3.349206349206349e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2954 }, { "completion_length": 180.57144165039062, "epoch": 0.18761904761904763, "grad_norm": 0.002738457638770342, "kl": 0.007534876465797424, "learning_rate": 3.3503401360544216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2955 }, { "completion_length": 163.5, "epoch": 0.18768253968253967, "grad_norm": 0.003037068760022521, "kl": 0.009506236761808395, "learning_rate": 3.351473922902494e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2956 }, { "completion_length": 174.85714721679688, "epoch": 0.18774603174603174, "grad_norm": 0.004710698500275612, "kl": 0.013806035742163658, "learning_rate": 3.3526077097505667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2957 }, { "completion_length": 167.92857360839844, "epoch": 0.18780952380952382, "grad_norm": 0.003324650228023529, "kl": 0.011714914813637733, "learning_rate": 3.3537414965986395e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2958 }, { "completion_length": 175.57144165039062, "epoch": 0.1878730158730159, "grad_norm": 0.003643354866653681, "kl": 0.012491255067288876, "learning_rate": 3.354875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2959 }, { "completion_length": 197.50001525878906, "epoch": 0.18793650793650793, "grad_norm": 0.0029407681431621313, "kl": 0.009193716570734978, "learning_rate": 3.3560090702947846e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2960 }, { "completion_length": 187.71429443359375, "epoch": 0.188, "grad_norm": 0.004506090190261602, "kl": 0.014340790919959545, "learning_rate": 3.357142857142857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2961 }, { "completion_length": 170.1428680419922, "epoch": 0.18806349206349207, "grad_norm": 0.0029155940283089876, "kl": 0.009276632219552994, "learning_rate": 3.358276643990929e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2962 }, { "completion_length": 131.85714721679688, "epoch": 0.18812698412698411, "grad_norm": 0.004995744209736586, "kl": 0.015585874207317829, "learning_rate": 3.359410430839002e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2963 }, { "completion_length": 181.00001525878906, "epoch": 0.18819047619047619, "grad_norm": 0.005190521478652954, "kl": 0.014349240809679031, "learning_rate": 3.360544217687075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2964 }, { "completion_length": 180.42857360839844, "epoch": 0.18825396825396826, "grad_norm": 0.004174519330263138, "kl": 0.013769522309303284, "learning_rate": 3.361678004535147e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2965 }, { "completion_length": 145.1428680419922, "epoch": 0.18831746031746033, "grad_norm": 0.004353443160653114, "kl": 0.015521610155701637, "learning_rate": 3.36281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2966 }, { "completion_length": 132.42857360839844, "epoch": 0.18838095238095237, "grad_norm": 0.00571346515789628, "kl": 0.018299058079719543, "learning_rate": 3.363945578231293e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2967 }, { "completion_length": 166.07144165039062, "epoch": 0.18844444444444444, "grad_norm": 0.004115398973226547, "kl": 0.014147231355309486, "learning_rate": 3.365079365079365e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2968 }, { "completion_length": 157.92857360839844, "epoch": 0.1885079365079365, "grad_norm": 1.4329935312271118, "kl": 0.014889366924762726, "learning_rate": 3.3662131519274374e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 2969 }, { "completion_length": 137.71429443359375, "epoch": 0.18857142857142858, "grad_norm": 0.005351041443645954, "kl": 0.016236767172813416, "learning_rate": 3.3673469387755096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2970 }, { "completion_length": 163.6428680419922, "epoch": 0.18863492063492063, "grad_norm": 0.005013058427721262, "kl": 0.01436099223792553, "learning_rate": 3.3684807256235825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2971 }, { "completion_length": 155.0, "epoch": 0.1886984126984127, "grad_norm": 0.005801328457891941, "kl": 0.01836850866675377, "learning_rate": 3.3696145124716553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2972 }, { "completion_length": 143.5, "epoch": 0.18876190476190477, "grad_norm": 0.0041604419238865376, "kl": 0.014269108884036541, "learning_rate": 3.3707482993197276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2973 }, { "completion_length": 158.2857208251953, "epoch": 0.1888253968253968, "grad_norm": 0.0034232130274176598, "kl": 0.012548447586596012, "learning_rate": 3.3718820861678004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2974 }, { "completion_length": 161.07144165039062, "epoch": 0.18888888888888888, "grad_norm": 0.0042027500458061695, "kl": 0.014749055728316307, "learning_rate": 3.373015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2975 }, { "completion_length": 146.57144165039062, "epoch": 0.18895238095238095, "grad_norm": 0.0053718783892691135, "kl": 0.018917173147201538, "learning_rate": 3.374149659863945e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2976 }, { "completion_length": 162.07144165039062, "epoch": 0.18901587301587303, "grad_norm": 0.00478466646745801, "kl": 0.016195019707083702, "learning_rate": 3.375283446712018e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2977 }, { "completion_length": 169.2857208251953, "epoch": 0.18907936507936507, "grad_norm": 0.0055074542760849, "kl": 0.01913125440478325, "learning_rate": 3.3764172335600906e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2978 }, { "completion_length": 170.7857208251953, "epoch": 0.18914285714285714, "grad_norm": 0.00405482854694128, "kl": 0.012971658259630203, "learning_rate": 3.377551020408163e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2979 }, { "completion_length": 182.71429443359375, "epoch": 0.1892063492063492, "grad_norm": 0.0032241058070212603, "kl": 0.012209589593112469, "learning_rate": 3.3786848072562357e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2980 }, { "completion_length": 173.42857360839844, "epoch": 0.18926984126984128, "grad_norm": 0.004510378465056419, "kl": 0.014422734268009663, "learning_rate": 3.3798185941043085e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2981 }, { "completion_length": 161.71429443359375, "epoch": 0.18933333333333333, "grad_norm": 0.005109981168061495, "kl": 0.018359437584877014, "learning_rate": 3.380952380952381e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2982 }, { "completion_length": 137.6428680419922, "epoch": 0.1893968253968254, "grad_norm": 0.006485156249254942, "kl": 0.01991523616015911, "learning_rate": 3.3820861678004536e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2983 }, { "completion_length": 164.7857208251953, "epoch": 0.18946031746031747, "grad_norm": 0.004160245880484581, "kl": 0.012976134195923805, "learning_rate": 3.383219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2984 }, { "completion_length": 143.0, "epoch": 0.1895238095238095, "grad_norm": 0.0039420281536877155, "kl": 0.015025686472654343, "learning_rate": 3.384353741496598e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2985 }, { "completion_length": 182.00001525878906, "epoch": 0.18958730158730158, "grad_norm": 0.0039648860692977905, "kl": 0.01456930860877037, "learning_rate": 3.385487528344671e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2986 }, { "completion_length": 196.92857360839844, "epoch": 0.18965079365079365, "grad_norm": 0.0037548921536654234, "kl": 0.013680761680006981, "learning_rate": 3.386621315192744e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2987 }, { "completion_length": 150.5, "epoch": 0.18971428571428572, "grad_norm": 0.004464098252356052, "kl": 0.01662420481443405, "learning_rate": 3.387755102040816e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2988 }, { "completion_length": 142.71429443359375, "epoch": 0.18977777777777777, "grad_norm": 0.0038488851860165596, "kl": 0.013573097065091133, "learning_rate": 3.388888888888889e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2989 }, { "completion_length": 180.57144165039062, "epoch": 0.18984126984126984, "grad_norm": 0.003548749489709735, "kl": 0.012884816154837608, "learning_rate": 3.390022675736962e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2990 }, { "completion_length": 153.71429443359375, "epoch": 0.1899047619047619, "grad_norm": 0.0061066304333508015, "kl": 0.021005835384130478, "learning_rate": 3.3911564625850336e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2991 }, { "completion_length": 169.85714721679688, "epoch": 0.18996825396825398, "grad_norm": 0.003475178498774767, "kl": 0.01158415712416172, "learning_rate": 3.3922902494331064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2992 }, { "completion_length": 163.35714721679688, "epoch": 0.19003174603174602, "grad_norm": 0.00395731907337904, "kl": 0.013604188337922096, "learning_rate": 3.3934240362811787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2993 }, { "completion_length": 158.85714721679688, "epoch": 0.1900952380952381, "grad_norm": 0.005109698045998812, "kl": 0.017467129975557327, "learning_rate": 3.3945578231292515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2994 }, { "completion_length": 184.35714721679688, "epoch": 0.19015873015873017, "grad_norm": 0.004202594980597496, "kl": 0.013914505019783974, "learning_rate": 3.3956916099773243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2995 }, { "completion_length": 162.1428680419922, "epoch": 0.1902222222222222, "grad_norm": 0.004599146544933319, "kl": 0.01525199692696333, "learning_rate": 3.3968253968253966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2996 }, { "completion_length": 169.6428680419922, "epoch": 0.19028571428571428, "grad_norm": 0.00516487518325448, "kl": 0.014323585666716099, "learning_rate": 3.3979591836734694e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 2997 }, { "completion_length": 192.57144165039062, "epoch": 0.19034920634920635, "grad_norm": 0.004111945163458586, "kl": 0.013431793078780174, "learning_rate": 3.3990929705215417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2998 }, { "completion_length": 132.92857360839844, "epoch": 0.19041269841269842, "grad_norm": 0.004673215094953775, "kl": 0.01524352002888918, "learning_rate": 3.400226757369614e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 2999 }, { "completion_length": 156.42857360839844, "epoch": 0.19047619047619047, "grad_norm": 0.0047284578904509544, "kl": 0.017020611092448235, "learning_rate": 3.401360544217687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3000 }, { "completion_length": 185.2857208251953, "epoch": 0.19053968253968254, "grad_norm": 0.004134323447942734, "kl": 0.012903931550681591, "learning_rate": 3.4024943310657596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3001 }, { "completion_length": 141.6428680419922, "epoch": 0.1906031746031746, "grad_norm": 0.006483005825430155, "kl": 0.018907738849520683, "learning_rate": 3.403628117913832e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3002 }, { "completion_length": 175.7857208251953, "epoch": 0.19066666666666668, "grad_norm": 0.0034735973458737135, "kl": 0.0128276152536273, "learning_rate": 3.4047619047619047e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3003 }, { "completion_length": 142.5, "epoch": 0.19073015873015872, "grad_norm": 0.005647104699164629, "kl": 0.018308449536561966, "learning_rate": 3.4058956916099776e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3004 }, { "completion_length": 192.2857208251953, "epoch": 0.1907936507936508, "grad_norm": 0.0069643910974264145, "kl": 0.01646111160516739, "learning_rate": 3.40702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3005 }, { "completion_length": 190.07144165039062, "epoch": 0.19085714285714286, "grad_norm": 0.0036628914531320333, "kl": 0.01291059423238039, "learning_rate": 3.408163265306122e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3006 }, { "completion_length": 159.21429443359375, "epoch": 0.1909206349206349, "grad_norm": 0.004569764249026775, "kl": 0.015169735997915268, "learning_rate": 3.409297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3007 }, { "completion_length": 141.5, "epoch": 0.19098412698412698, "grad_norm": 1.4316935539245605, "kl": 0.01647157408297062, "learning_rate": 3.410430839002267e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3008 }, { "completion_length": 168.42857360839844, "epoch": 0.19104761904761905, "grad_norm": 0.003907142672687769, "kl": 0.012835103087127209, "learning_rate": 3.41156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3009 }, { "completion_length": 170.92857360839844, "epoch": 0.19111111111111112, "grad_norm": 0.0037715390790253878, "kl": 0.013150166720151901, "learning_rate": 3.412698412698413e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3010 }, { "completion_length": 137.0, "epoch": 0.19117460317460316, "grad_norm": 0.004619303625077009, "kl": 0.01577506773173809, "learning_rate": 3.413832199546485e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3011 }, { "completion_length": 166.35714721679688, "epoch": 0.19123809523809523, "grad_norm": 0.005746142938733101, "kl": 0.017068490386009216, "learning_rate": 3.414965986394558e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3012 }, { "completion_length": 147.6428680419922, "epoch": 0.1913015873015873, "grad_norm": 0.0041202581487596035, "kl": 0.01422478724271059, "learning_rate": 3.41609977324263e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3013 }, { "completion_length": 169.92857360839844, "epoch": 0.19136507936507938, "grad_norm": 0.004652522969990969, "kl": 0.016585662961006165, "learning_rate": 3.4172335600907026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3014 }, { "completion_length": 154.6428680419922, "epoch": 0.19142857142857142, "grad_norm": 0.005854981020092964, "kl": 0.017685431987047195, "learning_rate": 3.4183673469387754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3015 }, { "completion_length": 131.7857208251953, "epoch": 0.1914920634920635, "grad_norm": 1.0288646221160889, "kl": 0.01780233159661293, "learning_rate": 3.4195011337868477e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3016 }, { "completion_length": 186.85714721679688, "epoch": 0.19155555555555556, "grad_norm": 0.003690488403663039, "kl": 0.01146854180842638, "learning_rate": 3.4206349206349205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3017 }, { "completion_length": 103.92857360839844, "epoch": 0.1916190476190476, "grad_norm": 0.005990264471620321, "kl": 0.021495606750249863, "learning_rate": 3.4217687074829933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3018 }, { "completion_length": 162.0, "epoch": 0.19168253968253968, "grad_norm": 0.003990787547081709, "kl": 0.014942087233066559, "learning_rate": 3.4229024943310656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3019 }, { "completion_length": 144.2857208251953, "epoch": 0.19174603174603175, "grad_norm": 0.007331385277211666, "kl": 0.02220514975488186, "learning_rate": 3.424036281179138e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3020 }, { "completion_length": 152.35714721679688, "epoch": 0.19180952380952382, "grad_norm": 0.005270492285490036, "kl": 0.018148893490433693, "learning_rate": 3.4251700680272107e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3021 }, { "completion_length": 127.00000762939453, "epoch": 0.19187301587301586, "grad_norm": 0.005652410909533501, "kl": 0.02039637602865696, "learning_rate": 3.426303854875283e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3022 }, { "completion_length": 158.92857360839844, "epoch": 0.19193650793650793, "grad_norm": 0.006272639147937298, "kl": 0.020632749423384666, "learning_rate": 3.427437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3023 }, { "completion_length": 149.1428680419922, "epoch": 0.192, "grad_norm": 0.0048471055924892426, "kl": 0.018667474389076233, "learning_rate": 3.4285714285714286e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3024 }, { "completion_length": 158.0, "epoch": 0.19206349206349208, "grad_norm": 0.006015630438923836, "kl": 0.018474021926522255, "learning_rate": 3.429705215419501e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3025 }, { "completion_length": 168.71429443359375, "epoch": 0.19212698412698412, "grad_norm": 0.005526956170797348, "kl": 0.020374873653054237, "learning_rate": 3.430839002267574e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3026 }, { "completion_length": 142.7857208251953, "epoch": 0.1921904761904762, "grad_norm": 0.006392505019903183, "kl": 0.019801059737801552, "learning_rate": 3.4319727891156466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3027 }, { "completion_length": 166.5, "epoch": 0.19225396825396826, "grad_norm": 0.005503755994141102, "kl": 0.019609970971941948, "learning_rate": 3.4331065759637183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3028 }, { "completion_length": 163.7857208251953, "epoch": 0.1923174603174603, "grad_norm": 0.005686107557266951, "kl": 0.019027939066290855, "learning_rate": 3.434240362811791e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3029 }, { "completion_length": 186.42857360839844, "epoch": 0.19238095238095237, "grad_norm": 0.004056937526911497, "kl": 0.014660107903182507, "learning_rate": 3.435374149659864e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3030 }, { "completion_length": 153.1428680419922, "epoch": 0.19244444444444445, "grad_norm": 0.0047796182334423065, "kl": 0.01653290167450905, "learning_rate": 3.436507936507936e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3031 }, { "completion_length": 159.5, "epoch": 0.19250793650793652, "grad_norm": 0.0059866877272725105, "kl": 0.024296006187796593, "learning_rate": 3.437641723356009e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3032 }, { "completion_length": 171.42857360839844, "epoch": 0.19257142857142856, "grad_norm": 0.005165725946426392, "kl": 0.021792765706777573, "learning_rate": 3.4387755102040814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3033 }, { "completion_length": 169.5, "epoch": 0.19263492063492063, "grad_norm": 0.004509997554123402, "kl": 0.018874628469347954, "learning_rate": 3.439909297052154e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3034 }, { "completion_length": 169.2857208251953, "epoch": 0.1926984126984127, "grad_norm": 0.006414404138922691, "kl": 0.024777041748166084, "learning_rate": 3.4410430839002265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3035 }, { "completion_length": 131.92857360839844, "epoch": 0.19276190476190477, "grad_norm": 0.0093039246276021, "kl": 0.028475144878029823, "learning_rate": 3.442176870748299e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3036 }, { "completion_length": 143.0, "epoch": 0.19282539682539682, "grad_norm": 0.007220402825623751, "kl": 0.024326810613274574, "learning_rate": 3.4433106575963716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3037 }, { "completion_length": 148.21429443359375, "epoch": 0.1928888888888889, "grad_norm": 0.0099031962454319, "kl": 0.029257461428642273, "learning_rate": 3.4444444444444444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3038 }, { "completion_length": 162.6428680419922, "epoch": 0.19295238095238096, "grad_norm": 0.00620970968157053, "kl": 0.021672608330845833, "learning_rate": 3.4455782312925167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3039 }, { "completion_length": 169.0, "epoch": 0.19301587301587303, "grad_norm": 0.004756515845656395, "kl": 0.01711677759885788, "learning_rate": 3.4467120181405895e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3040 }, { "completion_length": 156.7857208251953, "epoch": 0.19307936507936507, "grad_norm": 0.006828123703598976, "kl": 0.023114195093512535, "learning_rate": 3.4478458049886623e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3041 }, { "completion_length": 190.92857360839844, "epoch": 0.19314285714285714, "grad_norm": 0.005111666861921549, "kl": 0.018001489341259003, "learning_rate": 3.4489795918367346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3042 }, { "completion_length": 194.6428680419922, "epoch": 0.19320634920634921, "grad_norm": 0.004474600777029991, "kl": 0.017016863450407982, "learning_rate": 3.450113378684807e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3043 }, { "completion_length": 165.92857360839844, "epoch": 0.19326984126984126, "grad_norm": 0.0055555920116603374, "kl": 0.019207820296287537, "learning_rate": 3.4512471655328797e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3044 }, { "completion_length": 169.92857360839844, "epoch": 0.19333333333333333, "grad_norm": 0.0044538904912769794, "kl": 0.015423172153532505, "learning_rate": 3.452380952380952e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3045 }, { "completion_length": 229.85714721679688, "epoch": 0.1933968253968254, "grad_norm": 0.00406607985496521, "kl": 0.014723976142704487, "learning_rate": 3.453514739229025e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3046 }, { "completion_length": 172.07144165039062, "epoch": 0.19346031746031747, "grad_norm": 0.004610291216522455, "kl": 0.01677118055522442, "learning_rate": 3.4546485260770977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3047 }, { "completion_length": 177.57144165039062, "epoch": 0.19352380952380951, "grad_norm": 1.06732177734375, "kl": 0.015434663742780685, "learning_rate": 3.45578231292517e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3048 }, { "completion_length": 197.92857360839844, "epoch": 0.19358730158730159, "grad_norm": 0.0038892454467713833, "kl": 0.01268655527383089, "learning_rate": 3.456916099773243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3049 }, { "completion_length": 156.21429443359375, "epoch": 0.19365079365079366, "grad_norm": 0.006236729212105274, "kl": 0.02247738093137741, "learning_rate": 3.458049886621315e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3050 }, { "completion_length": 181.6428680419922, "epoch": 0.19371428571428573, "grad_norm": 0.0059595569036901, "kl": 0.021304059773683548, "learning_rate": 3.4591836734693874e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3051 }, { "completion_length": 156.57144165039062, "epoch": 0.19377777777777777, "grad_norm": 0.00521105295047164, "kl": 0.019316328689455986, "learning_rate": 3.46031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3052 }, { "completion_length": 167.07144165039062, "epoch": 0.19384126984126984, "grad_norm": 0.004385302774608135, "kl": 0.015749622136354446, "learning_rate": 3.4614512471655325e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3053 }, { "completion_length": 130.35714721679688, "epoch": 0.1939047619047619, "grad_norm": 0.005020115990191698, "kl": 0.019342778250575066, "learning_rate": 3.4625850340136053e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3054 }, { "completion_length": 155.92857360839844, "epoch": 0.19396825396825396, "grad_norm": 0.004619958810508251, "kl": 0.013034072704613209, "learning_rate": 3.463718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3055 }, { "completion_length": 145.57144165039062, "epoch": 0.19403174603174603, "grad_norm": 0.006098707672208548, "kl": 0.024342317134141922, "learning_rate": 3.4648526077097504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3056 }, { "completion_length": 143.5, "epoch": 0.1940952380952381, "grad_norm": 0.006279780063778162, "kl": 0.020782986655831337, "learning_rate": 3.4659863945578227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3057 }, { "completion_length": 171.7857208251953, "epoch": 0.19415873015873017, "grad_norm": 0.005763562396168709, "kl": 0.017435971647500992, "learning_rate": 3.4671201814058955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3058 }, { "completion_length": 119.64286041259766, "epoch": 0.1942222222222222, "grad_norm": 0.005842437036335468, "kl": 0.020687129348516464, "learning_rate": 3.468253968253968e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3059 }, { "completion_length": 130.07144165039062, "epoch": 0.19428571428571428, "grad_norm": 0.005014285445213318, "kl": 0.018550928682088852, "learning_rate": 3.4693877551020406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3060 }, { "completion_length": 195.85714721679688, "epoch": 0.19434920634920635, "grad_norm": 0.004097475670278072, "kl": 0.01370691042393446, "learning_rate": 3.4705215419501134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3061 }, { "completion_length": 178.1428680419922, "epoch": 0.19441269841269843, "grad_norm": 0.006912533659487963, "kl": 0.025018462911248207, "learning_rate": 3.4716553287981857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3062 }, { "completion_length": 140.5, "epoch": 0.19447619047619047, "grad_norm": 0.004860131070017815, "kl": 0.01873328536748886, "learning_rate": 3.4727891156462585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3063 }, { "completion_length": 137.7857208251953, "epoch": 0.19453968253968254, "grad_norm": 0.006263709161430597, "kl": 0.022185422480106354, "learning_rate": 3.4739229024943314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3064 }, { "completion_length": 163.7857208251953, "epoch": 0.1946031746031746, "grad_norm": 0.0047981454990804195, "kl": 0.015809262171387672, "learning_rate": 3.475056689342403e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3065 }, { "completion_length": 147.2857208251953, "epoch": 0.19466666666666665, "grad_norm": 0.006490349769592285, "kl": 0.020082784816622734, "learning_rate": 3.476190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3066 }, { "completion_length": 161.21429443359375, "epoch": 0.19473015873015873, "grad_norm": 0.005826259031891823, "kl": 0.01695800945162773, "learning_rate": 3.477324263038549e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3067 }, { "completion_length": 161.7857208251953, "epoch": 0.1947936507936508, "grad_norm": 0.006111344788223505, "kl": 0.020620381459593773, "learning_rate": 3.478458049886621e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3068 }, { "completion_length": 140.42857360839844, "epoch": 0.19485714285714287, "grad_norm": 0.0038297679275274277, "kl": 0.015045770443975925, "learning_rate": 3.479591836734694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3069 }, { "completion_length": 125.35714721679688, "epoch": 0.1949206349206349, "grad_norm": 0.0061795967631042, "kl": 0.019991207867860794, "learning_rate": 3.4807256235827667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3070 }, { "completion_length": 177.6428680419922, "epoch": 0.19498412698412698, "grad_norm": 0.004299910273402929, "kl": 0.01615183986723423, "learning_rate": 3.481859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3071 }, { "completion_length": 157.21429443359375, "epoch": 0.19504761904761905, "grad_norm": 0.00729069858789444, "kl": 0.018400589004158974, "learning_rate": 3.482993197278911e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3072 }, { "completion_length": 178.42857360839844, "epoch": 0.19511111111111112, "grad_norm": 0.004896100610494614, "kl": 0.015613360330462456, "learning_rate": 3.4841269841269835e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3073 }, { "completion_length": 151.6428680419922, "epoch": 0.19517460317460317, "grad_norm": 0.00581789156422019, "kl": 0.021140700206160545, "learning_rate": 3.4852607709750564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3074 }, { "completion_length": 129.71429443359375, "epoch": 0.19523809523809524, "grad_norm": 0.0076461061835289, "kl": 0.025541769340634346, "learning_rate": 3.486394557823129e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3075 }, { "completion_length": 165.6428680419922, "epoch": 0.1953015873015873, "grad_norm": 0.007238955236971378, "kl": 0.018397433683276176, "learning_rate": 3.4875283446712015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3076 }, { "completion_length": 153.7857208251953, "epoch": 0.19536507936507935, "grad_norm": 0.005741315893828869, "kl": 0.016665494069457054, "learning_rate": 3.4886621315192743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3077 }, { "completion_length": 171.85714721679688, "epoch": 0.19542857142857142, "grad_norm": 0.0046756151132285595, "kl": 0.016124073415994644, "learning_rate": 3.489795918367347e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3078 }, { "completion_length": 141.92857360839844, "epoch": 0.1954920634920635, "grad_norm": 0.004109062720090151, "kl": 0.015410835854709148, "learning_rate": 3.490929705215419e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3079 }, { "completion_length": 165.7857208251953, "epoch": 0.19555555555555557, "grad_norm": 0.004959065932780504, "kl": 0.014215679839253426, "learning_rate": 3.4920634920634917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3080 }, { "completion_length": 196.35714721679688, "epoch": 0.1956190476190476, "grad_norm": 0.0034728983882814646, "kl": 0.013302979990839958, "learning_rate": 3.4931972789115645e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3081 }, { "completion_length": 153.35714721679688, "epoch": 0.19568253968253968, "grad_norm": 0.004451396409422159, "kl": 0.015146102756261826, "learning_rate": 3.494331065759637e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3082 }, { "completion_length": 153.42857360839844, "epoch": 0.19574603174603175, "grad_norm": 0.005131111945956945, "kl": 0.015459026210010052, "learning_rate": 3.4954648526077096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3083 }, { "completion_length": 184.71429443359375, "epoch": 0.19580952380952382, "grad_norm": 0.00413641007617116, "kl": 0.014126169495284557, "learning_rate": 3.4965986394557824e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3084 }, { "completion_length": 145.2857208251953, "epoch": 0.19587301587301587, "grad_norm": 0.004885003436356783, "kl": 0.016643179580569267, "learning_rate": 3.4977324263038547e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3085 }, { "completion_length": 170.57144165039062, "epoch": 0.19593650793650794, "grad_norm": 0.0034350724890828133, "kl": 0.012283788993954659, "learning_rate": 3.4988662131519276e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3086 }, { "completion_length": 170.92857360839844, "epoch": 0.196, "grad_norm": 0.0038776418659836054, "kl": 0.01263626292347908, "learning_rate": 3.5e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3087 }, { "completion_length": 179.42857360839844, "epoch": 0.19606349206349205, "grad_norm": 0.003057247493416071, "kl": 0.010441645048558712, "learning_rate": 3.501133786848072e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3088 }, { "completion_length": 157.57144165039062, "epoch": 0.19612698412698412, "grad_norm": 0.006598386913537979, "kl": 0.0229326244443655, "learning_rate": 3.502267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3089 }, { "completion_length": 156.1428680419922, "epoch": 0.1961904761904762, "grad_norm": 0.0036734789609909058, "kl": 0.012438410893082619, "learning_rate": 3.503401360544218e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3090 }, { "completion_length": 214.85714721679688, "epoch": 0.19625396825396826, "grad_norm": 0.002566214185208082, "kl": 0.008503030985593796, "learning_rate": 3.50453514739229e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3091 }, { "completion_length": 144.21429443359375, "epoch": 0.1963174603174603, "grad_norm": 0.005565222818404436, "kl": 0.016226910054683685, "learning_rate": 3.505668934240363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3092 }, { "completion_length": 163.35714721679688, "epoch": 0.19638095238095238, "grad_norm": 0.004768711980432272, "kl": 0.01451881043612957, "learning_rate": 3.5068027210884357e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3093 }, { "completion_length": 150.71429443359375, "epoch": 0.19644444444444445, "grad_norm": 0.00367463449947536, "kl": 0.011922347359359264, "learning_rate": 3.5079365079365075e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3094 }, { "completion_length": 181.00001525878906, "epoch": 0.19650793650793652, "grad_norm": 0.002916062017902732, "kl": 0.008389770984649658, "learning_rate": 3.5090702947845803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3095 }, { "completion_length": 147.2857208251953, "epoch": 0.19657142857142856, "grad_norm": 0.0034252596087753773, "kl": 0.011182271875441074, "learning_rate": 3.5102040816326526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3096 }, { "completion_length": 176.7857208251953, "epoch": 0.19663492063492063, "grad_norm": 0.0037275045178830624, "kl": 0.011534931138157845, "learning_rate": 3.5113378684807254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3097 }, { "completion_length": 151.85714721679688, "epoch": 0.1966984126984127, "grad_norm": 0.004536970052868128, "kl": 0.014597993344068527, "learning_rate": 3.512471655328798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3098 }, { "completion_length": 169.57144165039062, "epoch": 0.19676190476190475, "grad_norm": 0.0035875982139259577, "kl": 0.011153081431984901, "learning_rate": 3.5136054421768705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3099 }, { "completion_length": 154.2857208251953, "epoch": 0.19682539682539682, "grad_norm": 0.00488432077690959, "kl": 0.01610330119729042, "learning_rate": 3.5147392290249433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3100 }, { "completion_length": 138.21429443359375, "epoch": 0.1968888888888889, "grad_norm": 0.004069582559168339, "kl": 0.01385672390460968, "learning_rate": 3.515873015873016e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3101 }, { "completion_length": 191.00001525878906, "epoch": 0.19695238095238096, "grad_norm": 0.0027715612668544054, "kl": 0.009290516376495361, "learning_rate": 3.517006802721088e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3102 }, { "completion_length": 138.0, "epoch": 0.197015873015873, "grad_norm": 1.3001458644866943, "kl": 0.011106946505606174, "learning_rate": 3.5181405895691607e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3103 }, { "completion_length": 165.0, "epoch": 0.19707936507936508, "grad_norm": 0.00369844282977283, "kl": 0.011709264479577541, "learning_rate": 3.5192743764172335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3104 }, { "completion_length": 139.07144165039062, "epoch": 0.19714285714285715, "grad_norm": 0.005036552902311087, "kl": 0.01404175627976656, "learning_rate": 3.520408163265306e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3105 }, { "completion_length": 211.50001525878906, "epoch": 0.19720634920634922, "grad_norm": 0.002879780251532793, "kl": 0.010859290137887001, "learning_rate": 3.5215419501133786e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3106 }, { "completion_length": 160.0, "epoch": 0.19726984126984126, "grad_norm": 0.005322787910699844, "kl": 0.012839255854487419, "learning_rate": 3.5226757369614515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3107 }, { "completion_length": 151.2857208251953, "epoch": 0.19733333333333333, "grad_norm": 0.003998117055743933, "kl": 0.010204200632870197, "learning_rate": 3.523809523809524e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3108 }, { "completion_length": 148.42857360839844, "epoch": 0.1973968253968254, "grad_norm": 0.004723784048110247, "kl": 0.01568891853094101, "learning_rate": 3.524943310657596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3109 }, { "completion_length": 173.00001525878906, "epoch": 0.19746031746031745, "grad_norm": 0.006631737574934959, "kl": 0.016763364896178246, "learning_rate": 3.526077097505669e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3110 }, { "completion_length": 177.00001525878906, "epoch": 0.19752380952380952, "grad_norm": 0.0033835419453680515, "kl": 0.011386965401470661, "learning_rate": 3.527210884353741e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3111 }, { "completion_length": 147.5, "epoch": 0.1975873015873016, "grad_norm": 0.006000323686748743, "kl": 0.01576908305287361, "learning_rate": 3.528344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3112 }, { "completion_length": 142.6428680419922, "epoch": 0.19765079365079366, "grad_norm": 0.003806942142546177, "kl": 0.013828041031956673, "learning_rate": 3.529478458049887e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3113 }, { "completion_length": 147.1428680419922, "epoch": 0.1977142857142857, "grad_norm": 0.004969824571162462, "kl": 0.017158225178718567, "learning_rate": 3.530612244897959e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3114 }, { "completion_length": 154.92857360839844, "epoch": 0.19777777777777777, "grad_norm": 0.00413618003949523, "kl": 0.01404861081391573, "learning_rate": 3.531746031746032e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3115 }, { "completion_length": 152.0, "epoch": 0.19784126984126985, "grad_norm": 0.005866720341145992, "kl": 0.01646391674876213, "learning_rate": 3.5328798185941037e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3116 }, { "completion_length": 163.7857208251953, "epoch": 0.19790476190476192, "grad_norm": 0.004480978939682245, "kl": 0.013130863197147846, "learning_rate": 3.5340136054421765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3117 }, { "completion_length": 148.85714721679688, "epoch": 0.19796825396825396, "grad_norm": 0.004619052167981863, "kl": 0.01428860891610384, "learning_rate": 3.5351473922902493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3118 }, { "completion_length": 145.6428680419922, "epoch": 0.19803174603174603, "grad_norm": 0.004116210155189037, "kl": 0.013050330802798271, "learning_rate": 3.5362811791383216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3119 }, { "completion_length": 146.85714721679688, "epoch": 0.1980952380952381, "grad_norm": 0.004475240595638752, "kl": 0.0131809888407588, "learning_rate": 3.5374149659863944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3120 }, { "completion_length": 173.57144165039062, "epoch": 0.19815873015873015, "grad_norm": 0.003654025262221694, "kl": 0.011633077636361122, "learning_rate": 3.538548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3121 }, { "completion_length": 152.7857208251953, "epoch": 0.19822222222222222, "grad_norm": 0.005070328246802092, "kl": 0.01715790294110775, "learning_rate": 3.5396825396825395e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3122 }, { "completion_length": 158.57144165039062, "epoch": 0.1982857142857143, "grad_norm": 0.0031982730142772198, "kl": 0.011803853325545788, "learning_rate": 3.5408163265306123e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3123 }, { "completion_length": 142.5, "epoch": 0.19834920634920636, "grad_norm": 0.004586727358400822, "kl": 0.013765121810138226, "learning_rate": 3.5419501133786846e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3124 }, { "completion_length": 150.71429443359375, "epoch": 0.1984126984126984, "grad_norm": 0.004983718041330576, "kl": 0.0152615737169981, "learning_rate": 3.543083900226757e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3125 }, { "completion_length": 148.21429443359375, "epoch": 0.19847619047619047, "grad_norm": 0.005705272313207388, "kl": 0.013670302927494049, "learning_rate": 3.5442176870748297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3126 }, { "completion_length": 156.07144165039062, "epoch": 0.19853968253968254, "grad_norm": 0.00439606886357069, "kl": 0.015033440664410591, "learning_rate": 3.5453514739229026e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3127 }, { "completion_length": 173.35714721679688, "epoch": 0.19860317460317461, "grad_norm": 0.003397394670173526, "kl": 0.011231595650315285, "learning_rate": 3.546485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3128 }, { "completion_length": 148.2857208251953, "epoch": 0.19866666666666666, "grad_norm": 0.004705641884356737, "kl": 0.013413872569799423, "learning_rate": 3.5476190476190477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3129 }, { "completion_length": 151.35714721679688, "epoch": 0.19873015873015873, "grad_norm": 0.0034616889897733927, "kl": 0.011483893729746342, "learning_rate": 3.5487528344671205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3130 }, { "completion_length": 155.6428680419922, "epoch": 0.1987936507936508, "grad_norm": 0.004856267478317022, "kl": 0.01634245179593563, "learning_rate": 3.549886621315192e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3131 }, { "completion_length": 170.71429443359375, "epoch": 0.19885714285714284, "grad_norm": 0.0029263359028846025, "kl": 0.009606296196579933, "learning_rate": 3.551020408163265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3132 }, { "completion_length": 133.57144165039062, "epoch": 0.19892063492063491, "grad_norm": 0.004585157614201307, "kl": 0.012682472355663776, "learning_rate": 3.552154195011338e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3133 }, { "completion_length": 186.00001525878906, "epoch": 0.19898412698412699, "grad_norm": 0.002651289803907275, "kl": 0.008614812977612019, "learning_rate": 3.55328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3134 }, { "completion_length": 149.85714721679688, "epoch": 0.19904761904761906, "grad_norm": 0.004337796941399574, "kl": 0.010691882111132145, "learning_rate": 3.554421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3135 }, { "completion_length": 115.50000762939453, "epoch": 0.1991111111111111, "grad_norm": 1.5105341672897339, "kl": 0.013672233559191227, "learning_rate": 3.5555555555555553e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3136 }, { "completion_length": 204.50001525878906, "epoch": 0.19917460317460317, "grad_norm": 0.004324740264564753, "kl": 0.013479526154696941, "learning_rate": 3.556689342403628e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3137 }, { "completion_length": 187.2857208251953, "epoch": 0.19923809523809524, "grad_norm": 0.003374255495145917, "kl": 0.010333683341741562, "learning_rate": 3.5578231292517004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3138 }, { "completion_length": 132.92857360839844, "epoch": 0.1993015873015873, "grad_norm": 0.004315427970141172, "kl": 0.014983597211539745, "learning_rate": 3.5589569160997727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3139 }, { "completion_length": 153.57144165039062, "epoch": 0.19936507936507936, "grad_norm": 0.0038385274820029736, "kl": 0.012114149518311024, "learning_rate": 3.5600907029478455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3140 }, { "completion_length": 159.92857360839844, "epoch": 0.19942857142857143, "grad_norm": 0.0037642952520400286, "kl": 0.012086252681910992, "learning_rate": 3.5612244897959183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3141 }, { "completion_length": 153.6428680419922, "epoch": 0.1994920634920635, "grad_norm": 0.0027881846763193607, "kl": 0.01140002254396677, "learning_rate": 3.5623582766439906e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3142 }, { "completion_length": 155.42857360839844, "epoch": 0.19955555555555557, "grad_norm": 0.0035090362653136253, "kl": 0.010066553950309753, "learning_rate": 3.5634920634920634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3143 }, { "completion_length": 156.35714721679688, "epoch": 0.1996190476190476, "grad_norm": 0.005291131790727377, "kl": 0.014270920306444168, "learning_rate": 3.564625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3144 }, { "completion_length": 170.71429443359375, "epoch": 0.19968253968253968, "grad_norm": 0.003434312529861927, "kl": 0.009894452057778835, "learning_rate": 3.5657596371882085e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3145 }, { "completion_length": 172.07144165039062, "epoch": 0.19974603174603175, "grad_norm": 0.0037673916667699814, "kl": 0.012753802351653576, "learning_rate": 3.566893424036281e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3146 }, { "completion_length": 196.07144165039062, "epoch": 0.1998095238095238, "grad_norm": 0.0027307237032800913, "kl": 0.010613618418574333, "learning_rate": 3.5680272108843536e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3147 }, { "completion_length": 189.00001525878906, "epoch": 0.19987301587301587, "grad_norm": 0.003301663091406226, "kl": 0.01150155533105135, "learning_rate": 3.569160997732426e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3148 }, { "completion_length": 149.2857208251953, "epoch": 0.19993650793650794, "grad_norm": 0.006023419089615345, "kl": 0.02131686732172966, "learning_rate": 3.570294784580499e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3149 }, { "completion_length": 127.00000762939453, "epoch": 0.2, "grad_norm": 0.004432065412402153, "kl": 0.015537736937403679, "learning_rate": 3.5714285714285716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3150 }, { "completion_length": 163.6428680419922, "epoch": 0.20006349206349205, "grad_norm": 0.003772615920752287, "kl": 0.012719513848423958, "learning_rate": 3.572562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3151 }, { "completion_length": 169.71429443359375, "epoch": 0.20012698412698413, "grad_norm": 0.0026797805912792683, "kl": 0.010391688905656338, "learning_rate": 3.5736961451247167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3152 }, { "completion_length": 152.85714721679688, "epoch": 0.2001904761904762, "grad_norm": 0.004296388942748308, "kl": 0.01634126342833042, "learning_rate": 3.574829931972789e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3153 }, { "completion_length": 122.78572082519531, "epoch": 0.20025396825396827, "grad_norm": 0.003883786965161562, "kl": 0.01240174937993288, "learning_rate": 3.575963718820861e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3154 }, { "completion_length": 141.71429443359375, "epoch": 0.2003174603174603, "grad_norm": 0.004082430154085159, "kl": 0.013952575623989105, "learning_rate": 3.577097505668934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3155 }, { "completion_length": 177.85714721679688, "epoch": 0.20038095238095238, "grad_norm": 0.004302500281482935, "kl": 0.012900053523480892, "learning_rate": 3.5782312925170064e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3156 }, { "completion_length": 128.85714721679688, "epoch": 0.20044444444444445, "grad_norm": 0.004874600097537041, "kl": 0.01607651077210903, "learning_rate": 3.579365079365079e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3157 }, { "completion_length": 159.35714721679688, "epoch": 0.2005079365079365, "grad_norm": 0.0032307205256074667, "kl": 0.011622244492173195, "learning_rate": 3.580498866213152e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3158 }, { "completion_length": 160.42857360839844, "epoch": 0.20057142857142857, "grad_norm": 0.004975056741386652, "kl": 0.013331165537238121, "learning_rate": 3.5816326530612243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3159 }, { "completion_length": 152.42857360839844, "epoch": 0.20063492063492064, "grad_norm": 0.003990689758211374, "kl": 0.014704383909702301, "learning_rate": 3.582766439909297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3160 }, { "completion_length": 151.85714721679688, "epoch": 0.2006984126984127, "grad_norm": 0.003488282673060894, "kl": 0.011869150213897228, "learning_rate": 3.5839002267573694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3161 }, { "completion_length": 177.50001525878906, "epoch": 0.20076190476190475, "grad_norm": 0.003627280006185174, "kl": 0.011705192737281322, "learning_rate": 3.5850340136054417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3162 }, { "completion_length": 218.2857208251953, "epoch": 0.20082539682539682, "grad_norm": 0.0028588026762008667, "kl": 0.009082237258553505, "learning_rate": 3.5861678004535145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3163 }, { "completion_length": 139.5, "epoch": 0.2008888888888889, "grad_norm": 0.0044354661367833614, "kl": 0.014455278404057026, "learning_rate": 3.5873015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3164 }, { "completion_length": 156.5, "epoch": 0.20095238095238097, "grad_norm": 0.0031478451564908028, "kl": 0.010789320804178715, "learning_rate": 3.5884353741496596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3165 }, { "completion_length": 142.85714721679688, "epoch": 0.201015873015873, "grad_norm": 0.00472031906247139, "kl": 0.014713220298290253, "learning_rate": 3.5895691609977324e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3166 }, { "completion_length": 148.0, "epoch": 0.20107936507936508, "grad_norm": 0.004081391263753176, "kl": 0.014170709997415543, "learning_rate": 3.590702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3167 }, { "completion_length": 148.1428680419922, "epoch": 0.20114285714285715, "grad_norm": 0.0039491839706897736, "kl": 0.01122868712991476, "learning_rate": 3.591836734693877e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3168 }, { "completion_length": 170.92857360839844, "epoch": 0.2012063492063492, "grad_norm": 0.004033768083900213, "kl": 0.011872725561261177, "learning_rate": 3.59297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3169 }, { "completion_length": 173.00001525878906, "epoch": 0.20126984126984127, "grad_norm": 0.0036363715771585703, "kl": 0.012020247057080269, "learning_rate": 3.5941043083900227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3170 }, { "completion_length": 174.57144165039062, "epoch": 0.20133333333333334, "grad_norm": 0.0027755671180784702, "kl": 0.008955535478889942, "learning_rate": 3.595238095238095e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3171 }, { "completion_length": 181.1428680419922, "epoch": 0.2013968253968254, "grad_norm": 0.0034727128222584724, "kl": 0.010905275121331215, "learning_rate": 3.596371882086168e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3172 }, { "completion_length": 161.0, "epoch": 0.20146031746031745, "grad_norm": 0.003361632814630866, "kl": 0.010134254582226276, "learning_rate": 3.5975056689342406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3173 }, { "completion_length": 179.35714721679688, "epoch": 0.20152380952380952, "grad_norm": 0.0032161534763872623, "kl": 0.011317956261336803, "learning_rate": 3.598639455782313e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3174 }, { "completion_length": 176.6428680419922, "epoch": 0.2015873015873016, "grad_norm": 0.0031442444305866957, "kl": 0.010960198007524014, "learning_rate": 3.599773242630385e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3175 }, { "completion_length": 139.35714721679688, "epoch": 0.20165079365079366, "grad_norm": 0.0035054816398769617, "kl": 0.009670878760516644, "learning_rate": 3.6009070294784575e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3176 }, { "completion_length": 192.07144165039062, "epoch": 0.2017142857142857, "grad_norm": 0.0035621211864054203, "kl": 0.008916454389691353, "learning_rate": 3.6020408163265303e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3177 }, { "completion_length": 156.57144165039062, "epoch": 0.20177777777777778, "grad_norm": 0.0037917119916528463, "kl": 0.012686389498412609, "learning_rate": 3.603174603174603e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3178 }, { "completion_length": 141.42857360839844, "epoch": 0.20184126984126985, "grad_norm": 0.004660044796764851, "kl": 0.016029221937060356, "learning_rate": 3.6043083900226754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3179 }, { "completion_length": 199.21429443359375, "epoch": 0.2019047619047619, "grad_norm": 0.0027697139885276556, "kl": 0.00800132192671299, "learning_rate": 3.605442176870748e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3180 }, { "completion_length": 152.07144165039062, "epoch": 0.20196825396825396, "grad_norm": 0.0030096168629825115, "kl": 0.009546090848743916, "learning_rate": 3.606575963718821e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3181 }, { "completion_length": 164.42857360839844, "epoch": 0.20203174603174603, "grad_norm": 0.003185400040820241, "kl": 0.010996679775416851, "learning_rate": 3.6077097505668933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3182 }, { "completion_length": 169.71429443359375, "epoch": 0.2020952380952381, "grad_norm": 0.0028142069932073355, "kl": 0.009331307373940945, "learning_rate": 3.6088435374149656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3183 }, { "completion_length": 188.42857360839844, "epoch": 0.20215873015873015, "grad_norm": 0.0023730099201202393, "kl": 0.007861809805035591, "learning_rate": 3.6099773242630384e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3184 }, { "completion_length": 163.0, "epoch": 0.20222222222222222, "grad_norm": 0.004090574104338884, "kl": 0.011750478297472, "learning_rate": 3.6111111111111107e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3185 }, { "completion_length": 183.6428680419922, "epoch": 0.2022857142857143, "grad_norm": 0.0029441509395837784, "kl": 0.011429405771195889, "learning_rate": 3.6122448979591835e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3186 }, { "completion_length": 172.92857360839844, "epoch": 0.20234920634920636, "grad_norm": 0.0031181874219328165, "kl": 0.008868567645549774, "learning_rate": 3.6133786848072564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3187 }, { "completion_length": 195.07144165039062, "epoch": 0.2024126984126984, "grad_norm": 0.0026780557818710804, "kl": 0.00862282793968916, "learning_rate": 3.6145124716553286e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3188 }, { "completion_length": 166.42857360839844, "epoch": 0.20247619047619048, "grad_norm": 0.0030726061668246984, "kl": 0.011144382879137993, "learning_rate": 3.6156462585034015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3189 }, { "completion_length": 172.35714721679688, "epoch": 0.20253968253968255, "grad_norm": 0.0025718894321471453, "kl": 0.008666389621794224, "learning_rate": 3.616780045351474e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3190 }, { "completion_length": 121.64286041259766, "epoch": 0.2026031746031746, "grad_norm": 0.003735196776688099, "kl": 0.010882409289479256, "learning_rate": 3.617913832199546e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3191 }, { "completion_length": 159.92857360839844, "epoch": 0.20266666666666666, "grad_norm": 0.0026027557905763388, "kl": 0.008699861355125904, "learning_rate": 3.619047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3192 }, { "completion_length": 160.57144165039062, "epoch": 0.20273015873015873, "grad_norm": 0.0023653560783714056, "kl": 0.007871974259614944, "learning_rate": 3.6201814058956917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3193 }, { "completion_length": 154.85714721679688, "epoch": 0.2027936507936508, "grad_norm": 0.0028631549794226885, "kl": 0.008614699356257915, "learning_rate": 3.621315192743764e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3194 }, { "completion_length": 138.6428680419922, "epoch": 0.20285714285714285, "grad_norm": 0.003469536080956459, "kl": 0.01170395128428936, "learning_rate": 3.622448979591837e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3195 }, { "completion_length": 163.57144165039062, "epoch": 0.20292063492063492, "grad_norm": 0.002335070166736841, "kl": 0.007757978048175573, "learning_rate": 3.6235827664399096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3196 }, { "completion_length": 170.0, "epoch": 0.202984126984127, "grad_norm": 0.003022599732503295, "kl": 0.008400957100093365, "learning_rate": 3.6247165532879814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3197 }, { "completion_length": 167.21429443359375, "epoch": 0.20304761904761906, "grad_norm": 0.002920052967965603, "kl": 0.00833231583237648, "learning_rate": 3.625850340136054e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3198 }, { "completion_length": 165.57144165039062, "epoch": 0.2031111111111111, "grad_norm": 0.0023435468319803476, "kl": 0.007579334545880556, "learning_rate": 3.6269841269841265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3199 }, { "completion_length": 158.42857360839844, "epoch": 0.20317460317460317, "grad_norm": 0.002467009937390685, "kl": 0.008839085698127747, "learning_rate": 3.6281179138321993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3200 }, { "completion_length": 138.7857208251953, "epoch": 0.20323809523809525, "grad_norm": 0.0033367311116307974, "kl": 0.010565589182078838, "learning_rate": 3.629251700680272e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3201 }, { "completion_length": 143.2857208251953, "epoch": 0.2033015873015873, "grad_norm": 0.0036253808066248894, "kl": 0.013838986866176128, "learning_rate": 3.6303854875283444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3202 }, { "completion_length": 148.85714721679688, "epoch": 0.20336507936507936, "grad_norm": 0.002803756855428219, "kl": 0.009139105677604675, "learning_rate": 3.631519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3203 }, { "completion_length": 175.2857208251953, "epoch": 0.20342857142857143, "grad_norm": 0.0035716567654162645, "kl": 0.010860449634492397, "learning_rate": 3.63265306122449e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3204 }, { "completion_length": 186.42857360839844, "epoch": 0.2034920634920635, "grad_norm": 0.004647649824619293, "kl": 0.011196029372513294, "learning_rate": 3.633786848072562e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3205 }, { "completion_length": 163.7857208251953, "epoch": 0.20355555555555555, "grad_norm": 0.0026037918869405985, "kl": 0.008602038025856018, "learning_rate": 3.6349206349206346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3206 }, { "completion_length": 141.5, "epoch": 0.20361904761904762, "grad_norm": 0.003589319298043847, "kl": 0.012468215078115463, "learning_rate": 3.6360544217687074e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3207 }, { "completion_length": 166.57144165039062, "epoch": 0.2036825396825397, "grad_norm": 0.0025406177155673504, "kl": 0.007485632784664631, "learning_rate": 3.6371882086167797e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3208 }, { "completion_length": 187.50001525878906, "epoch": 0.20374603174603176, "grad_norm": 0.0023395668249577284, "kl": 0.006661492865532637, "learning_rate": 3.6383219954648525e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3209 }, { "completion_length": 166.5, "epoch": 0.2038095238095238, "grad_norm": 0.002646995009854436, "kl": 0.007427687291055918, "learning_rate": 3.6394557823129254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3210 }, { "completion_length": 207.1428680419922, "epoch": 0.20387301587301587, "grad_norm": 0.001823929138481617, "kl": 0.006112200673669577, "learning_rate": 3.6405895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3211 }, { "completion_length": 214.1428680419922, "epoch": 0.20393650793650794, "grad_norm": 0.0017563682049512863, "kl": 0.005860124249011278, "learning_rate": 3.64172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3212 }, { "completion_length": 164.57144165039062, "epoch": 0.204, "grad_norm": 0.003587712999433279, "kl": 0.011089134030044079, "learning_rate": 3.642857142857143e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3213 }, { "completion_length": 149.42857360839844, "epoch": 0.20406349206349206, "grad_norm": 0.0029267161153256893, "kl": 0.00909085851162672, "learning_rate": 3.643990929705215e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3214 }, { "completion_length": 166.0, "epoch": 0.20412698412698413, "grad_norm": 1.1011861562728882, "kl": 0.007155300583690405, "learning_rate": 3.645124716553288e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3215 }, { "completion_length": 139.6428680419922, "epoch": 0.2041904761904762, "grad_norm": 0.0031300692353397608, "kl": 0.01086117047816515, "learning_rate": 3.64625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3216 }, { "completion_length": 183.57144165039062, "epoch": 0.20425396825396824, "grad_norm": 0.00290467101149261, "kl": 0.006821988150477409, "learning_rate": 3.647392290249433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3217 }, { "completion_length": 201.85714721679688, "epoch": 0.20431746031746031, "grad_norm": 0.001814257469959557, "kl": 0.005820303224027157, "learning_rate": 3.648526077097506e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3218 }, { "completion_length": 148.07144165039062, "epoch": 0.20438095238095239, "grad_norm": 0.002227186458185315, "kl": 0.007463647983968258, "learning_rate": 3.649659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3219 }, { "completion_length": 189.00001525878906, "epoch": 0.20444444444444446, "grad_norm": 0.0021534005645662546, "kl": 0.0064373076893389225, "learning_rate": 3.6507936507936504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3220 }, { "completion_length": 164.07144165039062, "epoch": 0.2045079365079365, "grad_norm": 0.001870596082881093, "kl": 0.006604293826967478, "learning_rate": 3.651927437641723e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3221 }, { "completion_length": 203.85714721679688, "epoch": 0.20457142857142857, "grad_norm": 0.0016292380169034004, "kl": 0.006443419028073549, "learning_rate": 3.6530612244897955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3222 }, { "completion_length": 175.07144165039062, "epoch": 0.20463492063492064, "grad_norm": 0.001952976337634027, "kl": 0.0075177522376179695, "learning_rate": 3.6541950113378683e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3223 }, { "completion_length": 149.7857208251953, "epoch": 0.20469841269841269, "grad_norm": 0.0017659679288044572, "kl": 0.00644401041790843, "learning_rate": 3.655328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3224 }, { "completion_length": 152.21429443359375, "epoch": 0.20476190476190476, "grad_norm": 0.0021631207782775164, "kl": 0.008286240510642529, "learning_rate": 3.6564625850340134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3225 }, { "completion_length": 148.35714721679688, "epoch": 0.20482539682539683, "grad_norm": 0.0021879307460039854, "kl": 0.007971839979290962, "learning_rate": 3.657596371882086e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3226 }, { "completion_length": 151.5, "epoch": 0.2048888888888889, "grad_norm": 0.0019729561172425747, "kl": 0.007541038561612368, "learning_rate": 3.6587301587301585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3227 }, { "completion_length": 172.7857208251953, "epoch": 0.20495238095238094, "grad_norm": 0.0016552213346585631, "kl": 0.00640566973015666, "learning_rate": 3.659863945578231e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3228 }, { "completion_length": 165.0, "epoch": 0.205015873015873, "grad_norm": 0.003166168462485075, "kl": 0.009327208623290062, "learning_rate": 3.6609977324263036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3229 }, { "completion_length": 155.42857360839844, "epoch": 0.20507936507936508, "grad_norm": 0.0016610240563750267, "kl": 0.005684081930667162, "learning_rate": 3.6621315192743765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3230 }, { "completion_length": 170.85714721679688, "epoch": 0.20514285714285715, "grad_norm": 0.002179678063839674, "kl": 0.007871288806200027, "learning_rate": 3.663265306122449e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3231 }, { "completion_length": 149.7857208251953, "epoch": 0.2052063492063492, "grad_norm": 0.0016790317604318261, "kl": 0.0057303146459162235, "learning_rate": 3.6643990929705216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3232 }, { "completion_length": 186.00001525878906, "epoch": 0.20526984126984127, "grad_norm": 0.0016218106029555202, "kl": 0.006733184680342674, "learning_rate": 3.6655328798185944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3233 }, { "completion_length": 134.42857360839844, "epoch": 0.20533333333333334, "grad_norm": 0.0019408171065151691, "kl": 0.006937385071069002, "learning_rate": 3.666666666666666e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3234 }, { "completion_length": 185.7857208251953, "epoch": 0.20539682539682538, "grad_norm": 0.0016413448611274362, "kl": 0.0070041329599916935, "learning_rate": 3.667800453514739e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3235 }, { "completion_length": 177.21429443359375, "epoch": 0.20546031746031745, "grad_norm": 0.0017626286717131734, "kl": 0.0061189099214971066, "learning_rate": 3.668934240362811e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3236 }, { "completion_length": 162.0, "epoch": 0.20552380952380953, "grad_norm": 0.001805102452635765, "kl": 0.007502687629312277, "learning_rate": 3.670068027210884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3237 }, { "completion_length": 157.2857208251953, "epoch": 0.2055873015873016, "grad_norm": 0.002044413471594453, "kl": 0.008960556238889694, "learning_rate": 3.671201814058957e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3238 }, { "completion_length": 196.35714721679688, "epoch": 0.20565079365079364, "grad_norm": 0.001412234385497868, "kl": 0.006142111960798502, "learning_rate": 3.672335600907029e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3239 }, { "completion_length": 149.85714721679688, "epoch": 0.2057142857142857, "grad_norm": 0.0020006143022328615, "kl": 0.007349037099629641, "learning_rate": 3.673469387755102e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3240 }, { "completion_length": 198.85714721679688, "epoch": 0.20577777777777778, "grad_norm": 0.0013270918279886246, "kl": 0.005162055138498545, "learning_rate": 3.674603174603175e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3241 }, { "completion_length": 187.6428680419922, "epoch": 0.20584126984126985, "grad_norm": 0.0015569392126053572, "kl": 0.006610167678445578, "learning_rate": 3.6757369614512466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3242 }, { "completion_length": 171.6428680419922, "epoch": 0.2059047619047619, "grad_norm": 0.002205710392445326, "kl": 0.007922998629510403, "learning_rate": 3.6768707482993194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3243 }, { "completion_length": 153.42857360839844, "epoch": 0.20596825396825397, "grad_norm": 0.002760001691058278, "kl": 0.012196470983326435, "learning_rate": 3.678004535147392e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3244 }, { "completion_length": 178.85714721679688, "epoch": 0.20603174603174604, "grad_norm": 0.001433155033737421, "kl": 0.006028195843100548, "learning_rate": 3.6791383219954645e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3245 }, { "completion_length": 190.07144165039062, "epoch": 0.2060952380952381, "grad_norm": 0.001964687602594495, "kl": 0.008054849691689014, "learning_rate": 3.6802721088435373e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3246 }, { "completion_length": 142.5, "epoch": 0.20615873015873015, "grad_norm": 0.002058073179796338, "kl": 0.008796358481049538, "learning_rate": 3.68140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3247 }, { "completion_length": 168.71429443359375, "epoch": 0.20622222222222222, "grad_norm": 0.0020020988304167986, "kl": 0.00975264236330986, "learning_rate": 3.6825396825396824e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3248 }, { "completion_length": 142.5, "epoch": 0.2062857142857143, "grad_norm": 0.0017481240211054683, "kl": 0.006629406474530697, "learning_rate": 3.6836734693877547e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3249 }, { "completion_length": 197.7857208251953, "epoch": 0.20634920634920634, "grad_norm": 0.0015043099410831928, "kl": 0.005216869059950113, "learning_rate": 3.6848072562358275e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3250 }, { "completion_length": 181.21429443359375, "epoch": 0.2064126984126984, "grad_norm": 0.0013734394451603293, "kl": 0.005599688272923231, "learning_rate": 3.6859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3251 }, { "completion_length": 169.5, "epoch": 0.20647619047619048, "grad_norm": 0.001927547506056726, "kl": 0.006511246785521507, "learning_rate": 3.6870748299319727e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3252 }, { "completion_length": 169.85714721679688, "epoch": 0.20653968253968255, "grad_norm": 0.0015841854037716985, "kl": 0.006554402876645327, "learning_rate": 3.6882086167800455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3253 }, { "completion_length": 171.42857360839844, "epoch": 0.2066031746031746, "grad_norm": 0.004124737344682217, "kl": 0.00794650986790657, "learning_rate": 3.689342403628118e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3254 }, { "completion_length": 148.21429443359375, "epoch": 0.20666666666666667, "grad_norm": 0.002261530142277479, "kl": 0.00706123374402523, "learning_rate": 3.6904761904761906e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3255 }, { "completion_length": 158.1428680419922, "epoch": 0.20673015873015874, "grad_norm": 0.0015945673221722245, "kl": 0.006614075507968664, "learning_rate": 3.6916099773242623e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3256 }, { "completion_length": 158.85714721679688, "epoch": 0.2067936507936508, "grad_norm": 0.0016999850049614906, "kl": 0.007498881313949823, "learning_rate": 3.692743764172335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3257 }, { "completion_length": 163.21429443359375, "epoch": 0.20685714285714285, "grad_norm": 0.0020399519708007574, "kl": 0.007051069755107164, "learning_rate": 3.693877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3258 }, { "completion_length": 162.1428680419922, "epoch": 0.20692063492063492, "grad_norm": 0.0020944201387465, "kl": 0.008812006562948227, "learning_rate": 3.6950113378684803e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3259 }, { "completion_length": 169.6428680419922, "epoch": 0.206984126984127, "grad_norm": 0.0022798744030296803, "kl": 0.006565510295331478, "learning_rate": 3.696145124716553e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3260 }, { "completion_length": 192.00001525878906, "epoch": 0.20704761904761904, "grad_norm": 0.0013213226338848472, "kl": 0.005241877399384975, "learning_rate": 3.697278911564626e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3261 }, { "completion_length": 154.71429443359375, "epoch": 0.2071111111111111, "grad_norm": 0.0026624167803674936, "kl": 0.007280820515006781, "learning_rate": 3.698412698412698e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3262 }, { "completion_length": 178.50001525878906, "epoch": 0.20717460317460318, "grad_norm": 0.0015643610386177897, "kl": 0.006420696619898081, "learning_rate": 3.699546485260771e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3263 }, { "completion_length": 170.71429443359375, "epoch": 0.20723809523809525, "grad_norm": 0.0017952198395505548, "kl": 0.006585184019058943, "learning_rate": 3.7006802721088433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3264 }, { "completion_length": 151.71429443359375, "epoch": 0.2073015873015873, "grad_norm": 0.0027301402296870947, "kl": 0.009291020222008228, "learning_rate": 3.7018140589569156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3265 }, { "completion_length": 161.42857360839844, "epoch": 0.20736507936507936, "grad_norm": 0.0017567642498761415, "kl": 0.00690803537145257, "learning_rate": 3.7029478458049884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3266 }, { "completion_length": 148.2857208251953, "epoch": 0.20742857142857143, "grad_norm": 0.0017555642407387495, "kl": 0.006637575104832649, "learning_rate": 3.704081632653061e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3267 }, { "completion_length": 165.71429443359375, "epoch": 0.2074920634920635, "grad_norm": 0.0017432710155844688, "kl": 0.007434133440256119, "learning_rate": 3.7052154195011335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3268 }, { "completion_length": 149.2857208251953, "epoch": 0.20755555555555555, "grad_norm": 0.0024791276082396507, "kl": 0.00852249190211296, "learning_rate": 3.7063492063492063e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3269 }, { "completion_length": 161.21429443359375, "epoch": 0.20761904761904762, "grad_norm": 0.0019152460154145956, "kl": 0.0073323436081409454, "learning_rate": 3.707482993197279e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3270 }, { "completion_length": 136.2857208251953, "epoch": 0.2076825396825397, "grad_norm": 1.2085154056549072, "kl": 0.010113867931067944, "learning_rate": 3.708616780045351e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3271 }, { "completion_length": 214.7857208251953, "epoch": 0.20774603174603173, "grad_norm": 0.0013501709327101707, "kl": 0.005669513251632452, "learning_rate": 3.709750566893424e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3272 }, { "completion_length": 129.85714721679688, "epoch": 0.2078095238095238, "grad_norm": 0.002358426805585623, "kl": 0.009223360568284988, "learning_rate": 3.7108843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3273 }, { "completion_length": 178.2857208251953, "epoch": 0.20787301587301588, "grad_norm": 0.0017150165513157845, "kl": 0.006645891349762678, "learning_rate": 3.712018140589569e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3274 }, { "completion_length": 163.71429443359375, "epoch": 0.20793650793650795, "grad_norm": 0.0016758984420448542, "kl": 0.006870416924357414, "learning_rate": 3.7131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3275 }, { "completion_length": 149.21429443359375, "epoch": 0.208, "grad_norm": 0.002693570451810956, "kl": 0.00830781739205122, "learning_rate": 3.7142857142857145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3276 }, { "completion_length": 166.7857208251953, "epoch": 0.20806349206349206, "grad_norm": 0.0029373029246926308, "kl": 0.009348420426249504, "learning_rate": 3.715419501133787e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3277 }, { "completion_length": 156.92857360839844, "epoch": 0.20812698412698413, "grad_norm": 0.001986528979614377, "kl": 0.007745469454675913, "learning_rate": 3.7165532879818596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3278 }, { "completion_length": 166.71429443359375, "epoch": 0.2081904761904762, "grad_norm": 0.0026651113294065, "kl": 0.007827971130609512, "learning_rate": 3.7176870748299314e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3279 }, { "completion_length": 152.21429443359375, "epoch": 0.20825396825396825, "grad_norm": 0.002701905556023121, "kl": 0.01042179949581623, "learning_rate": 3.718820861678004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3280 }, { "completion_length": 165.1428680419922, "epoch": 0.20831746031746032, "grad_norm": 0.002544438000768423, "kl": 0.008101828396320343, "learning_rate": 3.719954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3281 }, { "completion_length": 147.71429443359375, "epoch": 0.2083809523809524, "grad_norm": 0.003179526189342141, "kl": 0.011389871127903461, "learning_rate": 3.7210884353741493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3282 }, { "completion_length": 164.6428680419922, "epoch": 0.20844444444444443, "grad_norm": 0.0030913071241229773, "kl": 0.008776896633207798, "learning_rate": 3.722222222222222e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3283 }, { "completion_length": 163.21429443359375, "epoch": 0.2085079365079365, "grad_norm": 0.0022372938692569733, "kl": 0.008016828447580338, "learning_rate": 3.723356009070295e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3284 }, { "completion_length": 192.2857208251953, "epoch": 0.20857142857142857, "grad_norm": 0.001818917109631002, "kl": 0.0055623422376811504, "learning_rate": 3.724489795918367e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3285 }, { "completion_length": 207.42857360839844, "epoch": 0.20863492063492065, "grad_norm": 0.8911441564559937, "kl": 0.006887597497552633, "learning_rate": 3.7256235827664395e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3286 }, { "completion_length": 161.0, "epoch": 0.2086984126984127, "grad_norm": 0.0027770812157541513, "kl": 0.008355935104191303, "learning_rate": 3.7267573696145123e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3287 }, { "completion_length": 132.5, "epoch": 0.20876190476190476, "grad_norm": 0.004316702019423246, "kl": 0.013527022674679756, "learning_rate": 3.7278911564625846e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3288 }, { "completion_length": 188.50001525878906, "epoch": 0.20882539682539683, "grad_norm": 0.0023752341512590647, "kl": 0.007854822091758251, "learning_rate": 3.7290249433106574e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3289 }, { "completion_length": 173.1428680419922, "epoch": 0.2088888888888889, "grad_norm": 0.002692881040275097, "kl": 0.008191492408514023, "learning_rate": 3.73015873015873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3290 }, { "completion_length": 184.6428680419922, "epoch": 0.20895238095238095, "grad_norm": 0.0021227034740149975, "kl": 0.0067642987705767155, "learning_rate": 3.7312925170068025e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3291 }, { "completion_length": 147.42857360839844, "epoch": 0.20901587301587302, "grad_norm": 0.0028747590258717537, "kl": 0.00810211431235075, "learning_rate": 3.7324263038548754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3292 }, { "completion_length": 135.6428680419922, "epoch": 0.2090793650793651, "grad_norm": 0.004783681593835354, "kl": 0.0131558608263731, "learning_rate": 3.7335600907029477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3293 }, { "completion_length": 164.0, "epoch": 0.20914285714285713, "grad_norm": 0.0031825427431613207, "kl": 0.008803836070001125, "learning_rate": 3.73469387755102e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3294 }, { "completion_length": 130.7857208251953, "epoch": 0.2092063492063492, "grad_norm": 0.00395578658208251, "kl": 0.011668404564261436, "learning_rate": 3.735827664399093e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3295 }, { "completion_length": 158.21429443359375, "epoch": 0.20926984126984127, "grad_norm": 0.0033101015724241734, "kl": 0.011299828067421913, "learning_rate": 3.7369614512471656e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3296 }, { "completion_length": 156.5, "epoch": 0.20933333333333334, "grad_norm": 0.9689010381698608, "kl": 0.010402386076748371, "learning_rate": 3.738095238095238e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3297 }, { "completion_length": 162.0, "epoch": 0.2093968253968254, "grad_norm": 0.0028599007055163383, "kl": 0.008467075414955616, "learning_rate": 3.7392290249433107e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3298 }, { "completion_length": 161.85714721679688, "epoch": 0.20946031746031746, "grad_norm": 0.003783801570534706, "kl": 0.008920878171920776, "learning_rate": 3.740362811791383e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3299 }, { "completion_length": 196.57144165039062, "epoch": 0.20952380952380953, "grad_norm": 0.003967135678976774, "kl": 0.011683070100843906, "learning_rate": 3.741496598639456e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3300 }, { "completion_length": 158.0, "epoch": 0.2095873015873016, "grad_norm": 0.006716459058225155, "kl": 0.018932830542325974, "learning_rate": 3.742630385487528e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3301 }, { "completion_length": 161.42857360839844, "epoch": 0.20965079365079364, "grad_norm": 0.004992369096726179, "kl": 0.014097554609179497, "learning_rate": 3.7437641723356004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3302 }, { "completion_length": 143.07144165039062, "epoch": 0.20971428571428571, "grad_norm": 0.0033724764361977577, "kl": 0.009623396210372448, "learning_rate": 3.744897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3303 }, { "completion_length": 170.92857360839844, "epoch": 0.20977777777777779, "grad_norm": 0.0029041492380201817, "kl": 0.010323608294129372, "learning_rate": 3.746031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3304 }, { "completion_length": 161.92857360839844, "epoch": 0.20984126984126983, "grad_norm": 0.003551553236320615, "kl": 0.009063197299838066, "learning_rate": 3.7471655328798183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3305 }, { "completion_length": 169.07144165039062, "epoch": 0.2099047619047619, "grad_norm": 0.0034606854896992445, "kl": 0.009571433067321777, "learning_rate": 3.748299319727891e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3306 }, { "completion_length": 149.1428680419922, "epoch": 0.20996825396825397, "grad_norm": 0.005311236251145601, "kl": 0.014992659911513329, "learning_rate": 3.749433106575964e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3307 }, { "completion_length": 168.6428680419922, "epoch": 0.21003174603174604, "grad_norm": 0.0042512803338468075, "kl": 0.01280149258673191, "learning_rate": 3.7505668934240357e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3308 }, { "completion_length": 157.7857208251953, "epoch": 0.21009523809523808, "grad_norm": 0.004040715750306845, "kl": 0.012041048146784306, "learning_rate": 3.7517006802721085e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3309 }, { "completion_length": 138.92857360839844, "epoch": 0.21015873015873016, "grad_norm": 0.004638755694031715, "kl": 0.015232237987220287, "learning_rate": 3.7528344671201813e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3310 }, { "completion_length": 151.71429443359375, "epoch": 0.21022222222222223, "grad_norm": 0.00526680052280426, "kl": 0.01615353673696518, "learning_rate": 3.7539682539682536e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3311 }, { "completion_length": 166.21429443359375, "epoch": 0.2102857142857143, "grad_norm": 1.0254333019256592, "kl": 0.00814261008054018, "learning_rate": 3.7551020408163265e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3312 }, { "completion_length": 170.1428680419922, "epoch": 0.21034920634920634, "grad_norm": 0.0033584001939743757, "kl": 0.011689756996929646, "learning_rate": 3.7562358276643993e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3313 }, { "completion_length": 157.21429443359375, "epoch": 0.2104126984126984, "grad_norm": 0.006196616217494011, "kl": 0.017055781558156013, "learning_rate": 3.7573696145124716e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3314 }, { "completion_length": 171.50001525878906, "epoch": 0.21047619047619048, "grad_norm": 0.0036421276163309813, "kl": 0.011447343043982983, "learning_rate": 3.758503401360544e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3315 }, { "completion_length": 141.6428680419922, "epoch": 0.21053968253968253, "grad_norm": 0.004936554469168186, "kl": 0.01624453440308571, "learning_rate": 3.7596371882086167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3316 }, { "completion_length": 144.85714721679688, "epoch": 0.2106031746031746, "grad_norm": 0.005170873366296291, "kl": 0.021882956847548485, "learning_rate": 3.760770975056689e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3317 }, { "completion_length": 164.21429443359375, "epoch": 0.21066666666666667, "grad_norm": 0.005235264077782631, "kl": 0.01754939928650856, "learning_rate": 3.761904761904762e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3318 }, { "completion_length": 145.21429443359375, "epoch": 0.21073015873015874, "grad_norm": 0.00588140869513154, "kl": 0.01949804089963436, "learning_rate": 3.763038548752834e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3319 }, { "completion_length": 133.6428680419922, "epoch": 0.21079365079365078, "grad_norm": 0.005154197104275227, "kl": 0.01816120557487011, "learning_rate": 3.764172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3320 }, { "completion_length": 158.57144165039062, "epoch": 0.21085714285714285, "grad_norm": 0.004724254831671715, "kl": 0.017267411574721336, "learning_rate": 3.7653061224489797e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3321 }, { "completion_length": 159.92857360839844, "epoch": 0.21092063492063493, "grad_norm": 0.0064423480071127415, "kl": 0.0201431717723608, "learning_rate": 3.766439909297052e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3322 }, { "completion_length": 174.92857360839844, "epoch": 0.210984126984127, "grad_norm": 0.004678533878177404, "kl": 0.017280062660574913, "learning_rate": 3.7675736961451243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3323 }, { "completion_length": 165.21429443359375, "epoch": 0.21104761904761904, "grad_norm": 0.0055427588522434235, "kl": 0.018317092210054398, "learning_rate": 3.768707482993197e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3324 }, { "completion_length": 139.2857208251953, "epoch": 0.2111111111111111, "grad_norm": 0.005854194052517414, "kl": 0.02077076956629753, "learning_rate": 3.7698412698412694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3325 }, { "completion_length": 177.1428680419922, "epoch": 0.21117460317460318, "grad_norm": 0.004096685908734798, "kl": 0.014982430264353752, "learning_rate": 3.770975056689342e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3326 }, { "completion_length": 191.35714721679688, "epoch": 0.21123809523809522, "grad_norm": 0.004375983029603958, "kl": 0.01572563871741295, "learning_rate": 3.772108843537415e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3327 }, { "completion_length": 126.5714340209961, "epoch": 0.2113015873015873, "grad_norm": 0.006495606154203415, "kl": 0.025768842548131943, "learning_rate": 3.7732426303854873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3328 }, { "completion_length": 163.07144165039062, "epoch": 0.21136507936507937, "grad_norm": 0.005192522890865803, "kl": 0.015165051445364952, "learning_rate": 3.77437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3329 }, { "completion_length": 166.07144165039062, "epoch": 0.21142857142857144, "grad_norm": 0.004714954178780317, "kl": 0.014816636219620705, "learning_rate": 3.7755102040816324e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3330 }, { "completion_length": 152.2857208251953, "epoch": 0.21149206349206348, "grad_norm": 0.004076559096574783, "kl": 0.01599397137761116, "learning_rate": 3.7766439909297047e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3331 }, { "completion_length": 198.6428680419922, "epoch": 0.21155555555555555, "grad_norm": 0.003806333290413022, "kl": 0.012495594099164009, "learning_rate": 3.7777777777777775e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3332 }, { "completion_length": 171.50001525878906, "epoch": 0.21161904761904762, "grad_norm": 0.006713295821100473, "kl": 0.01985100656747818, "learning_rate": 3.7789115646258504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3333 }, { "completion_length": 116.50000762939453, "epoch": 0.2116825396825397, "grad_norm": 0.009111211635172367, "kl": 0.03372190147638321, "learning_rate": 3.7800453514739227e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3334 }, { "completion_length": 185.07144165039062, "epoch": 0.21174603174603174, "grad_norm": 0.004039494786411524, "kl": 0.01515294425189495, "learning_rate": 3.7811791383219955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3335 }, { "completion_length": 138.21429443359375, "epoch": 0.2118095238095238, "grad_norm": 0.005922708660364151, "kl": 0.022121421992778778, "learning_rate": 3.7823129251700683e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3336 }, { "completion_length": 168.42857360839844, "epoch": 0.21187301587301588, "grad_norm": 0.0031333714723587036, "kl": 0.01132325641810894, "learning_rate": 3.78344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3337 }, { "completion_length": 171.2857208251953, "epoch": 0.21193650793650792, "grad_norm": 0.0050798035226762295, "kl": 0.018635593354701996, "learning_rate": 3.784580498866213e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3338 }, { "completion_length": 161.6428680419922, "epoch": 0.212, "grad_norm": 0.005004465114325285, "kl": 0.017694612964987755, "learning_rate": 3.785714285714285e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3339 }, { "completion_length": 170.07144165039062, "epoch": 0.21206349206349207, "grad_norm": 0.006374760996550322, "kl": 0.019322210922837257, "learning_rate": 3.786848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3340 }, { "completion_length": 178.92857360839844, "epoch": 0.21212698412698414, "grad_norm": 0.0034359735436737537, "kl": 0.013658897951245308, "learning_rate": 3.787981859410431e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3341 }, { "completion_length": 127.00000762939453, "epoch": 0.21219047619047618, "grad_norm": 0.0066543761640787125, "kl": 0.02338145487010479, "learning_rate": 3.789115646258503e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3342 }, { "completion_length": 185.6428680419922, "epoch": 0.21225396825396825, "grad_norm": 0.00452566659078002, "kl": 0.017290901392698288, "learning_rate": 3.790249433106576e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3343 }, { "completion_length": 133.21429443359375, "epoch": 0.21231746031746032, "grad_norm": 0.005399390123784542, "kl": 0.020895665511488914, "learning_rate": 3.7913832199546487e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3344 }, { "completion_length": 198.2857208251953, "epoch": 0.2123809523809524, "grad_norm": 0.0030536383856087923, "kl": 0.010948465205729008, "learning_rate": 3.7925170068027205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3345 }, { "completion_length": 172.2857208251953, "epoch": 0.21244444444444444, "grad_norm": 0.005373932886868715, "kl": 0.017860624939203262, "learning_rate": 3.7936507936507933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3346 }, { "completion_length": 174.1428680419922, "epoch": 0.2125079365079365, "grad_norm": 0.006104875821620226, "kl": 0.018609443679451942, "learning_rate": 3.794784580498866e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3347 }, { "completion_length": 147.42857360839844, "epoch": 0.21257142857142858, "grad_norm": 0.004294203594326973, "kl": 0.019474472850561142, "learning_rate": 3.7959183673469384e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3348 }, { "completion_length": 184.85714721679688, "epoch": 0.21263492063492062, "grad_norm": 0.005702809896320105, "kl": 0.01929188147187233, "learning_rate": 3.797052154195011e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3349 }, { "completion_length": 175.50001525878906, "epoch": 0.2126984126984127, "grad_norm": 0.005353765562176704, "kl": 0.017817704007029533, "learning_rate": 3.798185941043084e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3350 }, { "completion_length": 215.50001525878906, "epoch": 0.21276190476190476, "grad_norm": 0.002572458703070879, "kl": 0.009010287001729012, "learning_rate": 3.7993197278911563e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3351 }, { "completion_length": 166.2857208251953, "epoch": 0.21282539682539683, "grad_norm": 0.0033361134119331837, "kl": 0.01314091868698597, "learning_rate": 3.8004535147392286e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3352 }, { "completion_length": 170.92857360839844, "epoch": 0.21288888888888888, "grad_norm": 0.0032663634046912193, "kl": 0.012467997148633003, "learning_rate": 3.8015873015873015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3353 }, { "completion_length": 238.7857208251953, "epoch": 0.21295238095238095, "grad_norm": 0.0025282881688326597, "kl": 0.009009156376123428, "learning_rate": 3.802721088435374e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3354 }, { "completion_length": 156.5, "epoch": 0.21301587301587302, "grad_norm": 0.003976513631641865, "kl": 0.015253211371600628, "learning_rate": 3.8038548752834466e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3355 }, { "completion_length": 157.35714721679688, "epoch": 0.2130793650793651, "grad_norm": 0.004365206230431795, "kl": 0.01616206206381321, "learning_rate": 3.8049886621315194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3356 }, { "completion_length": 107.71428680419922, "epoch": 0.21314285714285713, "grad_norm": 0.007195096462965012, "kl": 0.024977875873446465, "learning_rate": 3.8061224489795917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3357 }, { "completion_length": 167.1428680419922, "epoch": 0.2132063492063492, "grad_norm": 0.003257203381508589, "kl": 0.009731126949191093, "learning_rate": 3.8072562358276645e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3358 }, { "completion_length": 199.1428680419922, "epoch": 0.21326984126984128, "grad_norm": 0.004068186040967703, "kl": 0.012466575019061565, "learning_rate": 3.8083900226757373e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3359 }, { "completion_length": 180.00001525878906, "epoch": 0.21333333333333335, "grad_norm": 0.004653573036193848, "kl": 0.013615493662655354, "learning_rate": 3.809523809523809e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3360 }, { "completion_length": 158.5, "epoch": 0.2133968253968254, "grad_norm": 0.0038789997342973948, "kl": 0.014501957222819328, "learning_rate": 3.810657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3361 }, { "completion_length": 149.1428680419922, "epoch": 0.21346031746031746, "grad_norm": 0.004279535263776779, "kl": 0.015775425359606743, "learning_rate": 3.811791383219954e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3362 }, { "completion_length": 141.07144165039062, "epoch": 0.21352380952380953, "grad_norm": 0.003655862296000123, "kl": 0.013266571797430515, "learning_rate": 3.812925170068027e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3363 }, { "completion_length": 159.07144165039062, "epoch": 0.21358730158730158, "grad_norm": 0.003633856074884534, "kl": 0.013578519225120544, "learning_rate": 3.8140589569161e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3364 }, { "completion_length": 146.85714721679688, "epoch": 0.21365079365079365, "grad_norm": 0.005544246640056372, "kl": 0.017857443541288376, "learning_rate": 3.815192743764172e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3365 }, { "completion_length": 156.07144165039062, "epoch": 0.21371428571428572, "grad_norm": 0.006265632808208466, "kl": 0.0194225013256073, "learning_rate": 3.816326530612245e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3366 }, { "completion_length": 157.1428680419922, "epoch": 0.2137777777777778, "grad_norm": 0.0035076679196208715, "kl": 0.010844370350241661, "learning_rate": 3.817460317460317e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3367 }, { "completion_length": 170.7857208251953, "epoch": 0.21384126984126983, "grad_norm": 0.003750123782083392, "kl": 0.012075978331267834, "learning_rate": 3.8185941043083895e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3368 }, { "completion_length": 156.7857208251953, "epoch": 0.2139047619047619, "grad_norm": 0.0045159123837947845, "kl": 0.014349542558193207, "learning_rate": 3.8197278911564623e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3369 }, { "completion_length": 185.35714721679688, "epoch": 0.21396825396825397, "grad_norm": 0.002968790242448449, "kl": 0.010938521474599838, "learning_rate": 3.820861678004535e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3370 }, { "completion_length": 157.5, "epoch": 0.21403174603174605, "grad_norm": 0.004876354709267616, "kl": 0.0168004147708416, "learning_rate": 3.8219954648526074e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3371 }, { "completion_length": 199.85714721679688, "epoch": 0.2140952380952381, "grad_norm": 0.0030902300495654345, "kl": 0.009780635125935078, "learning_rate": 3.82312925170068e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3372 }, { "completion_length": 135.7857208251953, "epoch": 0.21415873015873016, "grad_norm": 0.008443031460046768, "kl": 0.023485517129302025, "learning_rate": 3.824263038548753e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3373 }, { "completion_length": 165.0, "epoch": 0.21422222222222223, "grad_norm": 0.0038239595014601946, "kl": 0.012107540853321552, "learning_rate": 3.825396825396825e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3374 }, { "completion_length": 155.5, "epoch": 0.21428571428571427, "grad_norm": 0.005354024935513735, "kl": 0.017169857397675514, "learning_rate": 3.8265306122448977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3375 }, { "completion_length": 150.42857360839844, "epoch": 0.21434920634920634, "grad_norm": 1.2722957134246826, "kl": 0.019818885251879692, "learning_rate": 3.8276643990929705e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3376 }, { "completion_length": 125.35714721679688, "epoch": 0.21441269841269842, "grad_norm": 0.005804709158837795, "kl": 0.018188484013080597, "learning_rate": 3.828798185941043e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3377 }, { "completion_length": 171.1428680419922, "epoch": 0.2144761904761905, "grad_norm": 0.0036175153218209743, "kl": 0.012394582852721214, "learning_rate": 3.8299319727891156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3378 }, { "completion_length": 156.42857360839844, "epoch": 0.21453968253968253, "grad_norm": 0.00442334171384573, "kl": 0.013780293054878712, "learning_rate": 3.8310657596371884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3379 }, { "completion_length": 156.2857208251953, "epoch": 0.2146031746031746, "grad_norm": 0.003442096523940563, "kl": 0.010024753399193287, "learning_rate": 3.8321995464852607e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3380 }, { "completion_length": 173.6428680419922, "epoch": 0.21466666666666667, "grad_norm": 0.0053865560330450535, "kl": 0.01740862801671028, "learning_rate": 3.8333333333333335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3381 }, { "completion_length": 198.6428680419922, "epoch": 0.21473015873015874, "grad_norm": 0.004359483253210783, "kl": 0.011003450490534306, "learning_rate": 3.8344671201814053e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3382 }, { "completion_length": 177.50001525878906, "epoch": 0.2147936507936508, "grad_norm": 0.0059005627408623695, "kl": 0.017960987985134125, "learning_rate": 3.835600907029478e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3383 }, { "completion_length": 162.6428680419922, "epoch": 0.21485714285714286, "grad_norm": 0.004787123762071133, "kl": 0.014233985915780067, "learning_rate": 3.836734693877551e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3384 }, { "completion_length": 171.2857208251953, "epoch": 0.21492063492063493, "grad_norm": 0.005082457326352596, "kl": 0.017446763813495636, "learning_rate": 3.837868480725623e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3385 }, { "completion_length": 165.85714721679688, "epoch": 0.21498412698412697, "grad_norm": 0.0056681642308831215, "kl": 0.017162814736366272, "learning_rate": 3.839002267573696e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3386 }, { "completion_length": 152.21429443359375, "epoch": 0.21504761904761904, "grad_norm": 0.005233559757471085, "kl": 0.01869221031665802, "learning_rate": 3.840136054421769e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3387 }, { "completion_length": 190.92857360839844, "epoch": 0.21511111111111111, "grad_norm": 0.006153958383947611, "kl": 0.01427374966442585, "learning_rate": 3.841269841269841e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3388 }, { "completion_length": 176.07144165039062, "epoch": 0.21517460317460319, "grad_norm": 0.004906437359750271, "kl": 0.01564779132604599, "learning_rate": 3.8424036281179134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3389 }, { "completion_length": 156.0, "epoch": 0.21523809523809523, "grad_norm": 0.004567870404571295, "kl": 0.016200244426727295, "learning_rate": 3.843537414965986e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3390 }, { "completion_length": 178.50001525878906, "epoch": 0.2153015873015873, "grad_norm": 0.006658990401774645, "kl": 0.018564565107226372, "learning_rate": 3.8446712018140585e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3391 }, { "completion_length": 180.35714721679688, "epoch": 0.21536507936507937, "grad_norm": 0.0043230196461081505, "kl": 0.014905856922268867, "learning_rate": 3.8458049886621313e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3392 }, { "completion_length": 185.50001525878906, "epoch": 0.21542857142857144, "grad_norm": 0.005826582200825214, "kl": 0.019643431529402733, "learning_rate": 3.846938775510204e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3393 }, { "completion_length": 152.42857360839844, "epoch": 0.21549206349206348, "grad_norm": 0.007806348614394665, "kl": 0.025070633739233017, "learning_rate": 3.8480725623582765e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3394 }, { "completion_length": 199.1428680419922, "epoch": 0.21555555555555556, "grad_norm": 0.004684616811573505, "kl": 0.014601258561015129, "learning_rate": 3.8492063492063493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3395 }, { "completion_length": 165.35714721679688, "epoch": 0.21561904761904763, "grad_norm": 0.004833524581044912, "kl": 0.016242584213614464, "learning_rate": 3.8503401360544216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3396 }, { "completion_length": 139.5, "epoch": 0.21568253968253967, "grad_norm": 1.190017580986023, "kl": 0.035876061767339706, "learning_rate": 3.851473922902494e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3397 }, { "completion_length": 153.2857208251953, "epoch": 0.21574603174603174, "grad_norm": 0.004611271899193525, "kl": 0.016704851761460304, "learning_rate": 3.8526077097505667e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3398 }, { "completion_length": 190.42857360839844, "epoch": 0.2158095238095238, "grad_norm": 0.0038769494276493788, "kl": 0.013481048867106438, "learning_rate": 3.8537414965986395e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3399 }, { "completion_length": 158.57144165039062, "epoch": 0.21587301587301588, "grad_norm": 0.004672067239880562, "kl": 0.017943069338798523, "learning_rate": 3.854875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3400 }, { "completion_length": 211.1428680419922, "epoch": 0.21593650793650793, "grad_norm": 0.00303299049846828, "kl": 0.011685064993798733, "learning_rate": 3.8560090702947846e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3401 }, { "completion_length": 130.6428680419922, "epoch": 0.216, "grad_norm": 0.007313009351491928, "kl": 0.0224775243550539, "learning_rate": 3.857142857142857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3402 }, { "completion_length": 140.35714721679688, "epoch": 0.21606349206349207, "grad_norm": 0.004038992337882519, "kl": 0.014841392636299133, "learning_rate": 3.8582766439909297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3403 }, { "completion_length": 182.92857360839844, "epoch": 0.21612698412698414, "grad_norm": 1.1634676456451416, "kl": 0.015169136226177216, "learning_rate": 3.859410430839002e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3404 }, { "completion_length": 182.1428680419922, "epoch": 0.21619047619047618, "grad_norm": 0.0031955421436578035, "kl": 0.012800666503608227, "learning_rate": 3.8605442176870743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3405 }, { "completion_length": 177.71429443359375, "epoch": 0.21625396825396825, "grad_norm": 0.004733784589916468, "kl": 0.01396738737821579, "learning_rate": 3.861678004535147e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3406 }, { "completion_length": 178.50001525878906, "epoch": 0.21631746031746032, "grad_norm": 0.00516427680850029, "kl": 0.01665942743420601, "learning_rate": 3.86281179138322e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3407 }, { "completion_length": 155.92857360839844, "epoch": 0.21638095238095237, "grad_norm": 0.003957631066441536, "kl": 0.01575004868209362, "learning_rate": 3.863945578231292e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3408 }, { "completion_length": 160.5, "epoch": 0.21644444444444444, "grad_norm": 0.006742074154317379, "kl": 0.025234024971723557, "learning_rate": 3.865079365079365e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3409 }, { "completion_length": 179.21429443359375, "epoch": 0.2165079365079365, "grad_norm": 0.003525980981066823, "kl": 0.012317845597863197, "learning_rate": 3.866213151927438e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3410 }, { "completion_length": 131.57144165039062, "epoch": 0.21657142857142858, "grad_norm": 2.116767168045044, "kl": 0.02112985961139202, "learning_rate": 3.8673469387755096e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3411 }, { "completion_length": 170.71429443359375, "epoch": 0.21663492063492062, "grad_norm": 0.004486804362386465, "kl": 0.017776072025299072, "learning_rate": 3.8684807256235824e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3412 }, { "completion_length": 155.42857360839844, "epoch": 0.2166984126984127, "grad_norm": 0.004744645673781633, "kl": 0.019254503771662712, "learning_rate": 3.869614512471655e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3413 }, { "completion_length": 144.07144165039062, "epoch": 0.21676190476190477, "grad_norm": 0.006550414487719536, "kl": 0.02953956462442875, "learning_rate": 3.8707482993197275e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3414 }, { "completion_length": 148.57144165039062, "epoch": 0.21682539682539684, "grad_norm": 0.010779913514852524, "kl": 0.03659670799970627, "learning_rate": 3.8718820861678004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3415 }, { "completion_length": 152.2857208251953, "epoch": 0.21688888888888888, "grad_norm": 1.1046853065490723, "kl": 0.027668923139572144, "learning_rate": 3.873015873015873e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3416 }, { "completion_length": 151.5, "epoch": 0.21695238095238095, "grad_norm": 0.005919779185205698, "kl": 0.024559374898672104, "learning_rate": 3.8741496598639455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3417 }, { "completion_length": 166.71429443359375, "epoch": 0.21701587301587302, "grad_norm": 0.006208932027220726, "kl": 0.026743166148662567, "learning_rate": 3.8752834467120183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3418 }, { "completion_length": 191.35714721679688, "epoch": 0.21707936507936507, "grad_norm": 0.005728531163185835, "kl": 0.022571569308638573, "learning_rate": 3.87641723356009e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3419 }, { "completion_length": 176.7857208251953, "epoch": 0.21714285714285714, "grad_norm": 0.005161543842405081, "kl": 0.020816797390580177, "learning_rate": 3.877551020408163e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3420 }, { "completion_length": 145.07144165039062, "epoch": 0.2172063492063492, "grad_norm": 0.0065867286175489426, "kl": 0.03513321653008461, "learning_rate": 3.8786848072562357e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3421 }, { "completion_length": 198.85714721679688, "epoch": 0.21726984126984128, "grad_norm": 0.004319347906857729, "kl": 0.02454865165054798, "learning_rate": 3.879818594104308e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3422 }, { "completion_length": 155.92857360839844, "epoch": 0.21733333333333332, "grad_norm": 1.1894588470458984, "kl": 0.03295975923538208, "learning_rate": 3.880952380952381e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3423 }, { "completion_length": 199.7857208251953, "epoch": 0.2173968253968254, "grad_norm": 0.006587803829461336, "kl": 0.02827247604727745, "learning_rate": 3.8820861678004536e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3424 }, { "completion_length": 151.92857360839844, "epoch": 0.21746031746031746, "grad_norm": 0.006315998733043671, "kl": 0.032529763877391815, "learning_rate": 3.883219954648526e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3425 }, { "completion_length": 160.0, "epoch": 0.21752380952380954, "grad_norm": 0.004664697218686342, "kl": 0.023028673604130745, "learning_rate": 3.884353741496598e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3426 }, { "completion_length": 175.50001525878906, "epoch": 0.21758730158730158, "grad_norm": 0.005986402742564678, "kl": 0.040815211832523346, "learning_rate": 3.885487528344671e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3427 }, { "completion_length": 147.07144165039062, "epoch": 0.21765079365079365, "grad_norm": 0.008012661710381508, "kl": 0.04513530805706978, "learning_rate": 3.8866213151927433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3428 }, { "completion_length": 195.35714721679688, "epoch": 0.21771428571428572, "grad_norm": 0.004652831237763166, "kl": 0.028902791440486908, "learning_rate": 3.887755102040816e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3429 }, { "completion_length": 164.5, "epoch": 0.21777777777777776, "grad_norm": 0.005465617403388023, "kl": 0.03856587037444115, "learning_rate": 3.888888888888889e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3430 }, { "completion_length": 170.7857208251953, "epoch": 0.21784126984126984, "grad_norm": 0.007967952638864517, "kl": 0.044410113245248795, "learning_rate": 3.890022675736961e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3431 }, { "completion_length": 214.1428680419922, "epoch": 0.2179047619047619, "grad_norm": 0.004472401458770037, "kl": 0.028386926278471947, "learning_rate": 3.891156462585034e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3432 }, { "completion_length": 136.42857360839844, "epoch": 0.21796825396825398, "grad_norm": 0.012551266700029373, "kl": 0.05096183717250824, "learning_rate": 3.8922902494331063e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3433 }, { "completion_length": 169.42857360839844, "epoch": 0.21803174603174602, "grad_norm": 0.0070615001022815704, "kl": 0.04626515135169029, "learning_rate": 3.8934240362811786e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3434 }, { "completion_length": 159.1428680419922, "epoch": 0.2180952380952381, "grad_norm": 0.0064145950600504875, "kl": 0.04223069176077843, "learning_rate": 3.8945578231292515e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3435 }, { "completion_length": 175.21429443359375, "epoch": 0.21815873015873016, "grad_norm": 0.004872153978794813, "kl": 0.03504373878240585, "learning_rate": 3.8956916099773243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3436 }, { "completion_length": 194.35714721679688, "epoch": 0.21822222222222223, "grad_norm": 0.004595986567437649, "kl": 0.03233521431684494, "learning_rate": 3.8968253968253966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3437 }, { "completion_length": 121.78572082519531, "epoch": 0.21828571428571428, "grad_norm": 0.006992503069341183, "kl": 0.045070160180330276, "learning_rate": 3.8979591836734694e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3438 }, { "completion_length": 167.57144165039062, "epoch": 0.21834920634920635, "grad_norm": 0.008973920717835426, "kl": 0.044720377773046494, "learning_rate": 3.899092970521542e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3439 }, { "completion_length": 158.21429443359375, "epoch": 0.21841269841269842, "grad_norm": 0.005249807611107826, "kl": 0.04121841862797737, "learning_rate": 3.9002267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3440 }, { "completion_length": 147.0, "epoch": 0.21847619047619046, "grad_norm": 0.005095441360026598, "kl": 0.03588780015707016, "learning_rate": 3.901360544217687e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3441 }, { "completion_length": 142.92857360839844, "epoch": 0.21853968253968253, "grad_norm": 0.0060784355737268925, "kl": 0.04851063713431358, "learning_rate": 3.902494331065759e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3442 }, { "completion_length": 140.35714721679688, "epoch": 0.2186031746031746, "grad_norm": 0.005495466757565737, "kl": 0.04090728983283043, "learning_rate": 3.903628117913832e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3443 }, { "completion_length": 145.6428680419922, "epoch": 0.21866666666666668, "grad_norm": 0.0073258401826024055, "kl": 0.04633389413356781, "learning_rate": 3.9047619047619047e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3444 }, { "completion_length": 177.00001525878906, "epoch": 0.21873015873015872, "grad_norm": 0.005699151661247015, "kl": 0.038327813148498535, "learning_rate": 3.905895691609977e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3445 }, { "completion_length": 167.92857360839844, "epoch": 0.2187936507936508, "grad_norm": 0.006069890223443508, "kl": 0.043863799422979355, "learning_rate": 3.90702947845805e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3446 }, { "completion_length": 145.57144165039062, "epoch": 0.21885714285714286, "grad_norm": 0.007404864765703678, "kl": 0.04491467773914337, "learning_rate": 3.9081632653061226e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3447 }, { "completion_length": 184.42857360839844, "epoch": 0.21892063492063493, "grad_norm": 0.0057932473719120026, "kl": 0.04039464518427849, "learning_rate": 3.9092970521541944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3448 }, { "completion_length": 121.14286041259766, "epoch": 0.21898412698412698, "grad_norm": 0.005846845917403698, "kl": 0.052789244800806046, "learning_rate": 3.910430839002267e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3449 }, { "completion_length": 131.21429443359375, "epoch": 0.21904761904761905, "grad_norm": 0.006942493841052055, "kl": 0.05618412047624588, "learning_rate": 3.91156462585034e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3450 }, { "completion_length": 173.00001525878906, "epoch": 0.21911111111111112, "grad_norm": 0.0055110217072069645, "kl": 0.036913927644491196, "learning_rate": 3.9126984126984123e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3451 }, { "completion_length": 166.2857208251953, "epoch": 0.21917460317460316, "grad_norm": 0.005153012927621603, "kl": 0.03453752025961876, "learning_rate": 3.913832199546485e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3452 }, { "completion_length": 158.35714721679688, "epoch": 0.21923809523809523, "grad_norm": 0.01010327972471714, "kl": 0.04693221300840378, "learning_rate": 3.914965986394558e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3453 }, { "completion_length": 151.35714721679688, "epoch": 0.2193015873015873, "grad_norm": 0.008354967460036278, "kl": 0.0611598938703537, "learning_rate": 3.91609977324263e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3454 }, { "completion_length": 156.57144165039062, "epoch": 0.21936507936507937, "grad_norm": 1.7207175493240356, "kl": 0.0479804091155529, "learning_rate": 3.9172335600907025e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3455 }, { "completion_length": 161.2857208251953, "epoch": 0.21942857142857142, "grad_norm": 0.005790341645479202, "kl": 0.03887542709708214, "learning_rate": 3.9183673469387754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3456 }, { "completion_length": 168.85714721679688, "epoch": 0.2194920634920635, "grad_norm": 0.005136099178344011, "kl": 0.03733950853347778, "learning_rate": 3.9195011337868477e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3457 }, { "completion_length": 168.71429443359375, "epoch": 0.21955555555555556, "grad_norm": 0.005915784277021885, "kl": 0.040788616985082626, "learning_rate": 3.9206349206349205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3458 }, { "completion_length": 146.92857360839844, "epoch": 0.21961904761904763, "grad_norm": 0.005820630118250847, "kl": 0.05522412806749344, "learning_rate": 3.9217687074829933e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3459 }, { "completion_length": 183.57144165039062, "epoch": 0.21968253968253967, "grad_norm": 0.9504691958427429, "kl": 0.03843296319246292, "learning_rate": 3.9229024943310656e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3460 }, { "completion_length": 182.85714721679688, "epoch": 0.21974603174603174, "grad_norm": 0.6708554625511169, "kl": 0.038836654275655746, "learning_rate": 3.9240362811791384e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3461 }, { "completion_length": 162.0, "epoch": 0.21980952380952382, "grad_norm": 0.007899495773017406, "kl": 0.05981964245438576, "learning_rate": 3.9251700680272107e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3462 }, { "completion_length": 203.1428680419922, "epoch": 0.2198730158730159, "grad_norm": 0.006582856643944979, "kl": 0.05335047096014023, "learning_rate": 3.926303854875283e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3463 }, { "completion_length": 143.71429443359375, "epoch": 0.21993650793650793, "grad_norm": 0.007352880667895079, "kl": 0.06744575500488281, "learning_rate": 3.927437641723356e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3464 }, { "completion_length": 174.07144165039062, "epoch": 0.22, "grad_norm": 0.005892169661819935, "kl": 0.04405824840068817, "learning_rate": 3.928571428571428e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3465 }, { "completion_length": 135.6428680419922, "epoch": 0.22006349206349207, "grad_norm": 0.011900011450052261, "kl": 0.09864254295825958, "learning_rate": 3.929705215419501e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3466 }, { "completion_length": 170.7857208251953, "epoch": 0.22012698412698412, "grad_norm": 0.009229999966919422, "kl": 0.08125584572553635, "learning_rate": 3.9308390022675737e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3467 }, { "completion_length": 203.7857208251953, "epoch": 0.2201904761904762, "grad_norm": 0.9740623831748962, "kl": 0.06540136784315109, "learning_rate": 3.931972789115646e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3468 }, { "completion_length": 139.92857360839844, "epoch": 0.22025396825396826, "grad_norm": 0.011301115155220032, "kl": 0.09738267213106155, "learning_rate": 3.933106575963719e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3469 }, { "completion_length": 169.21429443359375, "epoch": 0.22031746031746033, "grad_norm": 1.3932543992996216, "kl": 0.06381605565547943, "learning_rate": 3.934240362811791e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3470 }, { "completion_length": 178.71429443359375, "epoch": 0.22038095238095237, "grad_norm": 1.3892265558242798, "kl": 0.0710182711482048, "learning_rate": 3.9353741496598634e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3471 }, { "completion_length": 162.2857208251953, "epoch": 0.22044444444444444, "grad_norm": 0.009256933815777302, "kl": 0.08810137212276459, "learning_rate": 3.936507936507936e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3472 }, { "completion_length": 189.85714721679688, "epoch": 0.22050793650793651, "grad_norm": 0.010562836192548275, "kl": 0.0889853909611702, "learning_rate": 3.937641723356009e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3473 }, { "completion_length": 160.6428680419922, "epoch": 0.22057142857142858, "grad_norm": 0.01306145079433918, "kl": 0.11614996194839478, "learning_rate": 3.9387755102040813e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3474 }, { "completion_length": 182.85714721679688, "epoch": 0.22063492063492063, "grad_norm": 0.010927039198577404, "kl": 0.0845334455370903, "learning_rate": 3.939909297052154e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3475 }, { "completion_length": 150.85714721679688, "epoch": 0.2206984126984127, "grad_norm": 0.0132441446185112, "kl": 0.09459536522626877, "learning_rate": 3.941043083900227e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3476 }, { "completion_length": 174.50001525878906, "epoch": 0.22076190476190477, "grad_norm": 0.012262988835573196, "kl": 0.09711245447397232, "learning_rate": 3.9421768707482993e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3477 }, { "completion_length": 131.2857208251953, "epoch": 0.2208253968253968, "grad_norm": 0.016084512695670128, "kl": 0.1389535516500473, "learning_rate": 3.9433106575963716e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3478 }, { "completion_length": 140.57144165039062, "epoch": 0.22088888888888888, "grad_norm": 0.015168520621955395, "kl": 0.11560685932636261, "learning_rate": 3.9444444444444444e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3479 }, { "completion_length": 170.0, "epoch": 0.22095238095238096, "grad_norm": 1.204156517982483, "kl": 0.10190708935260773, "learning_rate": 3.9455782312925167e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3480 }, { "completion_length": 193.1428680419922, "epoch": 0.22101587301587303, "grad_norm": 0.012402132153511047, "kl": 0.11368995904922485, "learning_rate": 3.9467120181405895e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3481 }, { "completion_length": 146.57144165039062, "epoch": 0.22107936507936507, "grad_norm": 0.010930124670267105, "kl": 0.10912096500396729, "learning_rate": 3.947845804988662e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3482 }, { "completion_length": 149.0, "epoch": 0.22114285714285714, "grad_norm": 0.009243253618478775, "kl": 0.08640248328447342, "learning_rate": 3.9489795918367346e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3483 }, { "completion_length": 194.07144165039062, "epoch": 0.2212063492063492, "grad_norm": 0.007500326260924339, "kl": 0.0746895894408226, "learning_rate": 3.9501133786848074e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3484 }, { "completion_length": 193.07144165039062, "epoch": 0.22126984126984128, "grad_norm": 0.00640144245699048, "kl": 0.06356852501630783, "learning_rate": 3.951247165532879e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3485 }, { "completion_length": 169.85714721679688, "epoch": 0.22133333333333333, "grad_norm": 0.00691219512373209, "kl": 0.06953619420528412, "learning_rate": 3.952380952380952e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3486 }, { "completion_length": 175.42857360839844, "epoch": 0.2213968253968254, "grad_norm": 0.005830196663737297, "kl": 0.06063978001475334, "learning_rate": 3.953514739229025e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3487 }, { "completion_length": 182.6428680419922, "epoch": 0.22146031746031747, "grad_norm": 0.006743031088262796, "kl": 0.08516092598438263, "learning_rate": 3.954648526077097e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3488 }, { "completion_length": 148.42857360839844, "epoch": 0.2215238095238095, "grad_norm": 0.006396356038749218, "kl": 0.06289398670196533, "learning_rate": 3.95578231292517e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3489 }, { "completion_length": 158.57144165039062, "epoch": 0.22158730158730158, "grad_norm": 1.0222513675689697, "kl": 0.07027941197156906, "learning_rate": 3.956916099773243e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3490 }, { "completion_length": 172.2857208251953, "epoch": 0.22165079365079365, "grad_norm": 0.007008370943367481, "kl": 0.05804221332073212, "learning_rate": 3.958049886621315e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3491 }, { "completion_length": 193.1428680419922, "epoch": 0.22171428571428572, "grad_norm": 0.004495983477681875, "kl": 0.04006683826446533, "learning_rate": 3.9591836734693873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3492 }, { "completion_length": 164.5, "epoch": 0.22177777777777777, "grad_norm": 0.007592245005071163, "kl": 0.07498987019062042, "learning_rate": 3.96031746031746e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3493 }, { "completion_length": 135.42857360839844, "epoch": 0.22184126984126984, "grad_norm": 0.007415971253067255, "kl": 0.07283417135477066, "learning_rate": 3.9614512471655324e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3494 }, { "completion_length": 190.6428680419922, "epoch": 0.2219047619047619, "grad_norm": 0.0042811306193470955, "kl": 0.04353242367506027, "learning_rate": 3.962585034013605e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3495 }, { "completion_length": 168.92857360839844, "epoch": 0.22196825396825398, "grad_norm": 0.005507079418748617, "kl": 0.047600146383047104, "learning_rate": 3.963718820861678e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3496 }, { "completion_length": 144.6428680419922, "epoch": 0.22203174603174602, "grad_norm": 0.005178383551537991, "kl": 0.048797644674777985, "learning_rate": 3.9648526077097504e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3497 }, { "completion_length": 165.07144165039062, "epoch": 0.2220952380952381, "grad_norm": 0.0056646838784217834, "kl": 0.057920902967453, "learning_rate": 3.965986394557823e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3498 }, { "completion_length": 167.7857208251953, "epoch": 0.22215873015873017, "grad_norm": 0.006228437647223473, "kl": 0.05207445099949837, "learning_rate": 3.967120181405896e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3499 }, { "completion_length": 182.21429443359375, "epoch": 0.2222222222222222, "grad_norm": 0.005362620577216148, "kl": 0.05811595171689987, "learning_rate": 3.968253968253968e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3500 }, { "completion_length": 173.6428680419922, "epoch": 0.22228571428571428, "grad_norm": 1.5410120487213135, "kl": 0.041332051157951355, "learning_rate": 3.9693877551020406e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3501 }, { "completion_length": 169.5, "epoch": 0.22234920634920635, "grad_norm": 0.005296550691127777, "kl": 0.04736564680933952, "learning_rate": 3.970521541950113e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3502 }, { "completion_length": 136.71429443359375, "epoch": 0.22241269841269842, "grad_norm": 0.014606683515012264, "kl": 0.08074287325143814, "learning_rate": 3.9716553287981857e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3503 }, { "completion_length": 154.1428680419922, "epoch": 0.22247619047619047, "grad_norm": 0.005847502965480089, "kl": 0.05853849649429321, "learning_rate": 3.9727891156462585e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3504 }, { "completion_length": 159.85714721679688, "epoch": 0.22253968253968254, "grad_norm": 0.00518185505643487, "kl": 0.051669683307409286, "learning_rate": 3.973922902494331e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3505 }, { "completion_length": 154.57144165039062, "epoch": 0.2226031746031746, "grad_norm": 0.005591859109699726, "kl": 0.045886196196079254, "learning_rate": 3.9750566893424036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3506 }, { "completion_length": 181.2857208251953, "epoch": 0.22266666666666668, "grad_norm": 0.005231233313679695, "kl": 0.04471123591065407, "learning_rate": 3.976190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3507 }, { "completion_length": 161.6428680419922, "epoch": 0.22273015873015872, "grad_norm": 0.8042535781860352, "kl": 0.03970462083816528, "learning_rate": 3.977324263038548e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3508 }, { "completion_length": 145.7857208251953, "epoch": 0.2227936507936508, "grad_norm": 0.005700518377125263, "kl": 0.06081322953104973, "learning_rate": 3.978458049886621e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3509 }, { "completion_length": 131.35714721679688, "epoch": 0.22285714285714286, "grad_norm": 0.008835838176310062, "kl": 0.0746220201253891, "learning_rate": 3.979591836734694e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3510 }, { "completion_length": 157.7857208251953, "epoch": 0.2229206349206349, "grad_norm": 0.004499827977269888, "kl": 0.04649374634027481, "learning_rate": 3.980725623582766e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3511 }, { "completion_length": 162.7857208251953, "epoch": 0.22298412698412698, "grad_norm": 1.1379107236862183, "kl": 0.03600137308239937, "learning_rate": 3.981859410430839e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3512 }, { "completion_length": 193.1428680419922, "epoch": 0.22304761904761905, "grad_norm": 0.003741494147107005, "kl": 0.031149614602327347, "learning_rate": 3.982993197278912e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3513 }, { "completion_length": 153.35714721679688, "epoch": 0.22311111111111112, "grad_norm": 0.0059923394583165646, "kl": 0.04501614347100258, "learning_rate": 3.9841269841269835e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3514 }, { "completion_length": 123.35714721679688, "epoch": 0.22317460317460316, "grad_norm": 1.6643023490905762, "kl": 0.051892977207899094, "learning_rate": 3.9852607709750563e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3515 }, { "completion_length": 149.21429443359375, "epoch": 0.22323809523809524, "grad_norm": 0.005673625972121954, "kl": 0.052325669676065445, "learning_rate": 3.986394557823129e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3516 }, { "completion_length": 179.35714721679688, "epoch": 0.2233015873015873, "grad_norm": 0.003841313999146223, "kl": 0.035752467811107635, "learning_rate": 3.9875283446712015e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3517 }, { "completion_length": 180.2857208251953, "epoch": 0.22336507936507938, "grad_norm": 0.004110748879611492, "kl": 0.03392602130770683, "learning_rate": 3.9886621315192743e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3518 }, { "completion_length": 161.92857360839844, "epoch": 0.22342857142857142, "grad_norm": 0.004866893403232098, "kl": 0.04231451079249382, "learning_rate": 3.989795918367347e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3519 }, { "completion_length": 166.21429443359375, "epoch": 0.2234920634920635, "grad_norm": 0.005774925462901592, "kl": 0.04088912904262543, "learning_rate": 3.9909297052154194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3520 }, { "completion_length": 145.85714721679688, "epoch": 0.22355555555555556, "grad_norm": 0.005496158730238676, "kl": 0.03927047178149223, "learning_rate": 3.992063492063492e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3521 }, { "completion_length": 186.21429443359375, "epoch": 0.2236190476190476, "grad_norm": 0.004154660739004612, "kl": 0.03223502263426781, "learning_rate": 3.993197278911564e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3522 }, { "completion_length": 173.21429443359375, "epoch": 0.22368253968253968, "grad_norm": 0.004854124039411545, "kl": 0.032499026507139206, "learning_rate": 3.994331065759637e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3523 }, { "completion_length": 195.00001525878906, "epoch": 0.22374603174603175, "grad_norm": 0.003636196255683899, "kl": 0.03148917853832245, "learning_rate": 3.9954648526077096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3524 }, { "completion_length": 220.00001525878906, "epoch": 0.22380952380952382, "grad_norm": 0.0033888700418174267, "kl": 0.027653712779283524, "learning_rate": 3.996598639455782e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3525 }, { "completion_length": 162.21429443359375, "epoch": 0.22387301587301586, "grad_norm": 0.004244841169565916, "kl": 0.03374660760164261, "learning_rate": 3.9977324263038547e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3526 }, { "completion_length": 167.1428680419922, "epoch": 0.22393650793650793, "grad_norm": 0.0034685744903981686, "kl": 0.02986319363117218, "learning_rate": 3.9988662131519275e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3527 }, { "completion_length": 185.35714721679688, "epoch": 0.224, "grad_norm": 0.00361373252235353, "kl": 0.03305981680750847, "learning_rate": 4e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3528 }, { "completion_length": 168.1428680419922, "epoch": 0.22406349206349208, "grad_norm": 0.003289056709036231, "kl": 0.02699384279549122, "learning_rate": 4.001133786848072e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3529 }, { "completion_length": 125.42857360839844, "epoch": 0.22412698412698412, "grad_norm": 0.005289925262331963, "kl": 0.04381967708468437, "learning_rate": 4.002267573696145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3530 }, { "completion_length": 148.7857208251953, "epoch": 0.2241904761904762, "grad_norm": 0.003997182939201593, "kl": 0.035478487610816956, "learning_rate": 4.003401360544217e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3531 }, { "completion_length": 152.6428680419922, "epoch": 0.22425396825396826, "grad_norm": 0.004473051987588406, "kl": 0.03059922531247139, "learning_rate": 4.00453514739229e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3532 }, { "completion_length": 167.6428680419922, "epoch": 0.2243174603174603, "grad_norm": 0.004753341432660818, "kl": 0.04337681457400322, "learning_rate": 4.005668934240363e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3533 }, { "completion_length": 170.5, "epoch": 0.22438095238095238, "grad_norm": 0.00635576993227005, "kl": 0.04011934995651245, "learning_rate": 4.006802721088435e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3534 }, { "completion_length": 206.21429443359375, "epoch": 0.22444444444444445, "grad_norm": 0.004063928499817848, "kl": 0.035242751240730286, "learning_rate": 4.007936507936508e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3535 }, { "completion_length": 191.6428680419922, "epoch": 0.22450793650793652, "grad_norm": 0.003564775688573718, "kl": 0.03295208513736725, "learning_rate": 4.009070294784581e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3536 }, { "completion_length": 180.00001525878906, "epoch": 0.22457142857142856, "grad_norm": 0.003997837193310261, "kl": 0.024465162307024002, "learning_rate": 4.0102040816326525e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3537 }, { "completion_length": 150.5, "epoch": 0.22463492063492063, "grad_norm": 0.006848123390227556, "kl": 0.03893187642097473, "learning_rate": 4.0113378684807254e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3538 }, { "completion_length": 130.0, "epoch": 0.2246984126984127, "grad_norm": 0.0056187608279287815, "kl": 0.046000901609659195, "learning_rate": 4.012471655328798e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3539 }, { "completion_length": 141.2857208251953, "epoch": 0.22476190476190477, "grad_norm": 0.004683522507548332, "kl": 0.03477081283926964, "learning_rate": 4.0136054421768705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3540 }, { "completion_length": 159.85714721679688, "epoch": 0.22482539682539682, "grad_norm": 0.004147564526647329, "kl": 0.035983067005872726, "learning_rate": 4.0147392290249433e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3541 }, { "completion_length": 153.2857208251953, "epoch": 0.2248888888888889, "grad_norm": 0.005788079928606749, "kl": 0.04238429293036461, "learning_rate": 4.015873015873016e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3542 }, { "completion_length": 163.5, "epoch": 0.22495238095238096, "grad_norm": 0.005542292725294828, "kl": 0.037472523748874664, "learning_rate": 4.0170068027210884e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3543 }, { "completion_length": 192.71429443359375, "epoch": 0.225015873015873, "grad_norm": 0.0037476832512766123, "kl": 0.02771005965769291, "learning_rate": 4.0181405895691607e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3544 }, { "completion_length": 147.85714721679688, "epoch": 0.22507936507936507, "grad_norm": 0.00756207387894392, "kl": 0.04143368452787399, "learning_rate": 4.019274376417233e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3545 }, { "completion_length": 160.2857208251953, "epoch": 0.22514285714285714, "grad_norm": 0.003539536613970995, "kl": 0.031852755695581436, "learning_rate": 4.020408163265306e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3546 }, { "completion_length": 186.07144165039062, "epoch": 0.22520634920634922, "grad_norm": 0.004090524278581142, "kl": 0.030163025483489037, "learning_rate": 4.0215419501133786e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3547 }, { "completion_length": 180.07144165039062, "epoch": 0.22526984126984126, "grad_norm": 0.003440750064328313, "kl": 0.03142391890287399, "learning_rate": 4.022675736961451e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3548 }, { "completion_length": 177.07144165039062, "epoch": 0.22533333333333333, "grad_norm": 0.0035723657347261906, "kl": 0.0320974700152874, "learning_rate": 4.0238095238095237e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3549 }, { "completion_length": 198.57144165039062, "epoch": 0.2253968253968254, "grad_norm": 0.006305191200226545, "kl": 0.030031021684408188, "learning_rate": 4.0249433106575965e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3550 }, { "completion_length": 196.07144165039062, "epoch": 0.22546031746031747, "grad_norm": 0.004418602213263512, "kl": 0.03131886571645737, "learning_rate": 4.0260770975056683e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3551 }, { "completion_length": 170.35714721679688, "epoch": 0.22552380952380952, "grad_norm": 0.005897615570574999, "kl": 0.036814939230680466, "learning_rate": 4.027210884353741e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3552 }, { "completion_length": 142.1428680419922, "epoch": 0.2255873015873016, "grad_norm": 0.00543254567310214, "kl": 0.04147722199559212, "learning_rate": 4.028344671201814e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3553 }, { "completion_length": 172.1428680419922, "epoch": 0.22565079365079366, "grad_norm": 0.004981666803359985, "kl": 0.028026923537254333, "learning_rate": 4.029478458049886e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3554 }, { "completion_length": 151.42857360839844, "epoch": 0.2257142857142857, "grad_norm": 0.003940674476325512, "kl": 0.0366046242415905, "learning_rate": 4.030612244897959e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3555 }, { "completion_length": 157.42857360839844, "epoch": 0.22577777777777777, "grad_norm": 0.005067820195108652, "kl": 0.029590079560875893, "learning_rate": 4.031746031746032e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3556 }, { "completion_length": 164.35714721679688, "epoch": 0.22584126984126984, "grad_norm": 0.0035114858765155077, "kl": 0.02660731039941311, "learning_rate": 4.032879818594104e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3557 }, { "completion_length": 160.0, "epoch": 0.2259047619047619, "grad_norm": 0.003921249881386757, "kl": 0.032414939254522324, "learning_rate": 4.034013605442177e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3558 }, { "completion_length": 172.07144165039062, "epoch": 0.22596825396825396, "grad_norm": 0.003958564251661301, "kl": 0.03214382007718086, "learning_rate": 4.0351473922902493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3559 }, { "completion_length": 134.21429443359375, "epoch": 0.22603174603174603, "grad_norm": 0.005958859808743, "kl": 0.04542113468050957, "learning_rate": 4.0362811791383216e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3560 }, { "completion_length": 162.92857360839844, "epoch": 0.2260952380952381, "grad_norm": 0.004707413725554943, "kl": 0.02980276383459568, "learning_rate": 4.0374149659863944e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3561 }, { "completion_length": 154.07144165039062, "epoch": 0.22615873015873017, "grad_norm": 0.007747597061097622, "kl": 0.04832050949335098, "learning_rate": 4.038548752834467e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3562 }, { "completion_length": 147.07144165039062, "epoch": 0.2262222222222222, "grad_norm": 0.004297241568565369, "kl": 0.03486695885658264, "learning_rate": 4.0396825396825395e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3563 }, { "completion_length": 154.35714721679688, "epoch": 0.22628571428571428, "grad_norm": 0.004321590065956116, "kl": 0.033810488879680634, "learning_rate": 4.0408163265306123e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3564 }, { "completion_length": 166.92857360839844, "epoch": 0.22634920634920636, "grad_norm": 0.004634900949895382, "kl": 0.026951923966407776, "learning_rate": 4.0419501133786846e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3565 }, { "completion_length": 188.35714721679688, "epoch": 0.22641269841269843, "grad_norm": 0.00276967603713274, "kl": 0.026233430951833725, "learning_rate": 4.043083900226757e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3566 }, { "completion_length": 190.42857360839844, "epoch": 0.22647619047619047, "grad_norm": 0.003306096186861396, "kl": 0.03357366845011711, "learning_rate": 4.0442176870748297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3567 }, { "completion_length": 148.21429443359375, "epoch": 0.22653968253968254, "grad_norm": 0.0052136750891804695, "kl": 0.041028451174497604, "learning_rate": 4.045351473922902e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3568 }, { "completion_length": 182.57144165039062, "epoch": 0.2266031746031746, "grad_norm": 0.0031391119118779898, "kl": 0.02586526609957218, "learning_rate": 4.046485260770975e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3569 }, { "completion_length": 146.7857208251953, "epoch": 0.22666666666666666, "grad_norm": 0.0045554982498288155, "kl": 0.0312521867454052, "learning_rate": 4.0476190476190476e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3570 }, { "completion_length": 158.92857360839844, "epoch": 0.22673015873015873, "grad_norm": 0.0034066999796777964, "kl": 0.03250595182180405, "learning_rate": 4.04875283446712e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3571 }, { "completion_length": 142.1428680419922, "epoch": 0.2267936507936508, "grad_norm": 0.0034813459496945143, "kl": 0.03299519419670105, "learning_rate": 4.049886621315193e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3572 }, { "completion_length": 185.42857360839844, "epoch": 0.22685714285714287, "grad_norm": 0.9745299816131592, "kl": 0.026941122487187386, "learning_rate": 4.051020408163265e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3573 }, { "completion_length": 137.71429443359375, "epoch": 0.2269206349206349, "grad_norm": 0.004369187168776989, "kl": 0.03201939910650253, "learning_rate": 4.0521541950113373e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3574 }, { "completion_length": 174.35714721679688, "epoch": 0.22698412698412698, "grad_norm": 0.002916790312156081, "kl": 0.027611013501882553, "learning_rate": 4.05328798185941e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3575 }, { "completion_length": 200.35714721679688, "epoch": 0.22704761904761905, "grad_norm": 0.002834850689396262, "kl": 0.02547544799745083, "learning_rate": 4.054421768707483e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3576 }, { "completion_length": 181.6428680419922, "epoch": 0.22711111111111112, "grad_norm": 0.0031788991764187813, "kl": 0.03142446279525757, "learning_rate": 4.055555555555555e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3577 }, { "completion_length": 177.6428680419922, "epoch": 0.22717460317460317, "grad_norm": 0.003805403131991625, "kl": 0.032693397253751755, "learning_rate": 4.056689342403628e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3578 }, { "completion_length": 143.92857360839844, "epoch": 0.22723809523809524, "grad_norm": 0.003908601123839617, "kl": 0.03646473586559296, "learning_rate": 4.057823129251701e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3579 }, { "completion_length": 164.71429443359375, "epoch": 0.2273015873015873, "grad_norm": 0.0038662562146782875, "kl": 0.03734356909990311, "learning_rate": 4.058956916099773e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3580 }, { "completion_length": 164.07144165039062, "epoch": 0.22736507936507935, "grad_norm": 0.003710645716637373, "kl": 0.0320315808057785, "learning_rate": 4.0600907029478455e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3581 }, { "completion_length": 170.7857208251953, "epoch": 0.22742857142857142, "grad_norm": 0.003103592898696661, "kl": 0.027786973863840103, "learning_rate": 4.0612244897959183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3582 }, { "completion_length": 167.35714721679688, "epoch": 0.2274920634920635, "grad_norm": 0.003980307374149561, "kl": 0.024579593911767006, "learning_rate": 4.0623582766439906e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3583 }, { "completion_length": 161.21429443359375, "epoch": 0.22755555555555557, "grad_norm": 0.003795559285208583, "kl": 0.03201230242848396, "learning_rate": 4.0634920634920634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3584 }, { "completion_length": 216.07144165039062, "epoch": 0.2276190476190476, "grad_norm": 0.002933147130534053, "kl": 0.02686726301908493, "learning_rate": 4.0646258503401357e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3585 }, { "completion_length": 201.2857208251953, "epoch": 0.22768253968253968, "grad_norm": 0.0025863926857709885, "kl": 0.024979667738080025, "learning_rate": 4.0657596371882085e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3586 }, { "completion_length": 191.50001525878906, "epoch": 0.22774603174603175, "grad_norm": 0.0028025428764522076, "kl": 0.024152187630534172, "learning_rate": 4.0668934240362813e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3587 }, { "completion_length": 145.5, "epoch": 0.22780952380952382, "grad_norm": 0.0032375056762248278, "kl": 0.030305974185466766, "learning_rate": 4.068027210884353e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3588 }, { "completion_length": 155.92857360839844, "epoch": 0.22787301587301587, "grad_norm": 0.004574970342218876, "kl": 0.031148772686719894, "learning_rate": 4.069160997732426e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3589 }, { "completion_length": 166.7857208251953, "epoch": 0.22793650793650794, "grad_norm": 0.002836568048223853, "kl": 0.026171134784817696, "learning_rate": 4.0702947845804987e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3590 }, { "completion_length": 179.50001525878906, "epoch": 0.228, "grad_norm": 0.0029345701914280653, "kl": 0.02753857895731926, "learning_rate": 4.071428571428571e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3591 }, { "completion_length": 158.85714721679688, "epoch": 0.22806349206349205, "grad_norm": 0.0038115577772259712, "kl": 0.0361960306763649, "learning_rate": 4.072562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3592 }, { "completion_length": 153.57144165039062, "epoch": 0.22812698412698412, "grad_norm": 0.0030105942860245705, "kl": 0.028544027358293533, "learning_rate": 4.0736961451247167e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3593 }, { "completion_length": 157.07144165039062, "epoch": 0.2281904761904762, "grad_norm": 0.004962278064340353, "kl": 0.02838248759508133, "learning_rate": 4.074829931972789e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3594 }, { "completion_length": 149.35714721679688, "epoch": 0.22825396825396826, "grad_norm": 0.0037538683973252773, "kl": 0.033270448446273804, "learning_rate": 4.075963718820862e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3595 }, { "completion_length": 135.71429443359375, "epoch": 0.2283174603174603, "grad_norm": 0.005018624942749739, "kl": 0.037141066044569016, "learning_rate": 4.077097505668934e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3596 }, { "completion_length": 142.7857208251953, "epoch": 0.22838095238095238, "grad_norm": 0.003896396839991212, "kl": 0.02742733806371689, "learning_rate": 4.0782312925170063e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3597 }, { "completion_length": 170.7857208251953, "epoch": 0.22844444444444445, "grad_norm": 0.0032293188851326704, "kl": 0.028618063777685165, "learning_rate": 4.079365079365079e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3598 }, { "completion_length": 219.7857208251953, "epoch": 0.22850793650793652, "grad_norm": 0.002408122643828392, "kl": 0.02212885394692421, "learning_rate": 4.080498866213152e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3599 }, { "completion_length": 182.71429443359375, "epoch": 0.22857142857142856, "grad_norm": 0.003464178880676627, "kl": 0.03378668427467346, "learning_rate": 4.0816326530612243e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3600 }, { "completion_length": 199.42857360839844, "epoch": 0.22863492063492064, "grad_norm": 0.903806746006012, "kl": 0.03026890940964222, "learning_rate": 4.082766439909297e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3601 }, { "completion_length": 169.42857360839844, "epoch": 0.2286984126984127, "grad_norm": 0.004635138437151909, "kl": 0.03737947344779968, "learning_rate": 4.08390022675737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3602 }, { "completion_length": 190.7857208251953, "epoch": 0.22876190476190475, "grad_norm": 0.0034066415391862392, "kl": 0.029122473672032356, "learning_rate": 4.0850340136054417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3603 }, { "completion_length": 183.57144165039062, "epoch": 0.22882539682539682, "grad_norm": 0.003588420804589987, "kl": 0.029292233288288116, "learning_rate": 4.0861678004535145e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3604 }, { "completion_length": 155.6428680419922, "epoch": 0.2288888888888889, "grad_norm": 0.0035732255782932043, "kl": 0.029782021418213844, "learning_rate": 4.087301587301587e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3605 }, { "completion_length": 199.6428680419922, "epoch": 0.22895238095238096, "grad_norm": 0.00358234578743577, "kl": 0.03177693486213684, "learning_rate": 4.0884353741496596e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3606 }, { "completion_length": 137.35714721679688, "epoch": 0.229015873015873, "grad_norm": 1.2006640434265137, "kl": 0.03691061586141586, "learning_rate": 4.0895691609977324e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3607 }, { "completion_length": 166.92857360839844, "epoch": 0.22907936507936508, "grad_norm": 0.003550090827047825, "kl": 0.028620202094316483, "learning_rate": 4.0907029478458047e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3608 }, { "completion_length": 142.1428680419922, "epoch": 0.22914285714285715, "grad_norm": 0.008327198214828968, "kl": 0.05524824559688568, "learning_rate": 4.0918367346938775e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3609 }, { "completion_length": 155.1428680419922, "epoch": 0.22920634920634922, "grad_norm": 0.003901735180988908, "kl": 0.03881145641207695, "learning_rate": 4.09297052154195e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3610 }, { "completion_length": 168.2857208251953, "epoch": 0.22926984126984126, "grad_norm": 0.0037457719445228577, "kl": 0.03508879244327545, "learning_rate": 4.094104308390022e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3611 }, { "completion_length": 170.35714721679688, "epoch": 0.22933333333333333, "grad_norm": 0.00419914023950696, "kl": 0.03593254089355469, "learning_rate": 4.095238095238095e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3612 }, { "completion_length": 224.6428680419922, "epoch": 0.2293968253968254, "grad_norm": 0.003386199939996004, "kl": 0.03459307178854942, "learning_rate": 4.096371882086168e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3613 }, { "completion_length": 148.57144165039062, "epoch": 0.22946031746031745, "grad_norm": 0.0045800525695085526, "kl": 0.04513172432780266, "learning_rate": 4.09750566893424e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3614 }, { "completion_length": 131.57144165039062, "epoch": 0.22952380952380952, "grad_norm": 1.4832525253295898, "kl": 0.046797435730695724, "learning_rate": 4.098639455782313e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3615 }, { "completion_length": 202.92857360839844, "epoch": 0.2295873015873016, "grad_norm": 0.0032297647558152676, "kl": 0.031705841422080994, "learning_rate": 4.0997732426303857e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3616 }, { "completion_length": 186.71429443359375, "epoch": 0.22965079365079366, "grad_norm": 0.9987828135490417, "kl": 0.03738692030310631, "learning_rate": 4.100907029478458e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3617 }, { "completion_length": 147.42857360839844, "epoch": 0.2297142857142857, "grad_norm": 0.004812732804566622, "kl": 0.044316794723272324, "learning_rate": 4.10204081632653e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3618 }, { "completion_length": 143.6428680419922, "epoch": 0.22977777777777778, "grad_norm": 0.9274957180023193, "kl": 0.03376893699169159, "learning_rate": 4.103174603174603e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3619 }, { "completion_length": 175.71429443359375, "epoch": 0.22984126984126985, "grad_norm": 0.004112439230084419, "kl": 0.040056418627500534, "learning_rate": 4.1043083900226754e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3620 }, { "completion_length": 160.35714721679688, "epoch": 0.22990476190476192, "grad_norm": 0.003967310767620802, "kl": 0.037095386534929276, "learning_rate": 4.105442176870748e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3621 }, { "completion_length": 178.7857208251953, "epoch": 0.22996825396825396, "grad_norm": 0.003534038318321109, "kl": 0.0359414741396904, "learning_rate": 4.106575963718821e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3622 }, { "completion_length": 174.50001525878906, "epoch": 0.23003174603174603, "grad_norm": 0.0044065495021641254, "kl": 0.040625300258398056, "learning_rate": 4.1077097505668933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3623 }, { "completion_length": 156.07144165039062, "epoch": 0.2300952380952381, "grad_norm": 0.0040772948414087296, "kl": 0.04133090376853943, "learning_rate": 4.108843537414966e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3624 }, { "completion_length": 169.42857360839844, "epoch": 0.23015873015873015, "grad_norm": 0.00360025092959404, "kl": 0.03545333072543144, "learning_rate": 4.109977324263038e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3625 }, { "completion_length": 131.7857208251953, "epoch": 0.23022222222222222, "grad_norm": 0.004030916839838028, "kl": 0.037018973380327225, "learning_rate": 4.1111111111111107e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3626 }, { "completion_length": 178.57144165039062, "epoch": 0.2302857142857143, "grad_norm": 0.00529816560447216, "kl": 0.04721299558877945, "learning_rate": 4.1122448979591835e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3627 }, { "completion_length": 137.42857360839844, "epoch": 0.23034920634920636, "grad_norm": 0.004254417959600687, "kl": 0.04179106652736664, "learning_rate": 4.113378684807256e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3628 }, { "completion_length": 165.0, "epoch": 0.2304126984126984, "grad_norm": 0.0035150968469679356, "kl": 0.0335865318775177, "learning_rate": 4.1145124716553286e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3629 }, { "completion_length": 201.92857360839844, "epoch": 0.23047619047619047, "grad_norm": 0.003083809046074748, "kl": 0.03320299834012985, "learning_rate": 4.1156462585034014e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3630 }, { "completion_length": 162.07144165039062, "epoch": 0.23053968253968254, "grad_norm": 0.004353433381766081, "kl": 0.03141918033361435, "learning_rate": 4.1167800453514737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3631 }, { "completion_length": 176.35714721679688, "epoch": 0.23060317460317462, "grad_norm": 0.0038084015250205994, "kl": 0.030986644327640533, "learning_rate": 4.117913832199546e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3632 }, { "completion_length": 174.21429443359375, "epoch": 0.23066666666666666, "grad_norm": 0.003333669388666749, "kl": 0.032392289489507675, "learning_rate": 4.119047619047619e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3633 }, { "completion_length": 205.85714721679688, "epoch": 0.23073015873015873, "grad_norm": 0.0034077949821949005, "kl": 0.029480524361133575, "learning_rate": 4.120181405895691e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3634 }, { "completion_length": 152.92857360839844, "epoch": 0.2307936507936508, "grad_norm": 0.0046913959085941315, "kl": 0.041021108627319336, "learning_rate": 4.121315192743764e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3635 }, { "completion_length": 167.0, "epoch": 0.23085714285714284, "grad_norm": 0.00387357035651803, "kl": 0.034970905631780624, "learning_rate": 4.122448979591837e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3636 }, { "completion_length": 162.21429443359375, "epoch": 0.23092063492063492, "grad_norm": 1.0800853967666626, "kl": 0.04610799252986908, "learning_rate": 4.123582766439909e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3637 }, { "completion_length": 137.7857208251953, "epoch": 0.230984126984127, "grad_norm": 0.0038414483424276114, "kl": 0.04097362980246544, "learning_rate": 4.124716553287982e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3638 }, { "completion_length": 212.21429443359375, "epoch": 0.23104761904761906, "grad_norm": 0.003201682586222887, "kl": 0.03104989603161812, "learning_rate": 4.1258503401360547e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3639 }, { "completion_length": 169.0, "epoch": 0.2311111111111111, "grad_norm": 0.003085080534219742, "kl": 0.029278969392180443, "learning_rate": 4.1269841269841265e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3640 }, { "completion_length": 161.57144165039062, "epoch": 0.23117460317460317, "grad_norm": 0.0032952078618109226, "kl": 0.03148188069462776, "learning_rate": 4.1281179138321993e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3641 }, { "completion_length": 179.57144165039062, "epoch": 0.23123809523809524, "grad_norm": 0.004841846879571676, "kl": 0.037534307688474655, "learning_rate": 4.129251700680272e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3642 }, { "completion_length": 178.2857208251953, "epoch": 0.2313015873015873, "grad_norm": 0.006268974393606186, "kl": 0.03903151676058769, "learning_rate": 4.1303854875283444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3643 }, { "completion_length": 158.71429443359375, "epoch": 0.23136507936507936, "grad_norm": 0.005136052146553993, "kl": 0.042801376432180405, "learning_rate": 4.131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3644 }, { "completion_length": 151.2857208251953, "epoch": 0.23142857142857143, "grad_norm": 0.004032395780086517, "kl": 0.04395851120352745, "learning_rate": 4.13265306122449e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3645 }, { "completion_length": 144.5, "epoch": 0.2314920634920635, "grad_norm": 0.005835257936269045, "kl": 0.04485442489385605, "learning_rate": 4.1337868480725623e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3646 }, { "completion_length": 212.42857360839844, "epoch": 0.23155555555555554, "grad_norm": 0.0027516609989106655, "kl": 0.026858124881982803, "learning_rate": 4.1349206349206346e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3647 }, { "completion_length": 144.21429443359375, "epoch": 0.2316190476190476, "grad_norm": 0.0035290843807160854, "kl": 0.03196791559457779, "learning_rate": 4.136054421768707e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3648 }, { "completion_length": 181.07144165039062, "epoch": 0.23168253968253968, "grad_norm": 0.0037487191148102283, "kl": 0.03106769546866417, "learning_rate": 4.1371882086167797e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3649 }, { "completion_length": 164.2857208251953, "epoch": 0.23174603174603176, "grad_norm": 1.0300143957138062, "kl": 0.03365342691540718, "learning_rate": 4.1383219954648525e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3650 }, { "completion_length": 172.2857208251953, "epoch": 0.2318095238095238, "grad_norm": 0.004522394388914108, "kl": 0.035611867904663086, "learning_rate": 4.139455782312925e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3651 }, { "completion_length": 137.42857360839844, "epoch": 0.23187301587301587, "grad_norm": 0.0056433528661727905, "kl": 0.03608591482043266, "learning_rate": 4.1405895691609976e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3652 }, { "completion_length": 127.64286041259766, "epoch": 0.23193650793650794, "grad_norm": 0.00433366559445858, "kl": 0.04504186287522316, "learning_rate": 4.1417233560090705e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3653 }, { "completion_length": 208.57144165039062, "epoch": 0.232, "grad_norm": 1.7066190242767334, "kl": 0.03407048434019089, "learning_rate": 4.142857142857143e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3654 }, { "completion_length": 164.5, "epoch": 0.23206349206349206, "grad_norm": 0.0031846570782363415, "kl": 0.029175512492656708, "learning_rate": 4.143990929705215e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3655 }, { "completion_length": 176.50001525878906, "epoch": 0.23212698412698413, "grad_norm": 0.0037192997988313437, "kl": 0.038249388337135315, "learning_rate": 4.145124716553288e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3656 }, { "completion_length": 155.71429443359375, "epoch": 0.2321904761904762, "grad_norm": 0.0038916850462555885, "kl": 0.03887505084276199, "learning_rate": 4.14625850340136e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3657 }, { "completion_length": 160.42857360839844, "epoch": 0.23225396825396824, "grad_norm": 0.8760786652565002, "kl": 0.035547684878110886, "learning_rate": 4.147392290249433e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3658 }, { "completion_length": 189.6428680419922, "epoch": 0.2323174603174603, "grad_norm": 0.0034169447608292103, "kl": 0.031163867563009262, "learning_rate": 4.148526077097506e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3659 }, { "completion_length": 125.85714721679688, "epoch": 0.23238095238095238, "grad_norm": 0.003956277389079332, "kl": 0.03694942221045494, "learning_rate": 4.149659863945578e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3660 }, { "completion_length": 167.2857208251953, "epoch": 0.23244444444444445, "grad_norm": 0.003955129534006119, "kl": 0.036453086882829666, "learning_rate": 4.150793650793651e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3661 }, { "completion_length": 159.07144165039062, "epoch": 0.2325079365079365, "grad_norm": 0.00596056692302227, "kl": 0.04266602173447609, "learning_rate": 4.151927437641723e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3662 }, { "completion_length": 169.6428680419922, "epoch": 0.23257142857142857, "grad_norm": 1.153001070022583, "kl": 0.040302202105522156, "learning_rate": 4.1530612244897955e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3663 }, { "completion_length": 212.00001525878906, "epoch": 0.23263492063492064, "grad_norm": 0.0029816690366715193, "kl": 0.029764587059617043, "learning_rate": 4.1541950113378683e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3664 }, { "completion_length": 162.7857208251953, "epoch": 0.2326984126984127, "grad_norm": 0.0031990758143365383, "kl": 0.03522183746099472, "learning_rate": 4.1553287981859406e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3665 }, { "completion_length": 167.35714721679688, "epoch": 0.23276190476190475, "grad_norm": 0.00429528160020709, "kl": 0.045668501406908035, "learning_rate": 4.1564625850340134e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3666 }, { "completion_length": 185.71429443359375, "epoch": 0.23282539682539682, "grad_norm": 1.3697408437728882, "kl": 0.039887141436338425, "learning_rate": 4.157596371882086e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3667 }, { "completion_length": 167.21429443359375, "epoch": 0.2328888888888889, "grad_norm": 0.007146317977458239, "kl": 0.06288065016269684, "learning_rate": 4.1587301587301585e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3668 }, { "completion_length": 160.7857208251953, "epoch": 0.23295238095238094, "grad_norm": 0.004185226280242205, "kl": 0.040220629423856735, "learning_rate": 4.159863945578231e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3669 }, { "completion_length": 151.07144165039062, "epoch": 0.233015873015873, "grad_norm": 0.004219348076730967, "kl": 0.04077791050076485, "learning_rate": 4.1609977324263036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3670 }, { "completion_length": 150.85714721679688, "epoch": 0.23307936507936508, "grad_norm": 1.5758146047592163, "kl": 0.04410940781235695, "learning_rate": 4.162131519274376e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3671 }, { "completion_length": 169.07144165039062, "epoch": 0.23314285714285715, "grad_norm": 0.00531343650072813, "kl": 0.05403175204992294, "learning_rate": 4.1632653061224487e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3672 }, { "completion_length": 205.7857208251953, "epoch": 0.2332063492063492, "grad_norm": 0.003899181028828025, "kl": 0.03671404346823692, "learning_rate": 4.1643990929705215e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3673 }, { "completion_length": 190.1428680419922, "epoch": 0.23326984126984127, "grad_norm": 0.005077827721834183, "kl": 0.040284253656864166, "learning_rate": 4.165532879818594e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3674 }, { "completion_length": 152.07144165039062, "epoch": 0.23333333333333334, "grad_norm": 0.013449933379888535, "kl": 0.05151035264134407, "learning_rate": 4.1666666666666667e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3675 }, { "completion_length": 224.71429443359375, "epoch": 0.2333968253968254, "grad_norm": 0.787316083908081, "kl": 0.039741452783346176, "learning_rate": 4.1678004535147395e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3676 }, { "completion_length": 172.85714721679688, "epoch": 0.23346031746031745, "grad_norm": 0.0045447214506566525, "kl": 0.04619700834155083, "learning_rate": 4.168934240362811e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3677 }, { "completion_length": 155.0, "epoch": 0.23352380952380952, "grad_norm": 0.006954946089535952, "kl": 0.05792567506432533, "learning_rate": 4.170068027210884e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3678 }, { "completion_length": 212.35714721679688, "epoch": 0.2335873015873016, "grad_norm": 0.004103757906705141, "kl": 0.04259226471185684, "learning_rate": 4.171201814058957e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3679 }, { "completion_length": 140.5, "epoch": 0.23365079365079366, "grad_norm": 0.006290967110544443, "kl": 0.05373041331768036, "learning_rate": 4.172335600907029e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3680 }, { "completion_length": 180.6428680419922, "epoch": 0.2337142857142857, "grad_norm": 0.005562415346503258, "kl": 0.06344519555568695, "learning_rate": 4.173469387755102e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3681 }, { "completion_length": 192.07144165039062, "epoch": 0.23377777777777778, "grad_norm": 1.0184754133224487, "kl": 0.06172758713364601, "learning_rate": 4.174603174603175e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3682 }, { "completion_length": 201.1428680419922, "epoch": 0.23384126984126985, "grad_norm": 0.007826963439583778, "kl": 0.048874881118535995, "learning_rate": 4.175736961451247e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3683 }, { "completion_length": 161.85714721679688, "epoch": 0.2339047619047619, "grad_norm": 0.005775094032287598, "kl": 0.04936736449599266, "learning_rate": 4.1768707482993194e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3684 }, { "completion_length": 173.35714721679688, "epoch": 0.23396825396825396, "grad_norm": 0.007437058724462986, "kl": 0.060209743678569794, "learning_rate": 4.1780045351473917e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3685 }, { "completion_length": 188.00001525878906, "epoch": 0.23403174603174604, "grad_norm": 0.004653294570744038, "kl": 0.042664069682359695, "learning_rate": 4.1791383219954645e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3686 }, { "completion_length": 166.1428680419922, "epoch": 0.2340952380952381, "grad_norm": 0.006380935199558735, "kl": 0.048899855464696884, "learning_rate": 4.1802721088435373e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3687 }, { "completion_length": 227.57144165039062, "epoch": 0.23415873015873015, "grad_norm": 0.0035970471799373627, "kl": 0.030906852334737778, "learning_rate": 4.1814058956916096e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3688 }, { "completion_length": 160.85714721679688, "epoch": 0.23422222222222222, "grad_norm": 1.1898159980773926, "kl": 0.0682680532336235, "learning_rate": 4.1825396825396824e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3689 }, { "completion_length": 162.5, "epoch": 0.2342857142857143, "grad_norm": 0.005604250356554985, "kl": 0.05592816323041916, "learning_rate": 4.183673469387755e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3690 }, { "completion_length": 145.42857360839844, "epoch": 0.23434920634920636, "grad_norm": 0.7684634923934937, "kl": 0.04174220934510231, "learning_rate": 4.184807256235827e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3691 }, { "completion_length": 175.07144165039062, "epoch": 0.2344126984126984, "grad_norm": 0.00790677685290575, "kl": 0.04586100950837135, "learning_rate": 4.1859410430839e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3692 }, { "completion_length": 176.6428680419922, "epoch": 0.23447619047619048, "grad_norm": 0.007383454591035843, "kl": 0.06207917258143425, "learning_rate": 4.1870748299319726e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3693 }, { "completion_length": 185.71429443359375, "epoch": 0.23453968253968255, "grad_norm": 0.0041740271262824535, "kl": 0.03485448285937309, "learning_rate": 4.188208616780045e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3694 }, { "completion_length": 145.35714721679688, "epoch": 0.2346031746031746, "grad_norm": 0.008646034635603428, "kl": 0.06385929882526398, "learning_rate": 4.189342403628118e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3695 }, { "completion_length": 145.7857208251953, "epoch": 0.23466666666666666, "grad_norm": 0.007582852616906166, "kl": 0.05504491925239563, "learning_rate": 4.1904761904761906e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3696 }, { "completion_length": 167.42857360839844, "epoch": 0.23473015873015873, "grad_norm": 0.005286464933305979, "kl": 0.05053718015551567, "learning_rate": 4.191609977324263e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3697 }, { "completion_length": 171.6428680419922, "epoch": 0.2347936507936508, "grad_norm": 0.005403436254709959, "kl": 0.05238693580031395, "learning_rate": 4.1927437641723357e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3698 }, { "completion_length": 209.57144165039062, "epoch": 0.23485714285714285, "grad_norm": 0.004401732236146927, "kl": 0.04792952910065651, "learning_rate": 4.193877551020408e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3699 }, { "completion_length": 154.6428680419922, "epoch": 0.23492063492063492, "grad_norm": 0.008308718912303448, "kl": 0.058433279395103455, "learning_rate": 4.19501133786848e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3700 }, { "completion_length": 155.0, "epoch": 0.234984126984127, "grad_norm": 0.008850629441440105, "kl": 0.05343509092926979, "learning_rate": 4.196145124716553e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3701 }, { "completion_length": 179.2857208251953, "epoch": 0.23504761904761906, "grad_norm": 0.003987928386777639, "kl": 0.032750532031059265, "learning_rate": 4.197278911564626e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3702 }, { "completion_length": 158.2857208251953, "epoch": 0.2351111111111111, "grad_norm": 0.010457574389874935, "kl": 0.057606641203165054, "learning_rate": 4.198412698412698e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3703 }, { "completion_length": 178.50001525878906, "epoch": 0.23517460317460318, "grad_norm": 0.0056138732470571995, "kl": 0.04946240410208702, "learning_rate": 4.199546485260771e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3704 }, { "completion_length": 150.57144165039062, "epoch": 0.23523809523809525, "grad_norm": 1.393812656402588, "kl": 0.06006701663136482, "learning_rate": 4.200680272108844e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3705 }, { "completion_length": 174.50001525878906, "epoch": 0.2353015873015873, "grad_norm": 0.005415671970695257, "kl": 0.042584847658872604, "learning_rate": 4.2018140589569156e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3706 }, { "completion_length": 168.57144165039062, "epoch": 0.23536507936507936, "grad_norm": 0.006458655931055546, "kl": 0.050856366753578186, "learning_rate": 4.2029478458049884e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3707 }, { "completion_length": 165.6428680419922, "epoch": 0.23542857142857143, "grad_norm": 0.004117267671972513, "kl": 0.03690989688038826, "learning_rate": 4.2040816326530607e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3708 }, { "completion_length": 198.2857208251953, "epoch": 0.2354920634920635, "grad_norm": 0.00628920691087842, "kl": 0.043029818683862686, "learning_rate": 4.2052154195011335e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3709 }, { "completion_length": 173.21429443359375, "epoch": 0.23555555555555555, "grad_norm": 0.006939897779375315, "kl": 0.05370843783020973, "learning_rate": 4.2063492063492063e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3710 }, { "completion_length": 188.42857360839844, "epoch": 0.23561904761904762, "grad_norm": 0.006573862861841917, "kl": 0.05172262340784073, "learning_rate": 4.2074829931972786e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3711 }, { "completion_length": 202.71429443359375, "epoch": 0.2356825396825397, "grad_norm": 0.0063541121780872345, "kl": 0.05282348766922951, "learning_rate": 4.2086167800453514e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3712 }, { "completion_length": 191.6428680419922, "epoch": 0.23574603174603176, "grad_norm": 0.006762719713151455, "kl": 0.05165928974747658, "learning_rate": 4.209750566893424e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3713 }, { "completion_length": 173.21429443359375, "epoch": 0.2358095238095238, "grad_norm": 0.006170045584440231, "kl": 0.04385225847363472, "learning_rate": 4.210884353741496e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3714 }, { "completion_length": 142.2857208251953, "epoch": 0.23587301587301587, "grad_norm": 0.005936968605965376, "kl": 0.040959715843200684, "learning_rate": 4.212018140589569e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3715 }, { "completion_length": 176.50001525878906, "epoch": 0.23593650793650794, "grad_norm": 0.005068330094218254, "kl": 0.03538145124912262, "learning_rate": 4.2131519274376417e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3716 }, { "completion_length": 174.57144165039062, "epoch": 0.236, "grad_norm": 0.0056823925115168095, "kl": 0.04202745109796524, "learning_rate": 4.214285714285714e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3717 }, { "completion_length": 135.7857208251953, "epoch": 0.23606349206349206, "grad_norm": 0.009291076101362705, "kl": 0.06986314058303833, "learning_rate": 4.215419501133787e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3718 }, { "completion_length": 166.2857208251953, "epoch": 0.23612698412698413, "grad_norm": 0.006073323078453541, "kl": 0.052351903170347214, "learning_rate": 4.2165532879818596e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3719 }, { "completion_length": 164.1428680419922, "epoch": 0.2361904761904762, "grad_norm": 0.007098281290382147, "kl": 0.05837118998169899, "learning_rate": 4.217687074829932e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3720 }, { "completion_length": 145.85714721679688, "epoch": 0.23625396825396824, "grad_norm": 0.005308469291776419, "kl": 0.052062105387449265, "learning_rate": 4.218820861678004e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3721 }, { "completion_length": 219.1428680419922, "epoch": 0.23631746031746032, "grad_norm": 0.0033355425111949444, "kl": 0.02800625190138817, "learning_rate": 4.219954648526077e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3722 }, { "completion_length": 151.85714721679688, "epoch": 0.23638095238095239, "grad_norm": 0.00418796855956316, "kl": 0.03584742173552513, "learning_rate": 4.2210884353741493e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3723 }, { "completion_length": 152.6428680419922, "epoch": 0.23644444444444446, "grad_norm": 0.007103811018168926, "kl": 0.04909879341721535, "learning_rate": 4.222222222222222e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3724 }, { "completion_length": 157.35714721679688, "epoch": 0.2365079365079365, "grad_norm": 0.005281398538500071, "kl": 0.04201963171362877, "learning_rate": 4.223356009070295e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3725 }, { "completion_length": 163.85714721679688, "epoch": 0.23657142857142857, "grad_norm": 0.004086733795702457, "kl": 0.03650326654314995, "learning_rate": 4.224489795918367e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3726 }, { "completion_length": 150.35714721679688, "epoch": 0.23663492063492064, "grad_norm": 0.005922038108110428, "kl": 0.045429784804582596, "learning_rate": 4.22562358276644e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3727 }, { "completion_length": 183.07144165039062, "epoch": 0.23669841269841269, "grad_norm": 0.005930526182055473, "kl": 0.04431029409170151, "learning_rate": 4.226757369614512e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3728 }, { "completion_length": 164.07144165039062, "epoch": 0.23676190476190476, "grad_norm": 0.007132227532565594, "kl": 0.05760706588625908, "learning_rate": 4.2278911564625846e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3729 }, { "completion_length": 188.07144165039062, "epoch": 0.23682539682539683, "grad_norm": 0.006511027924716473, "kl": 0.050869170576334, "learning_rate": 4.2290249433106574e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3730 }, { "completion_length": 157.6428680419922, "epoch": 0.2368888888888889, "grad_norm": 0.004967382177710533, "kl": 0.04558393731713295, "learning_rate": 4.2301587301587297e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3731 }, { "completion_length": 187.50001525878906, "epoch": 0.23695238095238094, "grad_norm": 0.005603124853223562, "kl": 0.055368028581142426, "learning_rate": 4.2312925170068025e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3732 }, { "completion_length": 159.07144165039062, "epoch": 0.237015873015873, "grad_norm": 0.006318411324173212, "kl": 0.06298886984586716, "learning_rate": 4.2324263038548753e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3733 }, { "completion_length": 168.85714721679688, "epoch": 0.23707936507936508, "grad_norm": 0.005235540680587292, "kl": 0.0449315682053566, "learning_rate": 4.2335600907029476e-07, "loss": 0.0, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3734 }, { "completion_length": 171.07144165039062, "epoch": 0.23714285714285716, "grad_norm": 0.0046837180852890015, "kl": 0.044613610953092575, "learning_rate": 4.2346938775510205e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3735 }, { "completion_length": 148.1428680419922, "epoch": 0.2372063492063492, "grad_norm": 0.004758525639772415, "kl": 0.04074617102742195, "learning_rate": 4.235827664399093e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3736 }, { "completion_length": 180.85714721679688, "epoch": 0.23726984126984127, "grad_norm": 0.010256617330014706, "kl": 0.04328273981809616, "learning_rate": 4.236961451247165e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3737 }, { "completion_length": 170.07144165039062, "epoch": 0.23733333333333334, "grad_norm": 0.004696583840996027, "kl": 0.0369093082845211, "learning_rate": 4.238095238095238e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3738 }, { "completion_length": 156.35714721679688, "epoch": 0.23739682539682538, "grad_norm": 0.007198865059763193, "kl": 0.054986659437417984, "learning_rate": 4.2392290249433107e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3739 }, { "completion_length": 179.92857360839844, "epoch": 0.23746031746031745, "grad_norm": 0.637259840965271, "kl": 0.053564075380563736, "learning_rate": 4.240362811791383e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3740 }, { "completion_length": 177.50001525878906, "epoch": 0.23752380952380953, "grad_norm": 0.005116856656968594, "kl": 0.03946034610271454, "learning_rate": 4.241496598639456e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3741 }, { "completion_length": 198.92857360839844, "epoch": 0.2375873015873016, "grad_norm": 0.0038201119750738144, "kl": 0.03175721690058708, "learning_rate": 4.2426303854875286e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3742 }, { "completion_length": 219.57144165039062, "epoch": 0.23765079365079364, "grad_norm": 0.0036405916325747967, "kl": 0.030250627547502518, "learning_rate": 4.2437641723356004e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3743 }, { "completion_length": 182.50001525878906, "epoch": 0.2377142857142857, "grad_norm": 0.0050767711363732815, "kl": 0.03321545198559761, "learning_rate": 4.244897959183673e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3744 }, { "completion_length": 162.2857208251953, "epoch": 0.23777777777777778, "grad_norm": 0.004395599942654371, "kl": 0.03759955242276192, "learning_rate": 4.246031746031746e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3745 }, { "completion_length": 193.1428680419922, "epoch": 0.23784126984126985, "grad_norm": 0.005250567104667425, "kl": 0.04182722792029381, "learning_rate": 4.2471655328798183e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3746 }, { "completion_length": 162.35714721679688, "epoch": 0.2379047619047619, "grad_norm": 0.005769739858806133, "kl": 0.04686306044459343, "learning_rate": 4.248299319727891e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3747 }, { "completion_length": 174.21429443359375, "epoch": 0.23796825396825397, "grad_norm": 0.009647081606090069, "kl": 0.04483500495553017, "learning_rate": 4.2494331065759634e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3748 }, { "completion_length": 164.1428680419922, "epoch": 0.23803174603174604, "grad_norm": 0.0062562813982367516, "kl": 0.04972657933831215, "learning_rate": 4.250566893424036e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3749 }, { "completion_length": 176.2857208251953, "epoch": 0.23809523809523808, "grad_norm": 0.0033635839354246855, "kl": 0.027764882892370224, "learning_rate": 4.2517006802721085e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3750 }, { "completion_length": 170.57144165039062, "epoch": 0.23815873015873015, "grad_norm": 0.0048567159101367, "kl": 0.04483456164598465, "learning_rate": 4.252834467120181e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3751 }, { "completion_length": 154.2857208251953, "epoch": 0.23822222222222222, "grad_norm": 0.006913249846547842, "kl": 0.048379912972450256, "learning_rate": 4.2539682539682536e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3752 }, { "completion_length": 172.35714721679688, "epoch": 0.2382857142857143, "grad_norm": 0.0038287583738565445, "kl": 0.031120242550969124, "learning_rate": 4.2551020408163264e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3753 }, { "completion_length": 159.0, "epoch": 0.23834920634920634, "grad_norm": 0.008119202218949795, "kl": 0.05333838239312172, "learning_rate": 4.2562358276643987e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3754 }, { "completion_length": 148.7857208251953, "epoch": 0.2384126984126984, "grad_norm": 0.004671704024076462, "kl": 0.046106111258268356, "learning_rate": 4.2573696145124715e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3755 }, { "completion_length": 187.2857208251953, "epoch": 0.23847619047619048, "grad_norm": 0.005888927727937698, "kl": 0.0412808321416378, "learning_rate": 4.2585034013605444e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3756 }, { "completion_length": 178.00001525878906, "epoch": 0.23853968253968255, "grad_norm": 1.1475441455841064, "kl": 0.035673752427101135, "learning_rate": 4.2596371882086167e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3757 }, { "completion_length": 144.5, "epoch": 0.2386031746031746, "grad_norm": 0.008500023745000362, "kl": 0.06013783812522888, "learning_rate": 4.260770975056689e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3758 }, { "completion_length": 150.1428680419922, "epoch": 0.23866666666666667, "grad_norm": 0.6952959895133972, "kl": 0.06025537848472595, "learning_rate": 4.261904761904762e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3759 }, { "completion_length": 156.85714721679688, "epoch": 0.23873015873015874, "grad_norm": 1.0480504035949707, "kl": 0.04169589653611183, "learning_rate": 4.263038548752834e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3760 }, { "completion_length": 180.50001525878906, "epoch": 0.23879365079365078, "grad_norm": 0.005993760656565428, "kl": 0.04732883349061012, "learning_rate": 4.264172335600907e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3761 }, { "completion_length": 142.7857208251953, "epoch": 0.23885714285714285, "grad_norm": 0.007950402796268463, "kl": 0.06623492389917374, "learning_rate": 4.2653061224489797e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3762 }, { "completion_length": 180.57144165039062, "epoch": 0.23892063492063492, "grad_norm": 0.006211280357092619, "kl": 0.056560639292001724, "learning_rate": 4.266439909297052e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3763 }, { "completion_length": 189.00001525878906, "epoch": 0.238984126984127, "grad_norm": 0.005550558678805828, "kl": 0.04445317015051842, "learning_rate": 4.267573696145125e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3764 }, { "completion_length": 166.0, "epoch": 0.23904761904761904, "grad_norm": 0.005440456327050924, "kl": 0.04144110530614853, "learning_rate": 4.268707482993197e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3765 }, { "completion_length": 157.7857208251953, "epoch": 0.2391111111111111, "grad_norm": 0.010578369721770287, "kl": 0.0650382786989212, "learning_rate": 4.2698412698412694e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3766 }, { "completion_length": 147.7857208251953, "epoch": 0.23917460317460318, "grad_norm": 0.008623841218650341, "kl": 0.058178648352622986, "learning_rate": 4.270975056689342e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3767 }, { "completion_length": 160.2857208251953, "epoch": 0.23923809523809525, "grad_norm": 0.00791185349225998, "kl": 0.05914200469851494, "learning_rate": 4.2721088435374145e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3768 }, { "completion_length": 169.42857360839844, "epoch": 0.2393015873015873, "grad_norm": 0.0055595641024410725, "kl": 0.04338039085268974, "learning_rate": 4.2732426303854873e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3769 }, { "completion_length": 207.00001525878906, "epoch": 0.23936507936507936, "grad_norm": 0.0066484007984399796, "kl": 0.041953958570957184, "learning_rate": 4.27437641723356e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3770 }, { "completion_length": 181.57144165039062, "epoch": 0.23942857142857144, "grad_norm": 0.007781119551509619, "kl": 0.06100277602672577, "learning_rate": 4.2755102040816324e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3771 }, { "completion_length": 176.21429443359375, "epoch": 0.23949206349206348, "grad_norm": 0.006816177163273096, "kl": 0.05505675822496414, "learning_rate": 4.276643990929705e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3772 }, { "completion_length": 168.7857208251953, "epoch": 0.23955555555555555, "grad_norm": 0.006272822618484497, "kl": 0.05047709122300148, "learning_rate": 4.2777777777777775e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3773 }, { "completion_length": 156.92857360839844, "epoch": 0.23961904761904762, "grad_norm": 0.008127795532345772, "kl": 0.0658959373831749, "learning_rate": 4.27891156462585e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3774 }, { "completion_length": 198.6428680419922, "epoch": 0.2396825396825397, "grad_norm": 0.005384264979511499, "kl": 0.05287102982401848, "learning_rate": 4.2800453514739226e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3775 }, { "completion_length": 198.50001525878906, "epoch": 0.23974603174603173, "grad_norm": 0.004301512148231268, "kl": 0.042791374027729034, "learning_rate": 4.2811791383219955e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3776 }, { "completion_length": 137.71429443359375, "epoch": 0.2398095238095238, "grad_norm": 0.008281613700091839, "kl": 0.0612642765045166, "learning_rate": 4.282312925170068e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3777 }, { "completion_length": 179.21429443359375, "epoch": 0.23987301587301588, "grad_norm": 0.01083018071949482, "kl": 0.057480908930301666, "learning_rate": 4.2834467120181406e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3778 }, { "completion_length": 182.00001525878906, "epoch": 0.23993650793650795, "grad_norm": 0.006678937468677759, "kl": 0.06690682470798492, "learning_rate": 4.2845804988662134e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3779 }, { "completion_length": 162.07144165039062, "epoch": 0.24, "grad_norm": 0.007747058290988207, "kl": 0.062163688242435455, "learning_rate": 4.285714285714285e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3780 }, { "completion_length": 162.6428680419922, "epoch": 0.24006349206349206, "grad_norm": 0.005267269443720579, "kl": 0.044694963842630386, "learning_rate": 4.286848072562358e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3781 }, { "completion_length": 185.42857360839844, "epoch": 0.24012698412698413, "grad_norm": 0.005247669760137796, "kl": 0.04929159954190254, "learning_rate": 4.287981859410431e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3782 }, { "completion_length": 178.92857360839844, "epoch": 0.2401904761904762, "grad_norm": 0.005859410855919123, "kl": 0.04623611643910408, "learning_rate": 4.289115646258503e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3783 }, { "completion_length": 184.6428680419922, "epoch": 0.24025396825396825, "grad_norm": 0.005912467371672392, "kl": 0.050050124526023865, "learning_rate": 4.290249433106576e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3784 }, { "completion_length": 191.92857360839844, "epoch": 0.24031746031746032, "grad_norm": 0.0076331705786287785, "kl": 0.053199153393507004, "learning_rate": 4.2913832199546487e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3785 }, { "completion_length": 189.7857208251953, "epoch": 0.2403809523809524, "grad_norm": 0.005721640307456255, "kl": 0.045694537460803986, "learning_rate": 4.292517006802721e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3786 }, { "completion_length": 169.6428680419922, "epoch": 0.24044444444444443, "grad_norm": 0.0055133202113211155, "kl": 0.04817444831132889, "learning_rate": 4.2936507936507933e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3787 }, { "completion_length": 160.07144165039062, "epoch": 0.2405079365079365, "grad_norm": 0.007181311957538128, "kl": 0.05583643168210983, "learning_rate": 4.2947845804988656e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3788 }, { "completion_length": 175.1428680419922, "epoch": 0.24057142857142857, "grad_norm": 0.005345685873180628, "kl": 0.04837075620889664, "learning_rate": 4.2959183673469384e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3789 }, { "completion_length": 176.42857360839844, "epoch": 0.24063492063492065, "grad_norm": 1.433131217956543, "kl": 0.04987139627337456, "learning_rate": 4.297052154195011e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3790 }, { "completion_length": 166.5, "epoch": 0.2406984126984127, "grad_norm": 0.007447384763509035, "kl": 0.05271653085947037, "learning_rate": 4.2981859410430835e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3791 }, { "completion_length": 184.6428680419922, "epoch": 0.24076190476190476, "grad_norm": 0.007013552822172642, "kl": 0.06417595595121384, "learning_rate": 4.2993197278911563e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3792 }, { "completion_length": 142.7857208251953, "epoch": 0.24082539682539683, "grad_norm": 0.007152218837291002, "kl": 0.06179339066147804, "learning_rate": 4.300453514739229e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3793 }, { "completion_length": 196.07144165039062, "epoch": 0.2408888888888889, "grad_norm": 1.041031837463379, "kl": 0.054495278745889664, "learning_rate": 4.3015873015873014e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3794 }, { "completion_length": 173.07144165039062, "epoch": 0.24095238095238095, "grad_norm": 0.015410663560032845, "kl": 0.08195589482784271, "learning_rate": 4.3027210884353737e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3795 }, { "completion_length": 128.71429443359375, "epoch": 0.24101587301587302, "grad_norm": 1.3101614713668823, "kl": 0.061220932751894, "learning_rate": 4.3038548752834465e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3796 }, { "completion_length": 172.50001525878906, "epoch": 0.2410793650793651, "grad_norm": 0.005300719290971756, "kl": 0.05241665616631508, "learning_rate": 4.304988662131519e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3797 }, { "completion_length": 207.71429443359375, "epoch": 0.24114285714285713, "grad_norm": 0.00548385689035058, "kl": 0.04578555375337601, "learning_rate": 4.3061224489795917e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3798 }, { "completion_length": 145.0, "epoch": 0.2412063492063492, "grad_norm": 0.005830016452819109, "kl": 0.053762275725603104, "learning_rate": 4.3072562358276645e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3799 }, { "completion_length": 206.07144165039062, "epoch": 0.24126984126984127, "grad_norm": 0.011404442600905895, "kl": 0.04860410839319229, "learning_rate": 4.308390022675737e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3800 }, { "completion_length": 158.6428680419922, "epoch": 0.24133333333333334, "grad_norm": 0.6406751275062561, "kl": 0.06703456491231918, "learning_rate": 4.3095238095238096e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3801 }, { "completion_length": 193.1428680419922, "epoch": 0.2413968253968254, "grad_norm": 0.004642133601009846, "kl": 0.04194388911128044, "learning_rate": 4.310657596371882e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3802 }, { "completion_length": 163.21429443359375, "epoch": 0.24146031746031746, "grad_norm": 0.006009942851960659, "kl": 0.04877178743481636, "learning_rate": 4.311791383219954e-07, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3803 }, { "completion_length": 122.64286041259766, "epoch": 0.24152380952380953, "grad_norm": 0.00803473498672247, "kl": 0.0820428878068924, "learning_rate": 4.312925170068027e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3804 }, { "completion_length": 153.92857360839844, "epoch": 0.2415873015873016, "grad_norm": 0.0054093012586236, "kl": 0.05903855711221695, "learning_rate": 4.3140589569161e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3805 }, { "completion_length": 151.0, "epoch": 0.24165079365079364, "grad_norm": 0.008260608650743961, "kl": 0.07837949693202972, "learning_rate": 4.315192743764172e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3806 }, { "completion_length": 145.85714721679688, "epoch": 0.24171428571428571, "grad_norm": 0.9167727828025818, "kl": 0.08575371652841568, "learning_rate": 4.316326530612245e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3807 }, { "completion_length": 207.71429443359375, "epoch": 0.24177777777777779, "grad_norm": 0.008241081610321999, "kl": 0.05812935531139374, "learning_rate": 4.3174603174603177e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3808 }, { "completion_length": 169.5, "epoch": 0.24184126984126983, "grad_norm": 0.00890944804996252, "kl": 0.06849755346775055, "learning_rate": 4.3185941043083895e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3809 }, { "completion_length": 171.92857360839844, "epoch": 0.2419047619047619, "grad_norm": 0.004598038271069527, "kl": 0.05469388887286186, "learning_rate": 4.3197278911564623e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3810 }, { "completion_length": 199.00001525878906, "epoch": 0.24196825396825397, "grad_norm": 0.005092112813144922, "kl": 0.06679289788007736, "learning_rate": 4.3208616780045346e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3811 }, { "completion_length": 172.35714721679688, "epoch": 0.24203174603174604, "grad_norm": 0.005579019896686077, "kl": 0.05426351726055145, "learning_rate": 4.3219954648526074e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3812 }, { "completion_length": 190.35714721679688, "epoch": 0.24209523809523809, "grad_norm": 1.07096529006958, "kl": 0.04342032968997955, "learning_rate": 4.32312925170068e-07, "loss": 0.0, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3813 }, { "completion_length": 183.00001525878906, "epoch": 0.24215873015873016, "grad_norm": 0.005223942920565605, "kl": 0.06013726443052292, "learning_rate": 4.3242630385487525e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3814 }, { "completion_length": 182.92857360839844, "epoch": 0.24222222222222223, "grad_norm": 0.004482683725655079, "kl": 0.05865179002285004, "learning_rate": 4.3253968253968253e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3815 }, { "completion_length": 201.50001525878906, "epoch": 0.2422857142857143, "grad_norm": 0.005911070853471756, "kl": 0.06757689267396927, "learning_rate": 4.326530612244898e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3816 }, { "completion_length": 154.57144165039062, "epoch": 0.24234920634920634, "grad_norm": 0.005766936112195253, "kl": 0.0775895044207573, "learning_rate": 4.32766439909297e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3817 }, { "completion_length": 212.6428680419922, "epoch": 0.2424126984126984, "grad_norm": 0.0057185934856534, "kl": 0.07575242966413498, "learning_rate": 4.328798185941043e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3818 }, { "completion_length": 206.2857208251953, "epoch": 0.24247619047619048, "grad_norm": 0.0046227979473769665, "kl": 0.05577385798096657, "learning_rate": 4.3299319727891156e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3819 }, { "completion_length": 173.6428680419922, "epoch": 0.24253968253968253, "grad_norm": 0.00597468251362443, "kl": 0.0677388459444046, "learning_rate": 4.331065759637188e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3820 }, { "completion_length": 156.0, "epoch": 0.2426031746031746, "grad_norm": 0.005476026795804501, "kl": 0.065020851790905, "learning_rate": 4.3321995464852607e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3821 }, { "completion_length": 198.92857360839844, "epoch": 0.24266666666666667, "grad_norm": 0.004668098408728838, "kl": 0.06294967979192734, "learning_rate": 4.3333333333333335e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3822 }, { "completion_length": 171.50001525878906, "epoch": 0.24273015873015874, "grad_norm": 0.007348706480115652, "kl": 0.09488746523857117, "learning_rate": 4.334467120181406e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3823 }, { "completion_length": 206.00001525878906, "epoch": 0.24279365079365078, "grad_norm": 0.9279805421829224, "kl": 0.06505872309207916, "learning_rate": 4.335600907029478e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3824 }, { "completion_length": 185.6428680419922, "epoch": 0.24285714285714285, "grad_norm": 0.005001723300665617, "kl": 0.06228025630116463, "learning_rate": 4.336734693877551e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3825 }, { "completion_length": 146.07144165039062, "epoch": 0.24292063492063493, "grad_norm": 0.009027130901813507, "kl": 0.10037735849618912, "learning_rate": 4.337868480725623e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3826 }, { "completion_length": 180.00001525878906, "epoch": 0.242984126984127, "grad_norm": 0.005169695243239403, "kl": 0.06574586033821106, "learning_rate": 4.339002267573696e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3827 }, { "completion_length": 163.1428680419922, "epoch": 0.24304761904761904, "grad_norm": 0.007339847274124622, "kl": 0.07291048020124435, "learning_rate": 4.340136054421769e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3828 }, { "completion_length": 187.21429443359375, "epoch": 0.2431111111111111, "grad_norm": 0.004739558324217796, "kl": 0.060604363679885864, "learning_rate": 4.341269841269841e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3829 }, { "completion_length": 140.7857208251953, "epoch": 0.24317460317460318, "grad_norm": 1.2216647863388062, "kl": 0.08408686518669128, "learning_rate": 4.342403628117914e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3830 }, { "completion_length": 173.7857208251953, "epoch": 0.24323809523809523, "grad_norm": 0.005712568294256926, "kl": 0.07286721467971802, "learning_rate": 4.343537414965986e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3831 }, { "completion_length": 177.21429443359375, "epoch": 0.2433015873015873, "grad_norm": 0.004836381413042545, "kl": 0.06471864879131317, "learning_rate": 4.3446712018140585e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3832 }, { "completion_length": 178.42857360839844, "epoch": 0.24336507936507937, "grad_norm": 0.006261629983782768, "kl": 0.06499656289815903, "learning_rate": 4.3458049886621313e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3833 }, { "completion_length": 215.07144165039062, "epoch": 0.24342857142857144, "grad_norm": 1.1696598529815674, "kl": 0.05578497797250748, "learning_rate": 4.3469387755102036e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3834 }, { "completion_length": 185.71429443359375, "epoch": 0.24349206349206348, "grad_norm": 0.0058037699200212955, "kl": 0.07648151367902756, "learning_rate": 4.3480725623582764e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3835 }, { "completion_length": 161.1428680419922, "epoch": 0.24355555555555555, "grad_norm": 0.007391019724309444, "kl": 0.10575038939714432, "learning_rate": 4.349206349206349e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3836 }, { "completion_length": 149.71429443359375, "epoch": 0.24361904761904762, "grad_norm": 0.008859341032803059, "kl": 0.11082872748374939, "learning_rate": 4.3503401360544215e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3837 }, { "completion_length": 154.0, "epoch": 0.2436825396825397, "grad_norm": 0.007717093452811241, "kl": 0.10640282183885574, "learning_rate": 4.3514739229024944e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3838 }, { "completion_length": 206.21429443359375, "epoch": 0.24374603174603174, "grad_norm": 0.005938940215855837, "kl": 0.0756036788225174, "learning_rate": 4.3526077097505667e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3839 }, { "completion_length": 189.00001525878906, "epoch": 0.2438095238095238, "grad_norm": 0.009095050394535065, "kl": 0.10468195378780365, "learning_rate": 4.353741496598639e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3840 }, { "completion_length": 166.1428680419922, "epoch": 0.24387301587301588, "grad_norm": 0.006837708409875631, "kl": 0.0682404488325119, "learning_rate": 4.354875283446712e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3841 }, { "completion_length": 228.57144165039062, "epoch": 0.24393650793650792, "grad_norm": 0.005885804072022438, "kl": 0.0851675346493721, "learning_rate": 4.3560090702947846e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3842 }, { "completion_length": 185.57144165039062, "epoch": 0.244, "grad_norm": 0.011047554202377796, "kl": 0.13535639643669128, "learning_rate": 4.357142857142857e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3843 }, { "completion_length": 144.0, "epoch": 0.24406349206349207, "grad_norm": 1.631823182106018, "kl": 0.1401940882205963, "learning_rate": 4.3582766439909297e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3844 }, { "completion_length": 197.00001525878906, "epoch": 0.24412698412698414, "grad_norm": 0.0099211186170578, "kl": 0.1374054253101349, "learning_rate": 4.3594104308390025e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3845 }, { "completion_length": 165.35714721679688, "epoch": 0.24419047619047618, "grad_norm": 0.00974997878074646, "kl": 0.11269503831863403, "learning_rate": 4.3605442176870743e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3846 }, { "completion_length": 153.07144165039062, "epoch": 0.24425396825396825, "grad_norm": 0.013624711893498898, "kl": 0.14176636934280396, "learning_rate": 4.361678004535147e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3847 }, { "completion_length": 218.71429443359375, "epoch": 0.24431746031746032, "grad_norm": 0.010763915255665779, "kl": 0.11960943788290024, "learning_rate": 4.3628117913832194e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3848 }, { "completion_length": 180.7857208251953, "epoch": 0.2443809523809524, "grad_norm": 0.010696266777813435, "kl": 0.14888904988765717, "learning_rate": 4.363945578231292e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3849 }, { "completion_length": 196.57144165039062, "epoch": 0.24444444444444444, "grad_norm": 1.192299246788025, "kl": 0.12394160032272339, "learning_rate": 4.365079365079365e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3850 }, { "completion_length": 187.57144165039062, "epoch": 0.2445079365079365, "grad_norm": 0.9012259840965271, "kl": 0.1430523842573166, "learning_rate": 4.3662131519274373e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3851 }, { "completion_length": 159.5, "epoch": 0.24457142857142858, "grad_norm": 1.3238354921340942, "kl": 0.13315674662590027, "learning_rate": 4.36734693877551e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3852 }, { "completion_length": 205.00001525878906, "epoch": 0.24463492063492062, "grad_norm": 0.01071292906999588, "kl": 0.13976596295833588, "learning_rate": 4.368480725623583e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3853 }, { "completion_length": 193.2857208251953, "epoch": 0.2446984126984127, "grad_norm": 0.009848705492913723, "kl": 0.12231048196554184, "learning_rate": 4.3696145124716547e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3854 }, { "completion_length": 231.4285888671875, "epoch": 0.24476190476190476, "grad_norm": 0.009113939478993416, "kl": 0.11429528146982193, "learning_rate": 4.3707482993197275e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3855 }, { "completion_length": 200.07144165039062, "epoch": 0.24482539682539683, "grad_norm": 1.1995058059692383, "kl": 0.14016520977020264, "learning_rate": 4.3718820861678003e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3856 }, { "completion_length": 204.1428680419922, "epoch": 0.24488888888888888, "grad_norm": 0.9860327839851379, "kl": 0.09470199048519135, "learning_rate": 4.3730158730158726e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3857 }, { "completion_length": 176.6428680419922, "epoch": 0.24495238095238095, "grad_norm": 0.007919399999082088, "kl": 0.10280261188745499, "learning_rate": 4.3741496598639455e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3858 }, { "completion_length": 157.0, "epoch": 0.24501587301587302, "grad_norm": 1.2815407514572144, "kl": 0.11074033379554749, "learning_rate": 4.3752834467120183e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3859 }, { "completion_length": 211.35714721679688, "epoch": 0.2450793650793651, "grad_norm": 0.9226598143577576, "kl": 0.07397449016571045, "learning_rate": 4.3764172335600906e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3860 }, { "completion_length": 210.92857360839844, "epoch": 0.24514285714285713, "grad_norm": 0.009034129790961742, "kl": 0.1069207563996315, "learning_rate": 4.377551020408163e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3861 }, { "completion_length": 192.07144165039062, "epoch": 0.2452063492063492, "grad_norm": 1.3584645986557007, "kl": 0.18799254298210144, "learning_rate": 4.3786848072562357e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3862 }, { "completion_length": 211.07144165039062, "epoch": 0.24526984126984128, "grad_norm": 0.008986815810203552, "kl": 0.11972752958536148, "learning_rate": 4.379818594104308e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3863 }, { "completion_length": 208.1428680419922, "epoch": 0.24533333333333332, "grad_norm": 0.011141270399093628, "kl": 0.13186998665332794, "learning_rate": 4.380952380952381e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3864 }, { "completion_length": 189.71429443359375, "epoch": 0.2453968253968254, "grad_norm": 0.010352888144552708, "kl": 0.1540261059999466, "learning_rate": 4.3820861678004536e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3865 }, { "completion_length": 189.92857360839844, "epoch": 0.24546031746031746, "grad_norm": 2.069061517715454, "kl": 0.17700999975204468, "learning_rate": 4.383219954648526e-07, "loss": 0.0002, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3866 }, { "completion_length": 226.2857208251953, "epoch": 0.24552380952380953, "grad_norm": 0.009537940844893456, "kl": 0.1397051215171814, "learning_rate": 4.3843537414965987e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3867 }, { "completion_length": 228.50001525878906, "epoch": 0.24558730158730158, "grad_norm": 0.7831752300262451, "kl": 0.14166799187660217, "learning_rate": 4.3854875283446705e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3868 }, { "completion_length": 248.1428680419922, "epoch": 0.24565079365079365, "grad_norm": 0.018749535083770752, "kl": 0.15208344161510468, "learning_rate": 4.3866213151927433e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3869 }, { "completion_length": 229.35714721679688, "epoch": 0.24571428571428572, "grad_norm": 1.4264601469039917, "kl": 0.17575529217720032, "learning_rate": 4.387755102040816e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3870 }, { "completion_length": 215.50001525878906, "epoch": 0.2457777777777778, "grad_norm": 0.011754120700061321, "kl": 0.19955386221408844, "learning_rate": 4.3888888888888884e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3871 }, { "completion_length": 218.1428680419922, "epoch": 0.24584126984126983, "grad_norm": 0.012614291161298752, "kl": 0.1893373429775238, "learning_rate": 4.390022675736961e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3872 }, { "completion_length": 268.3571472167969, "epoch": 0.2459047619047619, "grad_norm": 0.009019880555570126, "kl": 0.15103116631507874, "learning_rate": 4.391156462585034e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3873 }, { "completion_length": 204.50001525878906, "epoch": 0.24596825396825397, "grad_norm": 0.012884503230452538, "kl": 0.17616485059261322, "learning_rate": 4.3922902494331063e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3874 }, { "completion_length": 263.4285888671875, "epoch": 0.24603174603174602, "grad_norm": 0.009689408354461193, "kl": 0.14746952056884766, "learning_rate": 4.393424036281179e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3875 }, { "completion_length": 192.50001525878906, "epoch": 0.2460952380952381, "grad_norm": 1.6385430097579956, "kl": 0.2353506088256836, "learning_rate": 4.3945578231292514e-07, "loss": 0.0002, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3876 }, { "completion_length": 206.1428680419922, "epoch": 0.24615873015873016, "grad_norm": 1.3795764446258545, "kl": 0.23336796462535858, "learning_rate": 4.3956916099773237e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3877 }, { "completion_length": 167.2857208251953, "epoch": 0.24622222222222223, "grad_norm": 0.030789507552981377, "kl": 0.3238154351711273, "learning_rate": 4.3968253968253965e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3878 }, { "completion_length": 209.85714721679688, "epoch": 0.24628571428571427, "grad_norm": 0.01627969928085804, "kl": 0.23055824637413025, "learning_rate": 4.3979591836734694e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3879 }, { "completion_length": 175.07144165039062, "epoch": 0.24634920634920635, "grad_norm": 0.9734737873077393, "kl": 0.32597100734710693, "learning_rate": 4.3990929705215417e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3880 }, { "completion_length": 184.92857360839844, "epoch": 0.24641269841269842, "grad_norm": 0.021385008469223976, "kl": 0.352409690618515, "learning_rate": 4.4002267573696145e-07, "loss": 0.0004, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3881 }, { "completion_length": 204.07144165039062, "epoch": 0.2464761904761905, "grad_norm": 0.015055270865559578, "kl": 0.28308454155921936, "learning_rate": 4.4013605442176873e-07, "loss": 0.0003, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3882 }, { "completion_length": 174.42857360839844, "epoch": 0.24653968253968253, "grad_norm": 2.4307503700256348, "kl": 0.2795622944831848, "learning_rate": 4.402494331065759e-07, "loss": 0.0003, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 3883 }, { "completion_length": 220.21429443359375, "epoch": 0.2466031746031746, "grad_norm": 2.0464911460876465, "kl": 0.29252296686172485, "learning_rate": 4.403628117913832e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3884 }, { "completion_length": 181.2857208251953, "epoch": 0.24666666666666667, "grad_norm": 0.05425742268562317, "kl": 0.5714453458786011, "learning_rate": 4.4047619047619047e-07, "loss": 0.0006, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3885 }, { "completion_length": 197.42857360839844, "epoch": 0.24673015873015874, "grad_norm": 24.803760528564453, "kl": 1.5276572704315186, "learning_rate": 4.405895691609977e-07, "loss": 0.0015, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3886 }, { "completion_length": 107.21428680419922, "epoch": 0.2467936507936508, "grad_norm": 0.3301255404949188, "kl": 1.7801086902618408, "learning_rate": 4.40702947845805e-07, "loss": 0.0018, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3887 }, { "completion_length": 108.71428680419922, "epoch": 0.24685714285714286, "grad_norm": 6.731717109680176, "kl": 2.1921277046203613, "learning_rate": 4.4081632653061226e-07, "loss": 0.0022, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3888 }, { "completion_length": 103.14286041259766, "epoch": 0.24692063492063493, "grad_norm": 8.955172538757324, "kl": 2.180272102355957, "learning_rate": 4.409297052154195e-07, "loss": 0.0022, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3889 }, { "completion_length": 132.42857360839844, "epoch": 0.24698412698412697, "grad_norm": 0.20919503271579742, "kl": 1.6775180101394653, "learning_rate": 4.4104308390022677e-07, "loss": 0.0017, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3890 }, { "completion_length": 165.35714721679688, "epoch": 0.24704761904761904, "grad_norm": 0.06512439996004105, "kl": 0.7372785806655884, "learning_rate": 4.4115646258503395e-07, "loss": 0.0007, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3891 }, { "completion_length": 206.50001525878906, "epoch": 0.24711111111111111, "grad_norm": 5.2269768714904785, "kl": 0.49248895049095154, "learning_rate": 4.4126984126984123e-07, "loss": 0.0005, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3892 }, { "completion_length": 172.92857360839844, "epoch": 0.24717460317460319, "grad_norm": 1.8514864444732666, "kl": 0.4237743318080902, "learning_rate": 4.413832199546485e-07, "loss": 0.0004, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3893 }, { "completion_length": 180.35714721679688, "epoch": 0.24723809523809523, "grad_norm": 1.7679449319839478, "kl": 0.23142951726913452, "learning_rate": 4.4149659863945574e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3894 }, { "completion_length": 209.21429443359375, "epoch": 0.2473015873015873, "grad_norm": 1.522645354270935, "kl": 0.22283774614334106, "learning_rate": 4.41609977324263e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3895 }, { "completion_length": 181.35714721679688, "epoch": 0.24736507936507937, "grad_norm": 0.022060543298721313, "kl": 0.18720728158950806, "learning_rate": 4.417233560090703e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3896 }, { "completion_length": 212.50001525878906, "epoch": 0.24742857142857144, "grad_norm": 1.0457137823104858, "kl": 0.1332951784133911, "learning_rate": 4.4183673469387753e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3897 }, { "completion_length": 153.57144165039062, "epoch": 0.24749206349206349, "grad_norm": 0.00711150374263525, "kl": 0.12819358706474304, "learning_rate": 4.4195011337868476e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3898 }, { "completion_length": 168.07144165039062, "epoch": 0.24755555555555556, "grad_norm": 0.007666607387363911, "kl": 0.12679222226142883, "learning_rate": 4.4206349206349205e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3899 }, { "completion_length": 189.21429443359375, "epoch": 0.24761904761904763, "grad_norm": 0.006835807114839554, "kl": 0.09949980676174164, "learning_rate": 4.421768707482993e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3900 }, { "completion_length": 189.7857208251953, "epoch": 0.24768253968253967, "grad_norm": 0.005882733967155218, "kl": 0.074727863073349, "learning_rate": 4.4229024943310656e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3901 }, { "completion_length": 161.1428680419922, "epoch": 0.24774603174603174, "grad_norm": 0.007318397052586079, "kl": 0.11385633796453476, "learning_rate": 4.4240362811791384e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3902 }, { "completion_length": 221.21429443359375, "epoch": 0.2478095238095238, "grad_norm": 0.0061507998034358025, "kl": 0.08973123878240585, "learning_rate": 4.4251700680272107e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3903 }, { "completion_length": 182.21429443359375, "epoch": 0.24787301587301588, "grad_norm": 0.006182069890201092, "kl": 0.09415286034345627, "learning_rate": 4.4263038548752835e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3904 }, { "completion_length": 201.42857360839844, "epoch": 0.24793650793650793, "grad_norm": 0.006469788495451212, "kl": 0.09841331839561462, "learning_rate": 4.427437641723356e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3905 }, { "completion_length": 185.6428680419922, "epoch": 0.248, "grad_norm": 0.005279009696096182, "kl": 0.07492705434560776, "learning_rate": 4.428571428571428e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3906 }, { "completion_length": 196.00001525878906, "epoch": 0.24806349206349207, "grad_norm": 0.005242150276899338, "kl": 0.08199309557676315, "learning_rate": 4.429705215419501e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3907 }, { "completion_length": 168.6428680419922, "epoch": 0.24812698412698414, "grad_norm": 0.0070634642615914345, "kl": 0.09074519574642181, "learning_rate": 4.4308390022675737e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3908 }, { "completion_length": 174.42857360839844, "epoch": 0.24819047619047618, "grad_norm": 1.133474588394165, "kl": 0.09217824786901474, "learning_rate": 4.431972789115646e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3909 }, { "completion_length": 211.21429443359375, "epoch": 0.24825396825396825, "grad_norm": 0.005556355230510235, "kl": 0.0767543688416481, "learning_rate": 4.433106575963719e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3910 }, { "completion_length": 179.6428680419922, "epoch": 0.24831746031746033, "grad_norm": 0.007950976490974426, "kl": 0.10339926183223724, "learning_rate": 4.434240362811791e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3911 }, { "completion_length": 190.7857208251953, "epoch": 0.24838095238095237, "grad_norm": 0.007942807860672474, "kl": 0.09312868863344193, "learning_rate": 4.435374149659864e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3912 }, { "completion_length": 177.50001525878906, "epoch": 0.24844444444444444, "grad_norm": 0.006049958057701588, "kl": 0.07310894876718521, "learning_rate": 4.436507936507936e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3913 }, { "completion_length": 163.57144165039062, "epoch": 0.2485079365079365, "grad_norm": 0.8210187554359436, "kl": 0.08188286423683167, "learning_rate": 4.4376417233560085e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3914 }, { "completion_length": 214.92857360839844, "epoch": 0.24857142857142858, "grad_norm": 0.0047234827652573586, "kl": 0.06541885435581207, "learning_rate": 4.4387755102040813e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3915 }, { "completion_length": 123.92857360839844, "epoch": 0.24863492063492063, "grad_norm": 0.007530054077506065, "kl": 0.10825230926275253, "learning_rate": 4.439909297052154e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3916 }, { "completion_length": 191.42857360839844, "epoch": 0.2486984126984127, "grad_norm": 0.005893738940358162, "kl": 0.0857459083199501, "learning_rate": 4.4410430839002264e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3917 }, { "completion_length": 189.1428680419922, "epoch": 0.24876190476190477, "grad_norm": 0.004278039559721947, "kl": 0.06516633927822113, "learning_rate": 4.442176870748299e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3918 }, { "completion_length": 157.5, "epoch": 0.24882539682539684, "grad_norm": 0.7931103706359863, "kl": 0.09491585195064545, "learning_rate": 4.443310657596372e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3919 }, { "completion_length": 170.7857208251953, "epoch": 0.24888888888888888, "grad_norm": 0.008082631044089794, "kl": 0.09962496906518936, "learning_rate": 4.444444444444444e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3920 }, { "completion_length": 190.92857360839844, "epoch": 0.24895238095238095, "grad_norm": 0.9956346750259399, "kl": 0.09139885008335114, "learning_rate": 4.4455782312925167e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3921 }, { "completion_length": 206.00001525878906, "epoch": 0.24901587301587302, "grad_norm": 1.1314494609832764, "kl": 0.06299105286598206, "learning_rate": 4.4467120181405895e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3922 }, { "completion_length": 186.6428680419922, "epoch": 0.24907936507936507, "grad_norm": 0.006854510400444269, "kl": 0.08280050754547119, "learning_rate": 4.447845804988662e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3923 }, { "completion_length": 193.35714721679688, "epoch": 0.24914285714285714, "grad_norm": 0.0064911069348454475, "kl": 0.1030096709728241, "learning_rate": 4.4489795918367346e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3924 }, { "completion_length": 173.71429443359375, "epoch": 0.2492063492063492, "grad_norm": 0.005064559634774923, "kl": 0.07526636123657227, "learning_rate": 4.4501133786848074e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3925 }, { "completion_length": 165.92857360839844, "epoch": 0.24926984126984128, "grad_norm": 0.006321936380118132, "kl": 0.08057065308094025, "learning_rate": 4.4512471655328797e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3926 }, { "completion_length": 257.7857360839844, "epoch": 0.24933333333333332, "grad_norm": 0.00451319245621562, "kl": 0.06441888958215714, "learning_rate": 4.452380952380952e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3927 }, { "completion_length": 170.7857208251953, "epoch": 0.2493968253968254, "grad_norm": 0.006389564834535122, "kl": 0.09491188824176788, "learning_rate": 4.453514739229025e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3928 }, { "completion_length": 215.57144165039062, "epoch": 0.24946031746031747, "grad_norm": 0.005555807147175074, "kl": 0.06981318444013596, "learning_rate": 4.454648526077097e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3929 }, { "completion_length": 204.21429443359375, "epoch": 0.24952380952380954, "grad_norm": 0.004382789134979248, "kl": 0.07340829819440842, "learning_rate": 4.45578231292517e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3930 }, { "completion_length": 135.2857208251953, "epoch": 0.24958730158730158, "grad_norm": 0.007909083738923073, "kl": 0.1007860004901886, "learning_rate": 4.456916099773242e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3931 }, { "completion_length": 228.07144165039062, "epoch": 0.24965079365079365, "grad_norm": 0.004804079420864582, "kl": 0.06954365968704224, "learning_rate": 4.458049886621315e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3932 }, { "completion_length": 155.5, "epoch": 0.24971428571428572, "grad_norm": 0.009585481137037277, "kl": 0.13873213529586792, "learning_rate": 4.459183673469388e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3933 }, { "completion_length": 177.57144165039062, "epoch": 0.24977777777777777, "grad_norm": 0.005773178301751614, "kl": 0.0822012647986412, "learning_rate": 4.46031746031746e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3934 }, { "completion_length": 195.35714721679688, "epoch": 0.24984126984126984, "grad_norm": 0.017057662829756737, "kl": 0.10492219030857086, "learning_rate": 4.4614512471655324e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3935 }, { "completion_length": 176.07144165039062, "epoch": 0.2499047619047619, "grad_norm": 0.006206988822668791, "kl": 0.10840791463851929, "learning_rate": 4.462585034013605e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3936 }, { "completion_length": 179.92857360839844, "epoch": 0.24996825396825398, "grad_norm": 0.005977805703878403, "kl": 0.08489980548620224, "learning_rate": 4.4637188208616775e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3937 }, { "completion_length": 149.6428680419922, "epoch": 0.25003174603174605, "grad_norm": 0.007990529760718346, "kl": 0.11670996993780136, "learning_rate": 4.4648526077097503e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3938 }, { "completion_length": 165.6428680419922, "epoch": 0.2500952380952381, "grad_norm": 0.006629507057368755, "kl": 0.1100306361913681, "learning_rate": 4.465986394557823e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3939 }, { "completion_length": 210.6428680419922, "epoch": 0.25015873015873014, "grad_norm": 0.0051155127584934235, "kl": 0.07477188110351562, "learning_rate": 4.4671201814058955e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3940 }, { "completion_length": 151.2857208251953, "epoch": 0.25022222222222223, "grad_norm": 1.0184714794158936, "kl": 0.1400870382785797, "learning_rate": 4.4682539682539683e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3941 }, { "completion_length": 159.6428680419922, "epoch": 0.2502857142857143, "grad_norm": 0.009562790393829346, "kl": 0.1390589475631714, "learning_rate": 4.4693877551020406e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3942 }, { "completion_length": 162.21429443359375, "epoch": 0.2503492063492064, "grad_norm": 0.005849115084856749, "kl": 0.0943053737282753, "learning_rate": 4.470521541950113e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3943 }, { "completion_length": 205.92857360839844, "epoch": 0.2504126984126984, "grad_norm": 0.008512604981660843, "kl": 0.08049880713224411, "learning_rate": 4.4716553287981857e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3944 }, { "completion_length": 178.21429443359375, "epoch": 0.25047619047619046, "grad_norm": 0.006098847836256027, "kl": 0.09239392727613449, "learning_rate": 4.4727891156462585e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3945 }, { "completion_length": 186.92857360839844, "epoch": 0.25053968253968256, "grad_norm": 0.7912845611572266, "kl": 0.09177538007497787, "learning_rate": 4.473922902494331e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3946 }, { "completion_length": 191.92857360839844, "epoch": 0.2506031746031746, "grad_norm": 0.006521355826407671, "kl": 0.08891210705041885, "learning_rate": 4.4750566893424036e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3947 }, { "completion_length": 239.00001525878906, "epoch": 0.25066666666666665, "grad_norm": 0.00457839434966445, "kl": 0.07083860039710999, "learning_rate": 4.4761904761904764e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3948 }, { "completion_length": 159.1428680419922, "epoch": 0.25073015873015875, "grad_norm": 0.8762533664703369, "kl": 0.07905058562755585, "learning_rate": 4.4773242630385487e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3949 }, { "completion_length": 161.42857360839844, "epoch": 0.2507936507936508, "grad_norm": 1.3445639610290527, "kl": 0.10499636828899384, "learning_rate": 4.478458049886621e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3950 }, { "completion_length": 164.0, "epoch": 0.25085714285714283, "grad_norm": 0.006651856005191803, "kl": 0.09669368714094162, "learning_rate": 4.4795918367346933e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3951 }, { "completion_length": 176.21429443359375, "epoch": 0.25092063492063493, "grad_norm": 0.01122602354735136, "kl": 0.0990760549902916, "learning_rate": 4.480725623582766e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3952 }, { "completion_length": 151.5, "epoch": 0.250984126984127, "grad_norm": 0.007222394458949566, "kl": 0.10776451975107193, "learning_rate": 4.481859410430839e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3953 }, { "completion_length": 165.7857208251953, "epoch": 0.2510476190476191, "grad_norm": 0.007566975429654121, "kl": 0.10320035368204117, "learning_rate": 4.482993197278911e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3954 }, { "completion_length": 199.7857208251953, "epoch": 0.2511111111111111, "grad_norm": 0.005454748868942261, "kl": 0.06737549602985382, "learning_rate": 4.484126984126984e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3955 }, { "completion_length": 197.07144165039062, "epoch": 0.25117460317460316, "grad_norm": 0.005827170331031084, "kl": 0.06658522039651871, "learning_rate": 4.485260770975057e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3956 }, { "completion_length": 128.35714721679688, "epoch": 0.25123809523809526, "grad_norm": 0.008987070061266422, "kl": 0.11558070033788681, "learning_rate": 4.4863945578231286e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3957 }, { "completion_length": 168.42857360839844, "epoch": 0.2513015873015873, "grad_norm": 0.008714966475963593, "kl": 0.09391757845878601, "learning_rate": 4.4875283446712014e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3958 }, { "completion_length": 166.0, "epoch": 0.25136507936507935, "grad_norm": 0.9162573218345642, "kl": 0.10546322911977768, "learning_rate": 4.488662131519274e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3959 }, { "completion_length": 182.7857208251953, "epoch": 0.25142857142857145, "grad_norm": 0.008008874952793121, "kl": 0.08707295358181, "learning_rate": 4.4897959183673465e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3960 }, { "completion_length": 183.85714721679688, "epoch": 0.2514920634920635, "grad_norm": 0.011475822888314724, "kl": 0.12064479291439056, "learning_rate": 4.4909297052154194e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3961 }, { "completion_length": 164.35714721679688, "epoch": 0.25155555555555553, "grad_norm": 0.008650513365864754, "kl": 0.11259844154119492, "learning_rate": 4.492063492063492e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3962 }, { "completion_length": 179.85714721679688, "epoch": 0.25161904761904763, "grad_norm": 0.006349655333906412, "kl": 0.07790934294462204, "learning_rate": 4.4931972789115645e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3963 }, { "completion_length": 174.71429443359375, "epoch": 0.2516825396825397, "grad_norm": 0.007375293876975775, "kl": 0.08966014534235, "learning_rate": 4.494331065759637e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3964 }, { "completion_length": 147.07144165039062, "epoch": 0.2517460317460318, "grad_norm": 0.00805007852613926, "kl": 0.10918420553207397, "learning_rate": 4.4954648526077096e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3965 }, { "completion_length": 179.42857360839844, "epoch": 0.2518095238095238, "grad_norm": 0.007550273090600967, "kl": 0.10596231371164322, "learning_rate": 4.496598639455782e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3966 }, { "completion_length": 173.7857208251953, "epoch": 0.25187301587301586, "grad_norm": 1.4945299625396729, "kl": 0.1279718428850174, "learning_rate": 4.4977324263038547e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3967 }, { "completion_length": 186.85714721679688, "epoch": 0.25193650793650796, "grad_norm": 0.743882417678833, "kl": 0.08602453768253326, "learning_rate": 4.4988662131519275e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3968 }, { "completion_length": 157.0, "epoch": 0.252, "grad_norm": 0.009474747814238071, "kl": 0.12263816595077515, "learning_rate": 4.5e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3969 }, { "completion_length": 185.85714721679688, "epoch": 0.25206349206349205, "grad_norm": 0.006828292738646269, "kl": 0.09363842755556107, "learning_rate": 4.5011337868480726e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3970 }, { "completion_length": 181.21429443359375, "epoch": 0.25212698412698414, "grad_norm": 0.008052774704992771, "kl": 0.11373864114284515, "learning_rate": 4.5022675736961454e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3971 }, { "completion_length": 191.07144165039062, "epoch": 0.2521904761904762, "grad_norm": 0.01186308078467846, "kl": 0.11044348031282425, "learning_rate": 4.503401360544217e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3972 }, { "completion_length": 164.5, "epoch": 0.25225396825396823, "grad_norm": 0.010113874450325966, "kl": 0.13842639327049255, "learning_rate": 4.50453514739229e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3973 }, { "completion_length": 185.85714721679688, "epoch": 0.25231746031746033, "grad_norm": 0.007316838018596172, "kl": 0.09510316699743271, "learning_rate": 4.5056689342403623e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3974 }, { "completion_length": 172.21429443359375, "epoch": 0.2523809523809524, "grad_norm": 0.008551514707505703, "kl": 0.10764158517122269, "learning_rate": 4.506802721088435e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3975 }, { "completion_length": 182.85714721679688, "epoch": 0.25244444444444447, "grad_norm": 0.009556110948324203, "kl": 0.11448393017053604, "learning_rate": 4.507936507936508e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 3976 }, { "completion_length": 227.50001525878906, "epoch": 0.2525079365079365, "grad_norm": 0.006657703779637814, "kl": 0.09362057596445084, "learning_rate": 4.50907029478458e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3977 }, { "completion_length": 174.2857208251953, "epoch": 0.25257142857142856, "grad_norm": 0.01130168791860342, "kl": 0.14245188236236572, "learning_rate": 4.510204081632653e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3978 }, { "completion_length": 212.42857360839844, "epoch": 0.25263492063492066, "grad_norm": 0.8482787013053894, "kl": 0.12162352353334427, "learning_rate": 4.5113378684807253e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3979 }, { "completion_length": 156.92857360839844, "epoch": 0.2526984126984127, "grad_norm": 0.009295693598687649, "kl": 0.10417734086513519, "learning_rate": 4.5124716553287976e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3980 }, { "completion_length": 188.1428680419922, "epoch": 0.25276190476190474, "grad_norm": 0.00869559682905674, "kl": 0.10775240510702133, "learning_rate": 4.5136054421768705e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3981 }, { "completion_length": 186.42857360839844, "epoch": 0.25282539682539684, "grad_norm": 1.18717622756958, "kl": 0.07898445427417755, "learning_rate": 4.5147392290249433e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3982 }, { "completion_length": 181.21429443359375, "epoch": 0.2528888888888889, "grad_norm": 0.00763192493468523, "kl": 0.11861644685268402, "learning_rate": 4.5158730158730156e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3983 }, { "completion_length": 189.71429443359375, "epoch": 0.25295238095238093, "grad_norm": 0.6655921339988708, "kl": 0.09526821970939636, "learning_rate": 4.5170068027210884e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3984 }, { "completion_length": 197.57144165039062, "epoch": 0.253015873015873, "grad_norm": 0.007114000152796507, "kl": 0.10277370363473892, "learning_rate": 4.518140589569161e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3985 }, { "completion_length": 153.57144165039062, "epoch": 0.25307936507936507, "grad_norm": 0.010310794226825237, "kl": 0.1317082941532135, "learning_rate": 4.519274376417233e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3986 }, { "completion_length": 136.42857360839844, "epoch": 0.25314285714285717, "grad_norm": 0.012350964359939098, "kl": 0.15826304256916046, "learning_rate": 4.520408163265306e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3987 }, { "completion_length": 218.00001525878906, "epoch": 0.2532063492063492, "grad_norm": 0.006702533923089504, "kl": 0.08462771028280258, "learning_rate": 4.5215419501133786e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3988 }, { "completion_length": 254.9285888671875, "epoch": 0.25326984126984126, "grad_norm": 0.005296858958899975, "kl": 0.08844014257192612, "learning_rate": 4.522675736961451e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3989 }, { "completion_length": 188.85714721679688, "epoch": 0.25333333333333335, "grad_norm": 1.4191077947616577, "kl": 0.12403760105371475, "learning_rate": 4.5238095238095237e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3990 }, { "completion_length": 147.35714721679688, "epoch": 0.2533968253968254, "grad_norm": 1.6847478151321411, "kl": 0.13816224038600922, "learning_rate": 4.5249433106575965e-07, "loss": 0.0001, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 3991 }, { "completion_length": 177.6428680419922, "epoch": 0.25346031746031744, "grad_norm": 1.2160342931747437, "kl": 0.12146760523319244, "learning_rate": 4.526077097505669e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3992 }, { "completion_length": 176.21429443359375, "epoch": 0.25352380952380954, "grad_norm": 0.00930765736848116, "kl": 0.14145933091640472, "learning_rate": 4.5272108843537416e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3993 }, { "completion_length": 146.85714721679688, "epoch": 0.2535873015873016, "grad_norm": 0.011463052593171597, "kl": 0.15250809490680695, "learning_rate": 4.5283446712018134e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3994 }, { "completion_length": 159.7857208251953, "epoch": 0.2536507936507936, "grad_norm": 1.292561650276184, "kl": 0.1726994663476944, "learning_rate": 4.529478458049886e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 3995 }, { "completion_length": 144.5, "epoch": 0.2537142857142857, "grad_norm": 0.015843572095036507, "kl": 0.17259295284748077, "learning_rate": 4.530612244897959e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3996 }, { "completion_length": 191.92857360839844, "epoch": 0.25377777777777777, "grad_norm": 0.006819401867687702, "kl": 0.1057497039437294, "learning_rate": 4.5317460317460313e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3997 }, { "completion_length": 157.85714721679688, "epoch": 0.25384126984126987, "grad_norm": 0.00846148282289505, "kl": 0.1313459873199463, "learning_rate": 4.532879818594104e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3998 }, { "completion_length": 157.5, "epoch": 0.2539047619047619, "grad_norm": 0.011868434958159924, "kl": 0.15127670764923096, "learning_rate": 4.534013605442177e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 3999 }, { "completion_length": 177.7857208251953, "epoch": 0.25396825396825395, "grad_norm": 1.18031644821167, "kl": 0.12293001264333725, "learning_rate": 4.535147392290249e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4000 }, { "completion_length": 156.35714721679688, "epoch": 0.25403174603174605, "grad_norm": 0.009028329513967037, "kl": 0.13373084366321564, "learning_rate": 4.5362811791383215e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4001 }, { "completion_length": 185.1428680419922, "epoch": 0.2540952380952381, "grad_norm": 0.613955557346344, "kl": 0.13885904848575592, "learning_rate": 4.5374149659863944e-07, "loss": 0.0001, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.2142857313156128, "step": 4002 }, { "completion_length": 161.1428680419922, "epoch": 0.25415873015873014, "grad_norm": 1.0938584804534912, "kl": 0.1320054680109024, "learning_rate": 4.5385487528344666e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4003 }, { "completion_length": 140.92857360839844, "epoch": 0.25422222222222224, "grad_norm": 0.009835212491452694, "kl": 0.18110838532447815, "learning_rate": 4.5396825396825395e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4004 }, { "completion_length": 238.6428680419922, "epoch": 0.2542857142857143, "grad_norm": 0.006517623085528612, "kl": 0.1031985729932785, "learning_rate": 4.5408163265306123e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4005 }, { "completion_length": 173.00001525878906, "epoch": 0.2543492063492063, "grad_norm": 0.009523142129182816, "kl": 0.167701855301857, "learning_rate": 4.5419501133786846e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4006 }, { "completion_length": 213.21429443359375, "epoch": 0.2544126984126984, "grad_norm": 0.008795893751084805, "kl": 0.14765749871730804, "learning_rate": 4.5430839002267574e-07, "loss": 0.0001, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4007 }, { "completion_length": 173.7857208251953, "epoch": 0.25447619047619047, "grad_norm": 0.009503026492893696, "kl": 0.17923232913017273, "learning_rate": 4.54421768707483e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4008 }, { "completion_length": 195.42857360839844, "epoch": 0.25453968253968257, "grad_norm": 0.010385910049080849, "kl": 0.1640627235174179, "learning_rate": 4.545351473922902e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4009 }, { "completion_length": 152.57144165039062, "epoch": 0.2546031746031746, "grad_norm": 0.013235210441052914, "kl": 0.2337774634361267, "learning_rate": 4.546485260770975e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4010 }, { "completion_length": 152.92857360839844, "epoch": 0.25466666666666665, "grad_norm": 0.012185524217784405, "kl": 0.20688512921333313, "learning_rate": 4.5476190476190476e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4011 }, { "completion_length": 157.92857360839844, "epoch": 0.25473015873015875, "grad_norm": 0.008984887041151524, "kl": 0.18233348429203033, "learning_rate": 4.54875283446712e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4012 }, { "completion_length": 143.5, "epoch": 0.2547936507936508, "grad_norm": 0.0113411545753479, "kl": 0.19353622198104858, "learning_rate": 4.5498866213151927e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4013 }, { "completion_length": 158.85714721679688, "epoch": 0.25485714285714284, "grad_norm": 0.01015919353812933, "kl": 0.17841146886348724, "learning_rate": 4.551020408163265e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4014 }, { "completion_length": 165.0, "epoch": 0.25492063492063494, "grad_norm": 0.9845941066741943, "kl": 0.1602982133626938, "learning_rate": 4.552154195011338e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4015 }, { "completion_length": 174.7857208251953, "epoch": 0.254984126984127, "grad_norm": 0.9576379060745239, "kl": 0.1597096025943756, "learning_rate": 4.55328798185941e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4016 }, { "completion_length": 206.71429443359375, "epoch": 0.255047619047619, "grad_norm": 0.007822735235095024, "kl": 0.15645501017570496, "learning_rate": 4.5544217687074824e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4017 }, { "completion_length": 182.07144165039062, "epoch": 0.2551111111111111, "grad_norm": 1.2035075426101685, "kl": 0.14623326063156128, "learning_rate": 4.555555555555555e-07, "loss": 0.0001, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.2142857313156128, "step": 4018 }, { "completion_length": 210.42857360839844, "epoch": 0.25517460317460317, "grad_norm": 0.9168229699134827, "kl": 0.14503593742847443, "learning_rate": 4.556689342403628e-07, "loss": 0.0001, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4019 }, { "completion_length": 183.85714721679688, "epoch": 0.25523809523809526, "grad_norm": 1.0537526607513428, "kl": 0.20713196694850922, "learning_rate": 4.5578231292517003e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4020 }, { "completion_length": 165.2857208251953, "epoch": 0.2553015873015873, "grad_norm": 1.164968729019165, "kl": 0.27521848678588867, "learning_rate": 4.558956916099773e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4021 }, { "completion_length": 183.6428680419922, "epoch": 0.25536507936507935, "grad_norm": 0.010330863296985626, "kl": 0.19582191109657288, "learning_rate": 4.560090702947846e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4022 }, { "completion_length": 183.21429443359375, "epoch": 0.25542857142857145, "grad_norm": 1.581008791923523, "kl": 0.1927303671836853, "learning_rate": 4.561224489795918e-07, "loss": 0.0002, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4023 }, { "completion_length": 223.00001525878906, "epoch": 0.2554920634920635, "grad_norm": 0.011506368406116962, "kl": 0.17963631451129913, "learning_rate": 4.5623582766439906e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4024 }, { "completion_length": 209.57144165039062, "epoch": 0.25555555555555554, "grad_norm": 0.6751397252082825, "kl": 0.16901449859142303, "learning_rate": 4.5634920634920634e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4025 }, { "completion_length": 165.42857360839844, "epoch": 0.25561904761904763, "grad_norm": 1.232268214225769, "kl": 0.22644108533859253, "learning_rate": 4.5646258503401357e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4026 }, { "completion_length": 159.42857360839844, "epoch": 0.2556825396825397, "grad_norm": 1.5147775411605835, "kl": 0.2708292603492737, "learning_rate": 4.5657596371882085e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4027 }, { "completion_length": 157.42857360839844, "epoch": 0.2557460317460317, "grad_norm": 0.012828830629587173, "kl": 0.21707849204540253, "learning_rate": 4.5668934240362813e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4028 }, { "completion_length": 216.85714721679688, "epoch": 0.2558095238095238, "grad_norm": 0.01077397633343935, "kl": 0.19757042825222015, "learning_rate": 4.5680272108843536e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4029 }, { "completion_length": 166.7857208251953, "epoch": 0.25587301587301586, "grad_norm": 0.011536900885403156, "kl": 0.2535213232040405, "learning_rate": 4.5691609977324264e-07, "loss": 0.0003, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4030 }, { "completion_length": 225.85714721679688, "epoch": 0.25593650793650796, "grad_norm": 0.010201135650277138, "kl": 0.16106641292572021, "learning_rate": 4.5702947845804987e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4031 }, { "completion_length": 183.42857360839844, "epoch": 0.256, "grad_norm": 0.010872731916606426, "kl": 0.22362558543682098, "learning_rate": 4.571428571428571e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4032 }, { "completion_length": 179.00001525878906, "epoch": 0.25606349206349205, "grad_norm": 1.336104154586792, "kl": 0.22151276469230652, "learning_rate": 4.572562358276644e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4033 }, { "completion_length": 184.71429443359375, "epoch": 0.25612698412698415, "grad_norm": 1.5311391353607178, "kl": 0.22988082468509674, "learning_rate": 4.573696145124716e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4034 }, { "completion_length": 154.0, "epoch": 0.2561904761904762, "grad_norm": 0.012348220683634281, "kl": 0.24728649854660034, "learning_rate": 4.574829931972789e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4035 }, { "completion_length": 170.0, "epoch": 0.25625396825396823, "grad_norm": 1.2759606838226318, "kl": 0.18194685876369476, "learning_rate": 4.575963718820862e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4036 }, { "completion_length": 161.57144165039062, "epoch": 0.25631746031746033, "grad_norm": 0.010187370702624321, "kl": 0.2424413114786148, "learning_rate": 4.577097505668934e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4037 }, { "completion_length": 197.42857360839844, "epoch": 0.2563809523809524, "grad_norm": 0.8806452751159668, "kl": 0.15752361714839935, "learning_rate": 4.5782312925170063e-07, "loss": 0.0002, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4038 }, { "completion_length": 181.6428680419922, "epoch": 0.2564444444444444, "grad_norm": 1.3893349170684814, "kl": 0.22304528951644897, "learning_rate": 4.579365079365079e-07, "loss": 0.0002, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4039 }, { "completion_length": 191.92857360839844, "epoch": 0.2565079365079365, "grad_norm": 0.01609472930431366, "kl": 0.30058732628822327, "learning_rate": 4.5804988662131514e-07, "loss": 0.0003, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4040 }, { "completion_length": 176.1428680419922, "epoch": 0.25657142857142856, "grad_norm": 2.022242307662964, "kl": 0.27573826909065247, "learning_rate": 4.581632653061224e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4041 }, { "completion_length": 182.42857360839844, "epoch": 0.25663492063492066, "grad_norm": 1.67704176902771, "kl": 0.23109464347362518, "learning_rate": 4.582766439909297e-07, "loss": 0.0002, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4042 }, { "completion_length": 178.42857360839844, "epoch": 0.2566984126984127, "grad_norm": 0.020517434924840927, "kl": 0.2820926904678345, "learning_rate": 4.5839002267573694e-07, "loss": 0.0003, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4043 }, { "completion_length": 184.57144165039062, "epoch": 0.25676190476190475, "grad_norm": 1.7701488733291626, "kl": 0.3383195996284485, "learning_rate": 4.585034013605442e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4044 }, { "completion_length": 218.71429443359375, "epoch": 0.25682539682539685, "grad_norm": 1.2801132202148438, "kl": 0.28375011682510376, "learning_rate": 4.5861678004535145e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4045 }, { "completion_length": 189.7857208251953, "epoch": 0.2568888888888889, "grad_norm": 1.6717276573181152, "kl": 0.25606870651245117, "learning_rate": 4.587301587301587e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4046 }, { "completion_length": 150.1428680419922, "epoch": 0.25695238095238093, "grad_norm": 2.6478333473205566, "kl": 0.4049972891807556, "learning_rate": 4.5884353741496596e-07, "loss": 0.0004, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4047 }, { "completion_length": 207.2857208251953, "epoch": 0.25701587301587303, "grad_norm": 1.162183165550232, "kl": 0.3015795350074768, "learning_rate": 4.5895691609977324e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4048 }, { "completion_length": 174.42857360839844, "epoch": 0.2570793650793651, "grad_norm": 1.6746653318405151, "kl": 0.27724313735961914, "learning_rate": 4.5907029478458047e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4049 }, { "completion_length": 168.6428680419922, "epoch": 0.2571428571428571, "grad_norm": 0.014194468967616558, "kl": 0.30780255794525146, "learning_rate": 4.5918367346938775e-07, "loss": 0.0003, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4050 }, { "completion_length": 170.57144165039062, "epoch": 0.2572063492063492, "grad_norm": 1.9072903394699097, "kl": 0.2949625551700592, "learning_rate": 4.5929705215419503e-07, "loss": 0.0003, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4051 }, { "completion_length": 255.9285888671875, "epoch": 0.25726984126984126, "grad_norm": 1.151955485343933, "kl": 0.18179163336753845, "learning_rate": 4.5941043083900226e-07, "loss": 0.0002, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4052 }, { "completion_length": 182.57144165039062, "epoch": 0.25733333333333336, "grad_norm": 1.309532880783081, "kl": 0.2938285768032074, "learning_rate": 4.595238095238095e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4053 }, { "completion_length": 217.92857360839844, "epoch": 0.2573968253968254, "grad_norm": 1.5474119186401367, "kl": 0.27496498823165894, "learning_rate": 4.596371882086167e-07, "loss": 0.0003, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.2857142984867096, "step": 4054 }, { "completion_length": 194.57144165039062, "epoch": 0.25746031746031744, "grad_norm": 1.2329273223876953, "kl": 0.274762362241745, "learning_rate": 4.59750566893424e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4055 }, { "completion_length": 146.35714721679688, "epoch": 0.25752380952380954, "grad_norm": 1.3005471229553223, "kl": 0.28622084856033325, "learning_rate": 4.598639455782313e-07, "loss": 0.0003, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.3571428656578064, "step": 4056 }, { "completion_length": 209.57144165039062, "epoch": 0.2575873015873016, "grad_norm": 0.890284538269043, "kl": 0.2623230516910553, "learning_rate": 4.599773242630385e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4057 }, { "completion_length": 190.92857360839844, "epoch": 0.25765079365079363, "grad_norm": 1.8095418214797974, "kl": 0.2888296842575073, "learning_rate": 4.600907029478458e-07, "loss": 0.0003, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4058 }, { "completion_length": 230.21429443359375, "epoch": 0.25771428571428573, "grad_norm": 1.3381835222244263, "kl": 0.3192419409751892, "learning_rate": 4.602040816326531e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4059 }, { "completion_length": 190.71429443359375, "epoch": 0.2577777777777778, "grad_norm": 0.9360422492027283, "kl": 0.2794160544872284, "learning_rate": 4.6031746031746025e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4060 }, { "completion_length": 204.1428680419922, "epoch": 0.2578412698412698, "grad_norm": 1.276884913444519, "kl": 0.34798333048820496, "learning_rate": 4.6043083900226753e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4061 }, { "completion_length": 179.07144165039062, "epoch": 0.2579047619047619, "grad_norm": 1.803126573562622, "kl": 0.3193984031677246, "learning_rate": 4.605442176870748e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4062 }, { "completion_length": 184.21429443359375, "epoch": 0.25796825396825396, "grad_norm": 0.012849144637584686, "kl": 0.3572065532207489, "learning_rate": 4.6065759637188204e-07, "loss": 0.0004, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4063 }, { "completion_length": 187.85714721679688, "epoch": 0.25803174603174606, "grad_norm": 1.1342549324035645, "kl": 0.3092101514339447, "learning_rate": 4.6077097505668933e-07, "loss": 0.0003, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.2857142984867096, "step": 4064 }, { "completion_length": 206.42857360839844, "epoch": 0.2580952380952381, "grad_norm": 1.6731715202331543, "kl": 0.30548739433288574, "learning_rate": 4.608843537414966e-07, "loss": 0.0003, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4065 }, { "completion_length": 237.21429443359375, "epoch": 0.25815873015873014, "grad_norm": 0.7887415289878845, "kl": 0.29201096296310425, "learning_rate": 4.6099773242630384e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4066 }, { "completion_length": 201.00001525878906, "epoch": 0.25822222222222224, "grad_norm": 2.028411388397217, "kl": 0.39660128951072693, "learning_rate": 4.611111111111111e-07, "loss": 0.0004, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.4285714626312256, "step": 4067 }, { "completion_length": 186.92857360839844, "epoch": 0.2582857142857143, "grad_norm": 1.1858445405960083, "kl": 0.2952807545661926, "learning_rate": 4.6122448979591835e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4068 }, { "completion_length": 230.2857208251953, "epoch": 0.25834920634920633, "grad_norm": 0.011488061398267746, "kl": 0.3106285631656647, "learning_rate": 4.613378684807256e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 4069 }, { "completion_length": 170.5, "epoch": 0.2584126984126984, "grad_norm": 1.8807018995285034, "kl": 0.3498700261116028, "learning_rate": 4.6145124716553286e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4070 }, { "completion_length": 184.85714721679688, "epoch": 0.25847619047619047, "grad_norm": 1.501336932182312, "kl": 0.3563138544559479, "learning_rate": 4.6156462585034014e-07, "loss": 0.0004, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.2857142984867096, "step": 4071 }, { "completion_length": 190.85714721679688, "epoch": 0.2585396825396825, "grad_norm": 0.011176446452736855, "kl": 0.3233101963996887, "learning_rate": 4.6167800453514737e-07, "loss": 0.0003, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_originality_func": 0.4285714626312256, "step": 4072 }, { "completion_length": 204.2857208251953, "epoch": 0.2586031746031746, "grad_norm": 1.7625385522842407, "kl": 0.29919764399528503, "learning_rate": 4.6179138321995465e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4073 }, { "completion_length": 241.71429443359375, "epoch": 0.25866666666666666, "grad_norm": 1.192600965499878, "kl": 0.25749027729034424, "learning_rate": 4.6190476190476193e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4074 }, { "completion_length": 198.71429443359375, "epoch": 0.25873015873015875, "grad_norm": 0.01392232533544302, "kl": 0.35086148977279663, "learning_rate": 4.620181405895691e-07, "loss": 0.0004, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4075 }, { "completion_length": 180.50001525878906, "epoch": 0.2587936507936508, "grad_norm": 1.1614195108413696, "kl": 0.32526078820228577, "learning_rate": 4.621315192743764e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4076 }, { "completion_length": 188.42857360839844, "epoch": 0.25885714285714284, "grad_norm": 2.042670726776123, "kl": 0.32670533657073975, "learning_rate": 4.622448979591836e-07, "loss": 0.0003, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4077 }, { "completion_length": 207.07144165039062, "epoch": 0.25892063492063494, "grad_norm": 1.5073812007904053, "kl": 0.2999223470687866, "learning_rate": 4.623582766439909e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4078 }, { "completion_length": 214.35714721679688, "epoch": 0.258984126984127, "grad_norm": 2.1577258110046387, "kl": 0.2597561776638031, "learning_rate": 4.624716553287982e-07, "loss": 0.0003, "reward": 0.3571428656578064, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.3571428656578064, "step": 4079 }, { "completion_length": 206.07144165039062, "epoch": 0.259047619047619, "grad_norm": 1.7413712739944458, "kl": 0.2938350439071655, "learning_rate": 4.625850340136054e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4080 }, { "completion_length": 195.92857360839844, "epoch": 0.2591111111111111, "grad_norm": 0.010543441399931908, "kl": 0.3114427328109741, "learning_rate": 4.626984126984127e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 4081 }, { "completion_length": 183.07144165039062, "epoch": 0.25917460317460317, "grad_norm": 2.266806125640869, "kl": 0.35691389441490173, "learning_rate": 4.628117913832199e-07, "loss": 0.0004, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4082 }, { "completion_length": 194.50001525878906, "epoch": 0.2592380952380952, "grad_norm": 1.1099387407302856, "kl": 0.28709647059440613, "learning_rate": 4.6292517006802715e-07, "loss": 0.0003, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4083 }, { "completion_length": 151.92857360839844, "epoch": 0.2593015873015873, "grad_norm": 0.009827541187405586, "kl": 0.3410792052745819, "learning_rate": 4.6303854875283444e-07, "loss": 0.0003, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_originality_func": 0.4285714626312256, "step": 4084 }, { "completion_length": 164.85714721679688, "epoch": 0.25936507936507935, "grad_norm": 1.9531335830688477, "kl": 0.34472087025642395, "learning_rate": 4.631519274376417e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4085 }, { "completion_length": 214.00001525878906, "epoch": 0.25942857142857145, "grad_norm": 1.8238023519515991, "kl": 0.3419904410839081, "learning_rate": 4.6326530612244895e-07, "loss": 0.0003, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4086 }, { "completion_length": 189.2857208251953, "epoch": 0.2594920634920635, "grad_norm": 0.010266873985528946, "kl": 0.30594563484191895, "learning_rate": 4.6337868480725623e-07, "loss": 0.0003, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 4087 }, { "completion_length": 219.50001525878906, "epoch": 0.25955555555555554, "grad_norm": 0.010337411426007748, "kl": 0.24484583735466003, "learning_rate": 4.634920634920635e-07, "loss": 0.0002, "reward": 0.0, "reward_std": 0.0, "rewards/check_originality_func": 0.0, "step": 4088 }, { "completion_length": 184.2857208251953, "epoch": 0.25961904761904764, "grad_norm": 1.5661553144454956, "kl": 0.3574748933315277, "learning_rate": 4.6360544217687074e-07, "loss": 0.0004, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4089 }, { "completion_length": 198.7857208251953, "epoch": 0.2596825396825397, "grad_norm": 1.5223182439804077, "kl": 0.3151862621307373, "learning_rate": 4.6371882086167797e-07, "loss": 0.0003, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4090 }, { "completion_length": 207.7857208251953, "epoch": 0.2597460317460317, "grad_norm": 1.715840220451355, "kl": 0.3155994415283203, "learning_rate": 4.6383219954648525e-07, "loss": 0.0003, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4091 }, { "completion_length": 220.07144165039062, "epoch": 0.2598095238095238, "grad_norm": 0.012130877934396267, "kl": 0.363057017326355, "learning_rate": 4.639455782312925e-07, "loss": 0.0004, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_originality_func": 0.2857142984867096, "step": 4092 }, { "completion_length": 235.07144165039062, "epoch": 0.25987301587301587, "grad_norm": 1.2636196613311768, "kl": 0.36564868688583374, "learning_rate": 4.6405895691609976e-07, "loss": 0.0004, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.4285714626312256, "step": 4093 }, { "completion_length": 188.2857208251953, "epoch": 0.2599365079365079, "grad_norm": 1.9722459316253662, "kl": 0.4205230176448822, "learning_rate": 4.64172335600907e-07, "loss": 0.0004, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4094 }, { "completion_length": 206.07144165039062, "epoch": 0.26, "grad_norm": 2.403101921081543, "kl": 0.4802016019821167, "learning_rate": 4.6428571428571427e-07, "loss": 0.0005, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4095 }, { "completion_length": 252.2857208251953, "epoch": 0.26006349206349205, "grad_norm": 1.8544501066207886, "kl": 0.34608080983161926, "learning_rate": 4.6439909297052155e-07, "loss": 0.0003, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4096 }, { "completion_length": 233.57144165039062, "epoch": 0.26012698412698415, "grad_norm": 1.2905486822128296, "kl": 0.42646506428718567, "learning_rate": 4.6451247165532873e-07, "loss": 0.0004, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4097 }, { "completion_length": 257.4285888671875, "epoch": 0.2601904761904762, "grad_norm": 0.009488909505307674, "kl": 0.3316859006881714, "learning_rate": 4.64625850340136e-07, "loss": 0.0003, "reward": 0.4285714626312256, "reward_std": 0.0, "rewards/check_originality_func": 0.4285714626312256, "step": 4098 }, { "completion_length": 239.57144165039062, "epoch": 0.26025396825396824, "grad_norm": 2.191596508026123, "kl": 0.6106977462768555, "learning_rate": 4.647392290249433e-07, "loss": 0.0006, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4099 }, { "completion_length": 245.1428680419922, "epoch": 0.26031746031746034, "grad_norm": 0.010830075480043888, "kl": 0.391072541475296, "learning_rate": 4.648526077097505e-07, "loss": 0.0004, "reward": 0.2857142984867096, "reward_std": 0.0, "rewards/check_originality_func": 0.2857142984867096, "step": 4100 }, { "completion_length": 215.07144165039062, "epoch": 0.2603809523809524, "grad_norm": 1.62492036819458, "kl": 0.5090252161026001, "learning_rate": 4.649659863945578e-07, "loss": 0.0005, "reward": 0.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.4285714626312256, "step": 4101 }, { "completion_length": 210.57144165039062, "epoch": 0.2604444444444444, "grad_norm": 1.9362244606018066, "kl": 0.537385880947113, "learning_rate": 4.650793650793651e-07, "loss": 0.0005, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4102 }, { "completion_length": 191.42857360839844, "epoch": 0.2605079365079365, "grad_norm": 2.8401520252227783, "kl": 0.8006948828697205, "learning_rate": 4.651927437641723e-07, "loss": 0.0008, "reward": 0.3571428656578064, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.3571428656578064, "step": 4103 }, { "completion_length": 298.14288330078125, "epoch": 0.26057142857142856, "grad_norm": 1.219651460647583, "kl": 0.40660560131073, "learning_rate": 4.6530612244897954e-07, "loss": 0.0004, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4104 }, { "completion_length": 249.4285888671875, "epoch": 0.2606349206349206, "grad_norm": 1.4113562107086182, "kl": 0.4933326840400696, "learning_rate": 4.6541950113378683e-07, "loss": 0.0005, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4105 }, { "completion_length": 236.2857208251953, "epoch": 0.2606984126984127, "grad_norm": 1.135615587234497, "kl": 0.5567325949668884, "learning_rate": 4.6553287981859406e-07, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4106 }, { "completion_length": 251.35714721679688, "epoch": 0.26076190476190475, "grad_norm": 2.0643978118896484, "kl": 0.5424600839614868, "learning_rate": 4.6564625850340134e-07, "loss": 0.0005, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4107 }, { "completion_length": 214.42857360839844, "epoch": 0.26082539682539685, "grad_norm": 1.9999340772628784, "kl": 0.6641647815704346, "learning_rate": 4.657596371882086e-07, "loss": 0.0007, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4108 }, { "completion_length": 308.3571472167969, "epoch": 0.2608888888888889, "grad_norm": 1.737647294998169, "kl": 0.47024014592170715, "learning_rate": 4.6587301587301585e-07, "loss": 0.0005, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.6428571939468384, "step": 4109 }, { "completion_length": 279.2857360839844, "epoch": 0.26095238095238094, "grad_norm": 2.4732234477996826, "kl": 0.5597915649414062, "learning_rate": 4.6598639455782313e-07, "loss": 0.0006, "reward": 0.5, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.5, "step": 4110 }, { "completion_length": 291.21429443359375, "epoch": 0.26101587301587303, "grad_norm": 1.819278359413147, "kl": 0.5551693439483643, "learning_rate": 4.660997732426304e-07, "loss": 0.0006, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.4285714626312256, "step": 4111 }, { "completion_length": 343.14288330078125, "epoch": 0.2610793650793651, "grad_norm": 1.8712385892868042, "kl": 0.3969982862472534, "learning_rate": 4.662131519274376e-07, "loss": 0.0004, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4112 }, { "completion_length": 295.8571472167969, "epoch": 0.2611428571428571, "grad_norm": 0.989803671836853, "kl": 0.4529052674770355, "learning_rate": 4.6632653061224487e-07, "loss": 0.0005, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.3571428656578064, "step": 4113 }, { "completion_length": 313.71429443359375, "epoch": 0.2612063492063492, "grad_norm": 2.204716444015503, "kl": 0.47653356194496155, "learning_rate": 4.664399092970521e-07, "loss": 0.0005, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4114 }, { "completion_length": 268.9285888671875, "epoch": 0.26126984126984126, "grad_norm": 2.4418554306030273, "kl": 0.573342502117157, "learning_rate": 4.665532879818594e-07, "loss": 0.0006, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4115 }, { "completion_length": 269.5, "epoch": 0.2613333333333333, "grad_norm": 1.8007583618164062, "kl": 0.5306920409202576, "learning_rate": 4.6666666666666666e-07, "loss": 0.0005, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4116 }, { "completion_length": 224.4285888671875, "epoch": 0.2613968253968254, "grad_norm": 2.2027225494384766, "kl": 0.5939174890518188, "learning_rate": 4.667800453514739e-07, "loss": 0.0006, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4117 }, { "completion_length": 258.14288330078125, "epoch": 0.26146031746031745, "grad_norm": 2.5354321002960205, "kl": 0.47050777077674866, "learning_rate": 4.668934240362812e-07, "loss": 0.0005, "reward": 0.3571428656578064, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.3571428656578064, "step": 4118 }, { "completion_length": 244.1428680419922, "epoch": 0.26152380952380955, "grad_norm": 1.9901249408721924, "kl": 0.5895392298698425, "learning_rate": 4.670068027210884e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4119 }, { "completion_length": 196.71429443359375, "epoch": 0.2615873015873016, "grad_norm": 2.517808198928833, "kl": 0.6218923330307007, "learning_rate": 4.6712018140589563e-07, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4120 }, { "completion_length": 214.2857208251953, "epoch": 0.26165079365079363, "grad_norm": 0.017073234543204308, "kl": 0.5581075549125671, "learning_rate": 4.672335600907029e-07, "loss": 0.0006, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_originality_func": 0.5714285969734192, "step": 4121 }, { "completion_length": 239.35714721679688, "epoch": 0.26171428571428573, "grad_norm": 2.754986047744751, "kl": 0.4852755665779114, "learning_rate": 4.673469387755102e-07, "loss": 0.0005, "reward": 0.5714285969734192, "reward_std": 0.6060914993286133, "rewards/check_originality_func": 0.5714285969734192, "step": 4122 }, { "completion_length": 227.07144165039062, "epoch": 0.2617777777777778, "grad_norm": 0.017452474683523178, "kl": 0.540373682975769, "learning_rate": 4.674603174603174e-07, "loss": 0.0005, "reward": 0.5714285969734192, "reward_std": 0.0, "rewards/check_originality_func": 0.5714285969734192, "step": 4123 }, { "completion_length": 216.71429443359375, "epoch": 0.2618412698412698, "grad_norm": 2.0799977779388428, "kl": 0.5342637896537781, "learning_rate": 4.675736961451247e-07, "loss": 0.0005, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4124 }, { "completion_length": 261.3571472167969, "epoch": 0.2619047619047619, "grad_norm": 1.7584805488586426, "kl": 0.495243102312088, "learning_rate": 4.67687074829932e-07, "loss": 0.0005, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4125 }, { "completion_length": 235.4285888671875, "epoch": 0.26196825396825396, "grad_norm": 1.7610865831375122, "kl": 0.5725675821304321, "learning_rate": 4.678004535147392e-07, "loss": 0.0006, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4126 }, { "completion_length": 260.8571472167969, "epoch": 0.262031746031746, "grad_norm": 2.355806350708008, "kl": 0.4820699989795685, "learning_rate": 4.6791383219954645e-07, "loss": 0.0005, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4127 }, { "completion_length": 216.71429443359375, "epoch": 0.2620952380952381, "grad_norm": 2.8211631774902344, "kl": 0.5928999185562134, "learning_rate": 4.6802721088435373e-07, "loss": 0.0006, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4128 }, { "completion_length": 260.4285888671875, "epoch": 0.26215873015873015, "grad_norm": 1.721095323562622, "kl": 0.5481166243553162, "learning_rate": 4.6814058956916096e-07, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4129 }, { "completion_length": 292.2857360839844, "epoch": 0.26222222222222225, "grad_norm": 1.5009737014770508, "kl": 0.49571284651756287, "learning_rate": 4.6825396825396824e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4130 }, { "completion_length": 268.9285888671875, "epoch": 0.2622857142857143, "grad_norm": 1.795851230621338, "kl": 0.5446960926055908, "learning_rate": 4.683673469387755e-07, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4131 }, { "completion_length": 286.14288330078125, "epoch": 0.26234920634920633, "grad_norm": 1.2900540828704834, "kl": 0.6011914014816284, "learning_rate": 4.6848072562358275e-07, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4132 }, { "completion_length": 292.0714416503906, "epoch": 0.26241269841269843, "grad_norm": 1.5396699905395508, "kl": 0.6337445378303528, "learning_rate": 4.6859410430839003e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4133 }, { "completion_length": 342.64288330078125, "epoch": 0.2624761904761905, "grad_norm": 2.1114492416381836, "kl": 0.5682219862937927, "learning_rate": 4.687074829931972e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4134 }, { "completion_length": 301.21429443359375, "epoch": 0.2625396825396825, "grad_norm": 0.9176802039146423, "kl": 0.6389752626419067, "learning_rate": 4.688208616780045e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4135 }, { "completion_length": 304.71429443359375, "epoch": 0.2626031746031746, "grad_norm": 1.7850420475006104, "kl": 0.6939324736595154, "learning_rate": 4.6893424036281177e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4136 }, { "completion_length": 347.21429443359375, "epoch": 0.26266666666666666, "grad_norm": 1.1169904470443726, "kl": 0.68860924243927, "learning_rate": 4.69047619047619e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4137 }, { "completion_length": 290.2857360839844, "epoch": 0.2627301587301587, "grad_norm": 1.6631416082382202, "kl": 0.7956785559654236, "learning_rate": 4.691609977324263e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4138 }, { "completion_length": 335.14288330078125, "epoch": 0.2627936507936508, "grad_norm": 1.2650845050811768, "kl": 0.7848406434059143, "learning_rate": 4.6927437641723357e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4139 }, { "completion_length": 377.14288330078125, "epoch": 0.26285714285714284, "grad_norm": 2.284266948699951, "kl": 0.6226666569709778, "learning_rate": 4.693877551020408e-07, "loss": 0.0006, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4140 }, { "completion_length": 333.0714416503906, "epoch": 0.26292063492063494, "grad_norm": 1.4245530366897583, "kl": 0.7770518660545349, "learning_rate": 4.69501133786848e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4141 }, { "completion_length": 319.8571472167969, "epoch": 0.262984126984127, "grad_norm": 4.694519996643066, "kl": 0.6692377328872681, "learning_rate": 4.696145124716553e-07, "loss": 0.0007, "reward": 0.6428571939468384, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.6428571939468384, "step": 4142 }, { "completion_length": 381.5714416503906, "epoch": 0.26304761904761903, "grad_norm": 3.5922257900238037, "kl": 0.7801650166511536, "learning_rate": 4.6972789115646253e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4143 }, { "completion_length": 336.9285888671875, "epoch": 0.26311111111111113, "grad_norm": 3.4457974433898926, "kl": 0.7994218468666077, "learning_rate": 4.698412698412698e-07, "loss": 0.0008, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4144 }, { "completion_length": 311.8571472167969, "epoch": 0.26317460317460317, "grad_norm": 6.132865905761719, "kl": 0.7984511256217957, "learning_rate": 4.699546485260771e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4145 }, { "completion_length": 344.14288330078125, "epoch": 0.2632380952380952, "grad_norm": 3.1575489044189453, "kl": 0.840414822101593, "learning_rate": 4.7006802721088433e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4146 }, { "completion_length": 362.4285888671875, "epoch": 0.2633015873015873, "grad_norm": 0.1725700944662094, "kl": 0.7758854627609253, "learning_rate": 4.701814058956916e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4147 }, { "completion_length": 337.14288330078125, "epoch": 0.26336507936507936, "grad_norm": 0.14361794292926788, "kl": 0.8615310192108154, "learning_rate": 4.702947845804989e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4148 }, { "completion_length": 344.5714416503906, "epoch": 0.2634285714285714, "grad_norm": 3.329986095428467, "kl": 0.956541121006012, "learning_rate": 4.7040816326530607e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4149 }, { "completion_length": 301.0, "epoch": 0.2634920634920635, "grad_norm": 0.2146577388048172, "kl": 0.8897188901901245, "learning_rate": 4.7052154195011335e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4150 }, { "completion_length": 237.4285888671875, "epoch": 0.26355555555555554, "grad_norm": 4.525792598724365, "kl": 0.7369154095649719, "learning_rate": 4.7063492063492063e-07, "loss": 0.0007, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4151 }, { "completion_length": 164.42857360839844, "epoch": 0.26361904761904764, "grad_norm": 3.1619365215301514, "kl": 0.6965131163597107, "learning_rate": 4.7074829931972786e-07, "loss": 0.0007, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4152 }, { "completion_length": 225.50001525878906, "epoch": 0.2636825396825397, "grad_norm": 3.1897106170654297, "kl": 0.49073758721351624, "learning_rate": 4.7086167800453514e-07, "loss": 0.0005, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4153 }, { "completion_length": 161.57144165039062, "epoch": 0.26374603174603173, "grad_norm": 2.650261640548706, "kl": 0.7922613620758057, "learning_rate": 4.709750566893424e-07, "loss": 0.0008, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4154 }, { "completion_length": 185.21429443359375, "epoch": 0.2638095238095238, "grad_norm": 2.907670259475708, "kl": 0.6903700232505798, "learning_rate": 4.7108843537414965e-07, "loss": 0.0007, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.2142857313156128, "step": 4155 }, { "completion_length": 191.07144165039062, "epoch": 0.26387301587301587, "grad_norm": 4.338425159454346, "kl": 1.13289213180542, "learning_rate": 4.712018140589569e-07, "loss": 0.0011, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.2142857313156128, "step": 4156 }, { "completion_length": 193.35714721679688, "epoch": 0.2639365079365079, "grad_norm": 0.09769139438867569, "kl": 0.9432704448699951, "learning_rate": 4.713151927437641e-07, "loss": 0.0009, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 4157 }, { "completion_length": 149.5, "epoch": 0.264, "grad_norm": 3.6033811569213867, "kl": 1.4714418649673462, "learning_rate": 4.714285714285714e-07, "loss": 0.0015, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.2142857313156128, "step": 4158 }, { "completion_length": 185.6428680419922, "epoch": 0.26406349206349206, "grad_norm": 10.139079093933105, "kl": 1.1061220169067383, "learning_rate": 4.715419501133787e-07, "loss": 0.0011, "reward": 0.3571428656578064, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.3571428656578064, "step": 4159 }, { "completion_length": 167.71429443359375, "epoch": 0.26412698412698415, "grad_norm": 21.68720054626465, "kl": 1.1584526300430298, "learning_rate": 4.716553287981859e-07, "loss": 0.0012, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4160 }, { "completion_length": 189.1428680419922, "epoch": 0.2641904761904762, "grad_norm": 5.11839485168457, "kl": 0.960639476776123, "learning_rate": 4.717687074829932e-07, "loss": 0.001, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4161 }, { "completion_length": 238.1428680419922, "epoch": 0.26425396825396824, "grad_norm": 4.153481483459473, "kl": 0.6259135603904724, "learning_rate": 4.7188208616780047e-07, "loss": 0.0006, "reward": 0.2857142984867096, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.2857142984867096, "step": 4162 }, { "completion_length": 179.1428680419922, "epoch": 0.26431746031746034, "grad_norm": 3.523165225982666, "kl": 0.7576484084129333, "learning_rate": 4.7199546485260764e-07, "loss": 0.0008, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4163 }, { "completion_length": 194.07144165039062, "epoch": 0.2643809523809524, "grad_norm": 3.7332711219787598, "kl": 0.7953566312789917, "learning_rate": 4.721088435374149e-07, "loss": 0.0008, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.4285714626312256, "step": 4164 }, { "completion_length": 201.1428680419922, "epoch": 0.2644444444444444, "grad_norm": 3.4919774532318115, "kl": 0.6450689435005188, "learning_rate": 4.722222222222222e-07, "loss": 0.0006, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4165 }, { "completion_length": 237.07144165039062, "epoch": 0.2645079365079365, "grad_norm": 1.9672473669052124, "kl": 0.6136229634284973, "learning_rate": 4.7233560090702944e-07, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4166 }, { "completion_length": 187.1428680419922, "epoch": 0.26457142857142857, "grad_norm": 2.289961814880371, "kl": 0.8211475610733032, "learning_rate": 4.724489795918367e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4167 }, { "completion_length": 194.57144165039062, "epoch": 0.2646349206349206, "grad_norm": 2.9685444831848145, "kl": 0.697510302066803, "learning_rate": 4.72562358276644e-07, "loss": 0.0007, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4168 }, { "completion_length": 199.7857208251953, "epoch": 0.2646984126984127, "grad_norm": 2.0026257038116455, "kl": 0.6703615784645081, "learning_rate": 4.7267573696145123e-07, "loss": 0.0007, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4169 }, { "completion_length": 218.00001525878906, "epoch": 0.26476190476190475, "grad_norm": 3.336007595062256, "kl": 0.5978933572769165, "learning_rate": 4.727891156462585e-07, "loss": 0.0006, "reward": 0.6428571939468384, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.6428571939468384, "step": 4170 }, { "completion_length": 190.1428680419922, "epoch": 0.26482539682539685, "grad_norm": 2.3812272548675537, "kl": 0.7772906422615051, "learning_rate": 4.7290249433106574e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4171 }, { "completion_length": 188.6428680419922, "epoch": 0.2648888888888889, "grad_norm": 4.906514644622803, "kl": 0.8039206266403198, "learning_rate": 4.7301587301587297e-07, "loss": 0.0008, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4172 }, { "completion_length": 195.1428680419922, "epoch": 0.26495238095238094, "grad_norm": 2.4131217002868652, "kl": 0.6660009622573853, "learning_rate": 4.7312925170068025e-07, "loss": 0.0007, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4173 }, { "completion_length": 194.1428680419922, "epoch": 0.26501587301587304, "grad_norm": 3.2680888175964355, "kl": 0.8355497717857361, "learning_rate": 4.7324263038548753e-07, "loss": 0.0008, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4174 }, { "completion_length": 231.2857208251953, "epoch": 0.2650793650793651, "grad_norm": 1.9742354154586792, "kl": 0.5909287929534912, "learning_rate": 4.7335600907029476e-07, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4175 }, { "completion_length": 250.35714721679688, "epoch": 0.2651428571428571, "grad_norm": 1.832851767539978, "kl": 0.5521296858787537, "learning_rate": 4.7346938775510204e-07, "loss": 0.0006, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4176 }, { "completion_length": 187.50001525878906, "epoch": 0.2652063492063492, "grad_norm": 2.950444459915161, "kl": 0.7002212405204773, "learning_rate": 4.7358276643990927e-07, "loss": 0.0007, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4177 }, { "completion_length": 238.50001525878906, "epoch": 0.26526984126984127, "grad_norm": 2.2498743534088135, "kl": 0.6719976663589478, "learning_rate": 4.736961451247165e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4178 }, { "completion_length": 211.00001525878906, "epoch": 0.2653333333333333, "grad_norm": 2.1345362663269043, "kl": 0.7078397274017334, "learning_rate": 4.738095238095238e-07, "loss": 0.0007, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4179 }, { "completion_length": 205.2857208251953, "epoch": 0.2653968253968254, "grad_norm": 3.015345811843872, "kl": 0.5690280199050903, "learning_rate": 4.73922902494331e-07, "loss": 0.0006, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.4285714626312256, "step": 4180 }, { "completion_length": 157.21429443359375, "epoch": 0.26546031746031745, "grad_norm": 2.641444444656372, "kl": 0.833804190158844, "learning_rate": 4.740362811791383e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4181 }, { "completion_length": 170.07144165039062, "epoch": 0.26552380952380955, "grad_norm": 4.017512798309326, "kl": 0.8074332475662231, "learning_rate": 4.741496598639456e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4182 }, { "completion_length": 216.6428680419922, "epoch": 0.2655873015873016, "grad_norm": 2.669283151626587, "kl": 0.7201595902442932, "learning_rate": 4.742630385487528e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4183 }, { "completion_length": 237.6428680419922, "epoch": 0.26565079365079364, "grad_norm": 2.63287353515625, "kl": 0.7346757054328918, "learning_rate": 4.743764172335601e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4184 }, { "completion_length": 228.85714721679688, "epoch": 0.26571428571428574, "grad_norm": 3.5554635524749756, "kl": 1.010898470878601, "learning_rate": 4.7448979591836737e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4185 }, { "completion_length": 251.35714721679688, "epoch": 0.2657777777777778, "grad_norm": 0.10662361234426498, "kl": 0.8325759768486023, "learning_rate": 4.7460317460317454e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4186 }, { "completion_length": 275.14288330078125, "epoch": 0.2658412698412698, "grad_norm": 1.661217451095581, "kl": 0.9847896099090576, "learning_rate": 4.7471655328798183e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4187 }, { "completion_length": 275.4285888671875, "epoch": 0.2659047619047619, "grad_norm": 2.382624626159668, "kl": 1.067484974861145, "learning_rate": 4.748299319727891e-07, "loss": 0.0011, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4188 }, { "completion_length": 300.14288330078125, "epoch": 0.26596825396825396, "grad_norm": 0.0780726745724678, "kl": 0.8597525358200073, "learning_rate": 4.7494331065759634e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4189 }, { "completion_length": 292.4285888671875, "epoch": 0.266031746031746, "grad_norm": 2.8167014122009277, "kl": 0.8786271810531616, "learning_rate": 4.750566893424036e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4190 }, { "completion_length": 278.8571472167969, "epoch": 0.2660952380952381, "grad_norm": 1.7810418605804443, "kl": 0.9646121859550476, "learning_rate": 4.751700680272109e-07, "loss": 0.001, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4191 }, { "completion_length": 292.14288330078125, "epoch": 0.26615873015873015, "grad_norm": 4.492855072021484, "kl": 0.9432002902030945, "learning_rate": 4.7528344671201813e-07, "loss": 0.0009, "reward": 0.5, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.5, "step": 4192 }, { "completion_length": 316.7857360839844, "epoch": 0.26622222222222225, "grad_norm": 3.1863534450531006, "kl": 0.9136435985565186, "learning_rate": 4.7539682539682536e-07, "loss": 0.0009, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4193 }, { "completion_length": 258.21429443359375, "epoch": 0.2662857142857143, "grad_norm": 2.269124984741211, "kl": 1.1116530895233154, "learning_rate": 4.7551020408163264e-07, "loss": 0.0011, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4194 }, { "completion_length": 304.4285888671875, "epoch": 0.26634920634920634, "grad_norm": 3.335963726043701, "kl": 0.982237696647644, "learning_rate": 4.7562358276643987e-07, "loss": 0.001, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4195 }, { "completion_length": 277.3571472167969, "epoch": 0.26641269841269843, "grad_norm": 3.104490280151367, "kl": 0.8363463878631592, "learning_rate": 4.7573696145124715e-07, "loss": 0.0008, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4196 }, { "completion_length": 231.21429443359375, "epoch": 0.2664761904761905, "grad_norm": 3.7361838817596436, "kl": 1.0174254179000854, "learning_rate": 4.758503401360544e-07, "loss": 0.001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4197 }, { "completion_length": 212.00001525878906, "epoch": 0.2665396825396825, "grad_norm": 4.799015045166016, "kl": 1.2023009061813354, "learning_rate": 4.7596371882086166e-07, "loss": 0.0012, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4198 }, { "completion_length": 273.14288330078125, "epoch": 0.2666031746031746, "grad_norm": 3.9007251262664795, "kl": 0.9501388072967529, "learning_rate": 4.7607709750566894e-07, "loss": 0.001, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4199 }, { "completion_length": 240.57144165039062, "epoch": 0.26666666666666666, "grad_norm": 2.804825782775879, "kl": 1.347165584564209, "learning_rate": 4.761904761904761e-07, "loss": 0.0013, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4200 }, { "completion_length": 214.2857208251953, "epoch": 0.2667301587301587, "grad_norm": 2.7781240940093994, "kl": 1.3159593343734741, "learning_rate": 4.763038548752834e-07, "loss": 0.0013, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4201 }, { "completion_length": 213.71429443359375, "epoch": 0.2667936507936508, "grad_norm": 0.0739641785621643, "kl": 1.1728744506835938, "learning_rate": 4.764172335600907e-07, "loss": 0.0012, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4202 }, { "completion_length": 204.00001525878906, "epoch": 0.26685714285714285, "grad_norm": 0.08480040729045868, "kl": 1.343180537223816, "learning_rate": 4.765306122448979e-07, "loss": 0.0013, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4203 }, { "completion_length": 244.9285888671875, "epoch": 0.26692063492063495, "grad_norm": 1.5897209644317627, "kl": 1.4357167482376099, "learning_rate": 4.766439909297052e-07, "loss": 0.0014, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4204 }, { "completion_length": 202.35714721679688, "epoch": 0.266984126984127, "grad_norm": 2.4719603061676025, "kl": 1.3718632459640503, "learning_rate": 4.767573696145125e-07, "loss": 0.0014, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4205 }, { "completion_length": 177.71429443359375, "epoch": 0.26704761904761903, "grad_norm": 2.2169740200042725, "kl": 1.4156486988067627, "learning_rate": 4.768707482993198e-07, "loss": 0.0014, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4206 }, { "completion_length": 190.6428680419922, "epoch": 0.26711111111111113, "grad_norm": 0.091004379093647, "kl": 1.2650173902511597, "learning_rate": 4.76984126984127e-07, "loss": 0.0013, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4207 }, { "completion_length": 166.2857208251953, "epoch": 0.2671746031746032, "grad_norm": 0.07245585322380066, "kl": 1.3321025371551514, "learning_rate": 4.770975056689342e-07, "loss": 0.0013, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4208 }, { "completion_length": 157.92857360839844, "epoch": 0.2672380952380952, "grad_norm": 0.05366009473800659, "kl": 1.1522375345230103, "learning_rate": 4.772108843537414e-07, "loss": 0.0012, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4209 }, { "completion_length": 159.0, "epoch": 0.2673015873015873, "grad_norm": 0.06590978056192398, "kl": 1.090592384338379, "learning_rate": 4.773242630385487e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4210 }, { "completion_length": 168.07144165039062, "epoch": 0.26736507936507936, "grad_norm": 2.394988536834717, "kl": 1.065352439880371, "learning_rate": 4.77437641723356e-07, "loss": 0.0011, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4211 }, { "completion_length": 148.42857360839844, "epoch": 0.2674285714285714, "grad_norm": 4.339240074157715, "kl": 1.1532028913497925, "learning_rate": 4.775510204081632e-07, "loss": 0.0012, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4212 }, { "completion_length": 157.6428680419922, "epoch": 0.2674920634920635, "grad_norm": 2.2191457748413086, "kl": 0.9738608002662659, "learning_rate": 4.776643990929705e-07, "loss": 0.001, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4213 }, { "completion_length": 137.1428680419922, "epoch": 0.26755555555555555, "grad_norm": 2.7344045639038086, "kl": 1.1983879804611206, "learning_rate": 4.777777777777778e-07, "loss": 0.0012, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4214 }, { "completion_length": 181.42857360839844, "epoch": 0.26761904761904765, "grad_norm": 0.06643085181713104, "kl": 0.9936885833740234, "learning_rate": 4.77891156462585e-07, "loss": 0.001, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_originality_func": 0.7142857313156128, "step": 4215 }, { "completion_length": 157.07144165039062, "epoch": 0.2676825396825397, "grad_norm": 3.7230799198150635, "kl": 1.037063479423523, "learning_rate": 4.780045351473923e-07, "loss": 0.001, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4216 }, { "completion_length": 140.92857360839844, "epoch": 0.26774603174603173, "grad_norm": 2.573652505874634, "kl": 1.199446439743042, "learning_rate": 4.781179138321995e-07, "loss": 0.0012, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4217 }, { "completion_length": 142.35714721679688, "epoch": 0.26780952380952383, "grad_norm": 4.7358078956604, "kl": 1.3782007694244385, "learning_rate": 4.782312925170068e-07, "loss": 0.0014, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4218 }, { "completion_length": 126.14286041259766, "epoch": 0.2678730158730159, "grad_norm": 4.861232757568359, "kl": 1.316864013671875, "learning_rate": 4.783446712018141e-07, "loss": 0.0013, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4219 }, { "completion_length": 133.42857360839844, "epoch": 0.2679365079365079, "grad_norm": 3.668064832687378, "kl": 1.3333134651184082, "learning_rate": 4.784580498866213e-07, "loss": 0.0013, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4220 }, { "completion_length": 108.21428680419922, "epoch": 0.268, "grad_norm": 7.091073036193848, "kl": 1.9636751413345337, "learning_rate": 4.785714285714286e-07, "loss": 0.002, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4221 }, { "completion_length": 114.00000762939453, "epoch": 0.26806349206349206, "grad_norm": 8.900333404541016, "kl": 1.8064759969711304, "learning_rate": 4.786848072562357e-07, "loss": 0.0018, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4222 }, { "completion_length": 93.35714721679688, "epoch": 0.2681269841269841, "grad_norm": 10.455821990966797, "kl": 1.7036038637161255, "learning_rate": 4.787981859410431e-07, "loss": 0.0017, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4223 }, { "completion_length": 109.0714340209961, "epoch": 0.2681904761904762, "grad_norm": 4.991456985473633, "kl": 1.3823672533035278, "learning_rate": 4.789115646258503e-07, "loss": 0.0014, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4224 }, { "completion_length": 93.28572082519531, "epoch": 0.26825396825396824, "grad_norm": 2.3972370624542236, "kl": 1.6782249212265015, "learning_rate": 4.790249433106575e-07, "loss": 0.0017, "reward": 0.3571428656578064, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.3571428656578064, "step": 4225 }, { "completion_length": 106.28572082519531, "epoch": 0.26831746031746034, "grad_norm": 4.880405426025391, "kl": 1.289872407913208, "learning_rate": 4.791383219954649e-07, "loss": 0.0013, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4226 }, { "completion_length": 92.28572082519531, "epoch": 0.2683809523809524, "grad_norm": 3.6008758544921875, "kl": 1.6906365156173706, "learning_rate": 4.792517006802721e-07, "loss": 0.0017, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4227 }, { "completion_length": 83.35714721679688, "epoch": 0.26844444444444443, "grad_norm": 10.796307563781738, "kl": 2.822964906692505, "learning_rate": 4.793650793650793e-07, "loss": 0.0028, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4228 }, { "completion_length": 99.28572082519531, "epoch": 0.26850793650793653, "grad_norm": 7.923113822937012, "kl": 1.7390012741088867, "learning_rate": 4.794784580498867e-07, "loss": 0.0017, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4229 }, { "completion_length": 131.0, "epoch": 0.26857142857142857, "grad_norm": 7.028718948364258, "kl": 1.3407143354415894, "learning_rate": 4.795918367346938e-07, "loss": 0.0013, "reward": 0.6428571939468384, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.6428571939468384, "step": 4230 }, { "completion_length": 104.85714721679688, "epoch": 0.2686349206349206, "grad_norm": 3.2100377082824707, "kl": 1.7118302583694458, "learning_rate": 4.797052154195011e-07, "loss": 0.0017, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4231 }, { "completion_length": 105.14286041259766, "epoch": 0.2686984126984127, "grad_norm": 5.905674934387207, "kl": 1.6726148128509521, "learning_rate": 4.798185941043083e-07, "loss": 0.0017, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4232 }, { "completion_length": 110.64286041259766, "epoch": 0.26876190476190476, "grad_norm": 8.842429161071777, "kl": 1.6595473289489746, "learning_rate": 4.799319727891156e-07, "loss": 0.0017, "reward": 0.5714285969734192, "reward_std": 0.6060914993286133, "rewards/check_originality_func": 0.5714285969734192, "step": 4233 }, { "completion_length": 110.35714721679688, "epoch": 0.2688253968253968, "grad_norm": 6.721752643585205, "kl": 1.5051360130310059, "learning_rate": 4.800453514739229e-07, "loss": 0.0015, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4234 }, { "completion_length": 111.21429443359375, "epoch": 0.2688888888888889, "grad_norm": 5.579428672790527, "kl": 1.5417481660842896, "learning_rate": 4.801587301587301e-07, "loss": 0.0015, "reward": 0.5, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.5, "step": 4235 }, { "completion_length": 116.00000762939453, "epoch": 0.26895238095238094, "grad_norm": 2.881115198135376, "kl": 1.5291860103607178, "learning_rate": 4.802721088435374e-07, "loss": 0.0015, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.5714285969734192, "step": 4236 }, { "completion_length": 111.28572082519531, "epoch": 0.26901587301587304, "grad_norm": 4.388076305389404, "kl": 1.3151957988739014, "learning_rate": 4.803854875283446e-07, "loss": 0.0013, "reward": 0.5, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.5, "step": 4237 }, { "completion_length": 102.28572082519531, "epoch": 0.2690793650793651, "grad_norm": 5.506381511688232, "kl": 1.6301442384719849, "learning_rate": 4.804988662131519e-07, "loss": 0.0016, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4238 }, { "completion_length": 87.71428680419922, "epoch": 0.26914285714285713, "grad_norm": 4.766063213348389, "kl": 1.7686948776245117, "learning_rate": 4.806122448979592e-07, "loss": 0.0018, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4239 }, { "completion_length": 101.5714340209961, "epoch": 0.2692063492063492, "grad_norm": 7.608897686004639, "kl": 1.9833112955093384, "learning_rate": 4.807256235827664e-07, "loss": 0.002, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4240 }, { "completion_length": 95.21428680419922, "epoch": 0.26926984126984127, "grad_norm": 12.387585639953613, "kl": 2.1396679878234863, "learning_rate": 4.808390022675737e-07, "loss": 0.0021, "reward": 0.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.4285714626312256, "step": 4241 }, { "completion_length": 103.28572082519531, "epoch": 0.2693333333333333, "grad_norm": 7.478267669677734, "kl": 2.04486083984375, "learning_rate": 4.80952380952381e-07, "loss": 0.002, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4242 }, { "completion_length": 111.78572082519531, "epoch": 0.2693968253968254, "grad_norm": 4.68037223815918, "kl": 1.8537293672561646, "learning_rate": 4.810657596371882e-07, "loss": 0.0019, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4243 }, { "completion_length": 160.71429443359375, "epoch": 0.26946031746031746, "grad_norm": 3.5009756088256836, "kl": 1.3664253950119019, "learning_rate": 4.811791383219955e-07, "loss": 0.0014, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4244 }, { "completion_length": 137.07144165039062, "epoch": 0.2695238095238095, "grad_norm": 11.107714653015137, "kl": 1.248702049255371, "learning_rate": 4.812925170068026e-07, "loss": 0.0012, "reward": 0.5714285969734192, "reward_std": 0.6060914993286133, "rewards/check_originality_func": 0.5714285969734192, "step": 4245 }, { "completion_length": 195.71429443359375, "epoch": 0.2695873015873016, "grad_norm": 1.6103168725967407, "kl": 0.80918288230896, "learning_rate": 4.8140589569161e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4246 }, { "completion_length": 167.71429443359375, "epoch": 0.26965079365079364, "grad_norm": 3.2081708908081055, "kl": 1.0953826904296875, "learning_rate": 4.815192743764172e-07, "loss": 0.0011, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4247 }, { "completion_length": 190.92857360839844, "epoch": 0.26971428571428574, "grad_norm": 1.4515081644058228, "kl": 0.847178041934967, "learning_rate": 4.816326530612244e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4248 }, { "completion_length": 199.92857360839844, "epoch": 0.2697777777777778, "grad_norm": 1.6475739479064941, "kl": 0.8442364931106567, "learning_rate": 4.817460317460318e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4249 }, { "completion_length": 201.85714721679688, "epoch": 0.2698412698412698, "grad_norm": 1.6082290410995483, "kl": 0.7335818409919739, "learning_rate": 4.81859410430839e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4250 }, { "completion_length": 180.21429443359375, "epoch": 0.2699047619047619, "grad_norm": 1.224457025527954, "kl": 0.8813214302062988, "learning_rate": 4.819727891156462e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4251 }, { "completion_length": 155.0, "epoch": 0.26996825396825397, "grad_norm": 2.303752899169922, "kl": 0.9268847107887268, "learning_rate": 4.820861678004535e-07, "loss": 0.0009, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.6428571939468384, "step": 4252 }, { "completion_length": 192.1428680419922, "epoch": 0.270031746031746, "grad_norm": 1.8528705835342407, "kl": 0.7478501796722412, "learning_rate": 4.821995464852607e-07, "loss": 0.0007, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4253 }, { "completion_length": 189.35714721679688, "epoch": 0.2700952380952381, "grad_norm": 1.2264888286590576, "kl": 0.6393431425094604, "learning_rate": 4.82312925170068e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4254 }, { "completion_length": 212.00001525878906, "epoch": 0.27015873015873015, "grad_norm": 2.2568564414978027, "kl": 0.710408627986908, "learning_rate": 4.824263038548753e-07, "loss": 0.0007, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4255 }, { "completion_length": 227.1428680419922, "epoch": 0.2702222222222222, "grad_norm": 1.8096829652786255, "kl": 0.6549661159515381, "learning_rate": 4.825396825396825e-07, "loss": 0.0007, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4256 }, { "completion_length": 256.14288330078125, "epoch": 0.2702857142857143, "grad_norm": 2.4427173137664795, "kl": 0.5367710590362549, "learning_rate": 4.826530612244898e-07, "loss": 0.0005, "reward": 0.5714285969734192, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.5714285969734192, "step": 4257 }, { "completion_length": 213.00001525878906, "epoch": 0.27034920634920634, "grad_norm": 1.634702444076538, "kl": 0.6499355435371399, "learning_rate": 4.82766439909297e-07, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4258 }, { "completion_length": 208.2857208251953, "epoch": 0.27041269841269844, "grad_norm": 1.288299322128296, "kl": 0.5875320434570312, "learning_rate": 4.828798185941043e-07, "loss": 0.0006, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.6428571939468384, "step": 4259 }, { "completion_length": 195.50001525878906, "epoch": 0.2704761904761905, "grad_norm": 1.9426981210708618, "kl": 0.7547253370285034, "learning_rate": 4.829931972789115e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4260 }, { "completion_length": 222.21429443359375, "epoch": 0.2705396825396825, "grad_norm": 1.1558586359024048, "kl": 0.6639512181282043, "learning_rate": 4.831065759637188e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4261 }, { "completion_length": 215.35714721679688, "epoch": 0.2706031746031746, "grad_norm": 2.0943195819854736, "kl": 0.725727915763855, "learning_rate": 4.832199546485261e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4262 }, { "completion_length": 239.71429443359375, "epoch": 0.27066666666666667, "grad_norm": 2.4823834896087646, "kl": 0.5822128057479858, "learning_rate": 4.833333333333333e-07, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4263 }, { "completion_length": 234.1428680419922, "epoch": 0.2707301587301587, "grad_norm": 0.035796087235212326, "kl": 0.6916972398757935, "learning_rate": 4.834467120181406e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4264 }, { "completion_length": 190.85714721679688, "epoch": 0.2707936507936508, "grad_norm": 2.2262115478515625, "kl": 0.8176278471946716, "learning_rate": 4.835600907029479e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4265 }, { "completion_length": 259.9285888671875, "epoch": 0.27085714285714285, "grad_norm": 1.2761098146438599, "kl": 0.6695212125778198, "learning_rate": 4.836734693877551e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4266 }, { "completion_length": 208.35714721679688, "epoch": 0.2709206349206349, "grad_norm": 1.8347827196121216, "kl": 0.9482144117355347, "learning_rate": 4.837868480725623e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4267 }, { "completion_length": 234.1428680419922, "epoch": 0.270984126984127, "grad_norm": 2.884148120880127, "kl": 0.7595441341400146, "learning_rate": 4.839002267573695e-07, "loss": 0.0008, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4268 }, { "completion_length": 225.21429443359375, "epoch": 0.27104761904761904, "grad_norm": 0.03464444354176521, "kl": 0.7324783802032471, "learning_rate": 4.840136054421769e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4269 }, { "completion_length": 241.4285888671875, "epoch": 0.27111111111111114, "grad_norm": 2.257824420928955, "kl": 0.9765759706497192, "learning_rate": 4.841269841269841e-07, "loss": 0.001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4270 }, { "completion_length": 248.57144165039062, "epoch": 0.2711746031746032, "grad_norm": 1.8117144107818604, "kl": 0.7358835339546204, "learning_rate": 4.842403628117913e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4271 }, { "completion_length": 241.57144165039062, "epoch": 0.2712380952380952, "grad_norm": 0.054256804287433624, "kl": 0.9019848108291626, "learning_rate": 4.843537414965987e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4272 }, { "completion_length": 232.71429443359375, "epoch": 0.2713015873015873, "grad_norm": 1.5737173557281494, "kl": 0.8038108944892883, "learning_rate": 4.844671201814059e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4273 }, { "completion_length": 235.50001525878906, "epoch": 0.27136507936507936, "grad_norm": 1.7384183406829834, "kl": 0.7913469076156616, "learning_rate": 4.845804988662131e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4274 }, { "completion_length": 208.50001525878906, "epoch": 0.2714285714285714, "grad_norm": 1.1681824922561646, "kl": 0.9185384511947632, "learning_rate": 4.846938775510204e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4275 }, { "completion_length": 219.9285888671875, "epoch": 0.2714920634920635, "grad_norm": 0.039558980613946915, "kl": 0.944359302520752, "learning_rate": 4.848072562358276e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4276 }, { "completion_length": 258.0, "epoch": 0.27155555555555555, "grad_norm": 1.7357003688812256, "kl": 0.7321649193763733, "learning_rate": 4.849206349206349e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4277 }, { "completion_length": 243.57144165039062, "epoch": 0.2716190476190476, "grad_norm": 3.0971875190734863, "kl": 0.8724713325500488, "learning_rate": 4.850340136054422e-07, "loss": 0.0009, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4278 }, { "completion_length": 238.85714721679688, "epoch": 0.2716825396825397, "grad_norm": 0.037229787558317184, "kl": 0.9227973818778992, "learning_rate": 4.851473922902494e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4279 }, { "completion_length": 256.71429443359375, "epoch": 0.27174603174603174, "grad_norm": 2.001969337463379, "kl": 0.8279750347137451, "learning_rate": 4.852607709750567e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4280 }, { "completion_length": 240.7857208251953, "epoch": 0.27180952380952383, "grad_norm": 1.643097162246704, "kl": 0.7361494898796082, "learning_rate": 4.853741496598639e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4281 }, { "completion_length": 224.00001525878906, "epoch": 0.2718730158730159, "grad_norm": 0.05973983183503151, "kl": 0.9037460088729858, "learning_rate": 4.854875283446712e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4282 }, { "completion_length": 258.5714416503906, "epoch": 0.2719365079365079, "grad_norm": 2.220506191253662, "kl": 0.8365228176116943, "learning_rate": 4.856009070294784e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4283 }, { "completion_length": 257.5714416503906, "epoch": 0.272, "grad_norm": 0.049465034157037735, "kl": 0.836449384689331, "learning_rate": 4.857142857142857e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4284 }, { "completion_length": 236.2857208251953, "epoch": 0.27206349206349206, "grad_norm": 0.03829921409487724, "kl": 0.9095403552055359, "learning_rate": 4.85827664399093e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4285 }, { "completion_length": 222.9285888671875, "epoch": 0.2721269841269841, "grad_norm": 0.04362870007753372, "kl": 0.9742715358734131, "learning_rate": 4.859410430839002e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4286 }, { "completion_length": 221.1428680419922, "epoch": 0.2721904761904762, "grad_norm": 0.05086783319711685, "kl": 0.9981504082679749, "learning_rate": 4.860544217687075e-07, "loss": 0.001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4287 }, { "completion_length": 251.07144165039062, "epoch": 0.27225396825396825, "grad_norm": 1.6204001903533936, "kl": 1.0263590812683105, "learning_rate": 4.861678004535148e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4288 }, { "completion_length": 167.85714721679688, "epoch": 0.2723174603174603, "grad_norm": 0.0948513075709343, "kl": 1.4406739473342896, "learning_rate": 4.86281179138322e-07, "loss": 0.0014, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4289 }, { "completion_length": 271.0, "epoch": 0.2723809523809524, "grad_norm": 0.09266466647386551, "kl": 1.005719542503357, "learning_rate": 4.863945578231292e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4290 }, { "completion_length": 285.5, "epoch": 0.27244444444444443, "grad_norm": 1.2819617986679077, "kl": 0.8474997282028198, "learning_rate": 4.865079365079364e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4291 }, { "completion_length": 237.6428680419922, "epoch": 0.27250793650793653, "grad_norm": 0.05309712514281273, "kl": 1.0225439071655273, "learning_rate": 4.866213151927438e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4292 }, { "completion_length": 227.07144165039062, "epoch": 0.2725714285714286, "grad_norm": 0.10015997290611267, "kl": 1.1945338249206543, "learning_rate": 4.86734693877551e-07, "loss": 0.0012, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4293 }, { "completion_length": 242.21429443359375, "epoch": 0.2726349206349206, "grad_norm": 0.08584396541118622, "kl": 0.9307051301002502, "learning_rate": 4.868480725623582e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4294 }, { "completion_length": 300.21429443359375, "epoch": 0.2726984126984127, "grad_norm": 2.166159152984619, "kl": 0.7397663593292236, "learning_rate": 4.869614512471656e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4295 }, { "completion_length": 255.07144165039062, "epoch": 0.27276190476190476, "grad_norm": 0.0756230428814888, "kl": 0.8958966732025146, "learning_rate": 4.870748299319727e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4296 }, { "completion_length": 228.7857208251953, "epoch": 0.2728253968253968, "grad_norm": 0.05419829487800598, "kl": 1.1621818542480469, "learning_rate": 4.8718820861678e-07, "loss": 0.0012, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4297 }, { "completion_length": 248.00001525878906, "epoch": 0.2728888888888889, "grad_norm": 1.59685218334198, "kl": 0.9388024806976318, "learning_rate": 4.873015873015873e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4298 }, { "completion_length": 205.92857360839844, "epoch": 0.27295238095238095, "grad_norm": 1.3887802362442017, "kl": 1.1654260158538818, "learning_rate": 4.874149659863945e-07, "loss": 0.0012, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4299 }, { "completion_length": 221.71429443359375, "epoch": 0.273015873015873, "grad_norm": 1.2927041053771973, "kl": 1.0969070196151733, "learning_rate": 4.875283446712018e-07, "loss": 0.0011, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4300 }, { "completion_length": 221.6428680419922, "epoch": 0.2730793650793651, "grad_norm": 0.046919651329517365, "kl": 0.9677135348320007, "learning_rate": 4.876417233560091e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4301 }, { "completion_length": 260.0, "epoch": 0.27314285714285713, "grad_norm": 1.0371472835540771, "kl": 0.8144747018814087, "learning_rate": 4.877551020408163e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4302 }, { "completion_length": 223.21429443359375, "epoch": 0.27320634920634923, "grad_norm": 0.059263575822114944, "kl": 0.926750898361206, "learning_rate": 4.878684807256236e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4303 }, { "completion_length": 223.7857208251953, "epoch": 0.2732698412698413, "grad_norm": 3.06213116645813, "kl": 1.049752116203308, "learning_rate": 4.879818594104308e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4304 }, { "completion_length": 203.57144165039062, "epoch": 0.2733333333333333, "grad_norm": 1.845963478088379, "kl": 0.948875904083252, "learning_rate": 4.880952380952381e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4305 }, { "completion_length": 267.71429443359375, "epoch": 0.2733968253968254, "grad_norm": 2.025662422180176, "kl": 0.8082095980644226, "learning_rate": 4.882086167800453e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4306 }, { "completion_length": 227.1428680419922, "epoch": 0.27346031746031746, "grad_norm": 2.7323594093322754, "kl": 0.9210889339447021, "learning_rate": 4.883219954648526e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4307 }, { "completion_length": 259.5714416503906, "epoch": 0.2735238095238095, "grad_norm": 0.03568011894822121, "kl": 0.7255619168281555, "learning_rate": 4.884353741496599e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4308 }, { "completion_length": 226.21429443359375, "epoch": 0.2735873015873016, "grad_norm": 0.05759283900260925, "kl": 0.9936767816543579, "learning_rate": 4.885487528344671e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4309 }, { "completion_length": 209.00001525878906, "epoch": 0.27365079365079364, "grad_norm": 1.9665274620056152, "kl": 0.9522619843482971, "learning_rate": 4.886621315192743e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4310 }, { "completion_length": 219.71429443359375, "epoch": 0.2737142857142857, "grad_norm": 1.1541789770126343, "kl": 1.0175410509109497, "learning_rate": 4.887755102040816e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4311 }, { "completion_length": 244.21429443359375, "epoch": 0.2737777777777778, "grad_norm": 3.013228416442871, "kl": 0.8731837868690491, "learning_rate": 4.888888888888889e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4312 }, { "completion_length": 278.21429443359375, "epoch": 0.27384126984126983, "grad_norm": 0.034003034234046936, "kl": 0.7570482492446899, "learning_rate": 4.890022675736961e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4313 }, { "completion_length": 185.42857360839844, "epoch": 0.27390476190476193, "grad_norm": 0.05391394719481468, "kl": 1.0340533256530762, "learning_rate": 4.891156462585033e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4314 }, { "completion_length": 208.85714721679688, "epoch": 0.27396825396825397, "grad_norm": 2.9975264072418213, "kl": 0.8696114420890808, "learning_rate": 4.892290249433107e-07, "loss": 0.0009, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4315 }, { "completion_length": 231.6428680419922, "epoch": 0.274031746031746, "grad_norm": 1.4308600425720215, "kl": 0.7150971293449402, "learning_rate": 4.893424036281179e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4316 }, { "completion_length": 244.4285888671875, "epoch": 0.2740952380952381, "grad_norm": 2.5804710388183594, "kl": 0.7982797026634216, "learning_rate": 4.894557823129251e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4317 }, { "completion_length": 241.85714721679688, "epoch": 0.27415873015873016, "grad_norm": 0.02451992593705654, "kl": 0.6869050860404968, "learning_rate": 4.895691609977324e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4318 }, { "completion_length": 222.1428680419922, "epoch": 0.2742222222222222, "grad_norm": 2.859555959701538, "kl": 0.8644309043884277, "learning_rate": 4.896825396825396e-07, "loss": 0.0009, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4319 }, { "completion_length": 201.00001525878906, "epoch": 0.2742857142857143, "grad_norm": 1.420308232307434, "kl": 0.8474876284599304, "learning_rate": 4.897959183673469e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4320 }, { "completion_length": 268.5714416503906, "epoch": 0.27434920634920634, "grad_norm": 1.0446362495422363, "kl": 0.8114669322967529, "learning_rate": 4.899092970521542e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4321 }, { "completion_length": 260.9285888671875, "epoch": 0.2744126984126984, "grad_norm": 0.9569954872131348, "kl": 0.7510165572166443, "learning_rate": 4.900226757369614e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4322 }, { "completion_length": 231.2857208251953, "epoch": 0.2744761904761905, "grad_norm": 0.029011322185397148, "kl": 0.7823328375816345, "learning_rate": 4.901360544217687e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4323 }, { "completion_length": 309.21429443359375, "epoch": 0.27453968253968253, "grad_norm": 0.02862580120563507, "kl": 0.5959486365318298, "learning_rate": 4.90249433106576e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4324 }, { "completion_length": 280.2857360839844, "epoch": 0.2746031746031746, "grad_norm": 2.109591484069824, "kl": 0.6816899180412292, "learning_rate": 4.903628117913832e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4325 }, { "completion_length": 269.21429443359375, "epoch": 0.27466666666666667, "grad_norm": 1.2498267889022827, "kl": 0.6857253313064575, "learning_rate": 4.904761904761904e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4326 }, { "completion_length": 224.57144165039062, "epoch": 0.2747301587301587, "grad_norm": 2.7382400035858154, "kl": 0.8697214722633362, "learning_rate": 4.905895691609977e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4327 }, { "completion_length": 259.0, "epoch": 0.2747936507936508, "grad_norm": 0.036163073033094406, "kl": 0.6340568661689758, "learning_rate": 4.90702947845805e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4328 }, { "completion_length": 241.4285888671875, "epoch": 0.27485714285714286, "grad_norm": 0.0423370897769928, "kl": 0.7771446108818054, "learning_rate": 4.908163265306122e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4329 }, { "completion_length": 191.1428680419922, "epoch": 0.2749206349206349, "grad_norm": 0.037918586283922195, "kl": 0.9579647779464722, "learning_rate": 4.909297052154194e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4330 }, { "completion_length": 290.0, "epoch": 0.274984126984127, "grad_norm": 0.029268965125083923, "kl": 0.7339172959327698, "learning_rate": 4.910430839002268e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4331 }, { "completion_length": 237.50001525878906, "epoch": 0.27504761904761904, "grad_norm": 0.032367777079343796, "kl": 0.7866434454917908, "learning_rate": 4.91156462585034e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4332 }, { "completion_length": 230.85714721679688, "epoch": 0.2751111111111111, "grad_norm": 1.6649776697158813, "kl": 0.8036953210830688, "learning_rate": 4.912698412698412e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4333 }, { "completion_length": 280.14288330078125, "epoch": 0.2751746031746032, "grad_norm": 0.42844313383102417, "kl": 1.0285162925720215, "learning_rate": 4.913832199546485e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4334 }, { "completion_length": 214.85714721679688, "epoch": 0.2752380952380952, "grad_norm": 1.7292357683181763, "kl": 1.0068343877792358, "learning_rate": 4.914965986394558e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4335 }, { "completion_length": 278.5, "epoch": 0.2753015873015873, "grad_norm": 2.3399152755737305, "kl": 0.7270343899726868, "learning_rate": 4.91609977324263e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4336 }, { "completion_length": 284.0, "epoch": 0.27536507936507937, "grad_norm": 0.04897017776966095, "kl": 0.7188239693641663, "learning_rate": 4.917233560090703e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4337 }, { "completion_length": 258.5714416503906, "epoch": 0.2754285714285714, "grad_norm": 1.3830559253692627, "kl": 0.7144431471824646, "learning_rate": 4.918367346938776e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4338 }, { "completion_length": 204.21429443359375, "epoch": 0.2754920634920635, "grad_norm": 2.155130386352539, "kl": 0.8047225475311279, "learning_rate": 4.919501133786848e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4339 }, { "completion_length": 258.9285888671875, "epoch": 0.27555555555555555, "grad_norm": 2.4258337020874023, "kl": 0.7564803957939148, "learning_rate": 4.92063492063492e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4340 }, { "completion_length": 261.5714416503906, "epoch": 0.2756190476190476, "grad_norm": 1.3807746171951294, "kl": 0.7352096438407898, "learning_rate": 4.921768707482993e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4341 }, { "completion_length": 282.0, "epoch": 0.2756825396825397, "grad_norm": 1.861255407333374, "kl": 0.6882936358451843, "learning_rate": 4.922902494331065e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4342 }, { "completion_length": 223.71429443359375, "epoch": 0.27574603174603174, "grad_norm": 1.8425216674804688, "kl": 0.772337794303894, "learning_rate": 4.924036281179138e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4343 }, { "completion_length": 271.0714416503906, "epoch": 0.2758095238095238, "grad_norm": 0.029922518879175186, "kl": 0.7156094908714294, "learning_rate": 4.925170068027211e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4344 }, { "completion_length": 318.7857360839844, "epoch": 0.2758730158730159, "grad_norm": 1.0849546194076538, "kl": 0.5368942022323608, "learning_rate": 4.926303854875283e-07, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4345 }, { "completion_length": 231.71429443359375, "epoch": 0.2759365079365079, "grad_norm": 0.04950243607163429, "kl": 0.755166232585907, "learning_rate": 4.927437641723356e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4346 }, { "completion_length": 286.8571472167969, "epoch": 0.276, "grad_norm": 0.035990405827760696, "kl": 0.6820003390312195, "learning_rate": 4.928571428571429e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4347 }, { "completion_length": 253.07144165039062, "epoch": 0.27606349206349207, "grad_norm": 1.5103577375411987, "kl": 0.7340666055679321, "learning_rate": 4.929705215419501e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4348 }, { "completion_length": 258.3571472167969, "epoch": 0.2761269841269841, "grad_norm": 2.0985829830169678, "kl": 0.6902582049369812, "learning_rate": 4.930839002267573e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4349 }, { "completion_length": 200.50001525878906, "epoch": 0.2761904761904762, "grad_norm": 1.0880473852157593, "kl": 0.9822878837585449, "learning_rate": 4.931972789115645e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4350 }, { "completion_length": 274.0, "epoch": 0.27625396825396825, "grad_norm": 1.717240810394287, "kl": 0.6233856081962585, "learning_rate": 4.933106575963719e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4351 }, { "completion_length": 251.00001525878906, "epoch": 0.2763174603174603, "grad_norm": 0.02554299496114254, "kl": 0.6084503531455994, "learning_rate": 4.934240362811791e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4352 }, { "completion_length": 251.1428680419922, "epoch": 0.2763809523809524, "grad_norm": 0.021846232935786247, "kl": 0.6198158264160156, "learning_rate": 4.935374149659863e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4353 }, { "completion_length": 222.35714721679688, "epoch": 0.27644444444444444, "grad_norm": 1.903703212738037, "kl": 0.7116273641586304, "learning_rate": 4.936507936507937e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4354 }, { "completion_length": 240.00001525878906, "epoch": 0.2765079365079365, "grad_norm": 1.9444189071655273, "kl": 0.5733417868614197, "learning_rate": 4.937641723356009e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4355 }, { "completion_length": 294.0714416503906, "epoch": 0.2765714285714286, "grad_norm": 1.110579490661621, "kl": 0.5224992632865906, "learning_rate": 4.938775510204081e-07, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4356 }, { "completion_length": 207.7857208251953, "epoch": 0.2766349206349206, "grad_norm": 1.302248239517212, "kl": 0.6330930590629578, "learning_rate": 4.939909297052154e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4357 }, { "completion_length": 253.1428680419922, "epoch": 0.2766984126984127, "grad_norm": 1.153946876525879, "kl": 0.5856195688247681, "learning_rate": 4.941043083900227e-07, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4358 }, { "completion_length": 222.2857208251953, "epoch": 0.27676190476190476, "grad_norm": 0.9170907139778137, "kl": 0.653455913066864, "learning_rate": 4.942176870748299e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4359 }, { "completion_length": 192.92857360839844, "epoch": 0.2768253968253968, "grad_norm": 2.3718907833099365, "kl": 0.6946043372154236, "learning_rate": 4.943310657596372e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4360 }, { "completion_length": 270.21429443359375, "epoch": 0.2768888888888889, "grad_norm": 1.1745103597640991, "kl": 0.5478464365005493, "learning_rate": 4.944444444444445e-07, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4361 }, { "completion_length": 262.2857360839844, "epoch": 0.27695238095238095, "grad_norm": 1.7057780027389526, "kl": 0.5539121627807617, "learning_rate": 4.945578231292517e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4362 }, { "completion_length": 185.92857360839844, "epoch": 0.277015873015873, "grad_norm": 2.3685097694396973, "kl": 0.720299482345581, "learning_rate": 4.946712018140589e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4363 }, { "completion_length": 206.92857360839844, "epoch": 0.2770793650793651, "grad_norm": 2.324732780456543, "kl": 0.6033782958984375, "learning_rate": 4.947845804988662e-07, "loss": 0.0006, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4364 }, { "completion_length": 246.7857208251953, "epoch": 0.27714285714285714, "grad_norm": 1.1027605533599854, "kl": 0.643487274646759, "learning_rate": 4.948979591836734e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4365 }, { "completion_length": 228.6428680419922, "epoch": 0.27720634920634923, "grad_norm": 2.3485240936279297, "kl": 0.6732455492019653, "learning_rate": 4.950113378684807e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4366 }, { "completion_length": 217.21429443359375, "epoch": 0.2772698412698413, "grad_norm": 1.6599708795547485, "kl": 0.6285558938980103, "learning_rate": 4.95124716553288e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4367 }, { "completion_length": 245.71429443359375, "epoch": 0.2773333333333333, "grad_norm": 0.9500961899757385, "kl": 0.6499223113059998, "learning_rate": 4.952380952380952e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4368 }, { "completion_length": 207.50001525878906, "epoch": 0.2773968253968254, "grad_norm": 0.017693495377898216, "kl": 0.7148803472518921, "learning_rate": 4.953514739229025e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4369 }, { "completion_length": 201.07144165039062, "epoch": 0.27746031746031746, "grad_norm": 0.026134761050343513, "kl": 0.6789048314094543, "learning_rate": 4.954648526077097e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4370 }, { "completion_length": 269.2857360839844, "epoch": 0.2775238095238095, "grad_norm": 1.672399878501892, "kl": 0.48121514916419983, "learning_rate": 4.95578231292517e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4371 }, { "completion_length": 222.00001525878906, "epoch": 0.2775873015873016, "grad_norm": 2.4348673820495605, "kl": 0.6715160012245178, "learning_rate": 4.956916099773242e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4372 }, { "completion_length": 263.21429443359375, "epoch": 0.27765079365079365, "grad_norm": 1.156374454498291, "kl": 0.5980266332626343, "learning_rate": 4.958049886621314e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4373 }, { "completion_length": 255.6428680419922, "epoch": 0.2777142857142857, "grad_norm": 1.7521764039993286, "kl": 0.5737795233726501, "learning_rate": 4.959183673469388e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4374 }, { "completion_length": 183.7857208251953, "epoch": 0.2777777777777778, "grad_norm": 1.7993987798690796, "kl": 0.8995482921600342, "learning_rate": 4.96031746031746e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4375 }, { "completion_length": 229.9285888671875, "epoch": 0.27784126984126983, "grad_norm": 1.2127463817596436, "kl": 0.6242775917053223, "learning_rate": 4.961451247165532e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4376 }, { "completion_length": 272.64288330078125, "epoch": 0.27790476190476193, "grad_norm": 1.7624471187591553, "kl": 0.5545397996902466, "learning_rate": 4.962585034013605e-07, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4377 }, { "completion_length": 223.00001525878906, "epoch": 0.277968253968254, "grad_norm": 0.026310687884688377, "kl": 0.7931506633758545, "learning_rate": 4.963718820861678e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4378 }, { "completion_length": 283.2857360839844, "epoch": 0.278031746031746, "grad_norm": 2.6898317337036133, "kl": 0.6965035796165466, "learning_rate": 4.96485260770975e-07, "loss": 0.0007, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4379 }, { "completion_length": 305.64288330078125, "epoch": 0.2780952380952381, "grad_norm": 1.0181374549865723, "kl": 0.5759960412979126, "learning_rate": 4.965986394557823e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4380 }, { "completion_length": 299.7857360839844, "epoch": 0.27815873015873016, "grad_norm": 1.3847603797912598, "kl": 0.5537112951278687, "learning_rate": 4.967120181405896e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4381 }, { "completion_length": 291.71429443359375, "epoch": 0.2782222222222222, "grad_norm": 2.335031270980835, "kl": 0.5943407416343689, "learning_rate": 4.968253968253968e-07, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4382 }, { "completion_length": 252.7857208251953, "epoch": 0.2782857142857143, "grad_norm": 1.6900925636291504, "kl": 0.7040861248970032, "learning_rate": 4.969387755102041e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4383 }, { "completion_length": 241.2857208251953, "epoch": 0.27834920634920635, "grad_norm": 2.1153039932250977, "kl": 0.6769067645072937, "learning_rate": 4.970521541950114e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4384 }, { "completion_length": 267.21429443359375, "epoch": 0.2784126984126984, "grad_norm": 1.3667405843734741, "kl": 0.6723155379295349, "learning_rate": 4.971655328798185e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4385 }, { "completion_length": 249.00001525878906, "epoch": 0.2784761904761905, "grad_norm": 1.332175612449646, "kl": 0.7753771543502808, "learning_rate": 4.972789115646258e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4386 }, { "completion_length": 218.50001525878906, "epoch": 0.27853968253968253, "grad_norm": 0.05618206411600113, "kl": 0.8444311618804932, "learning_rate": 4.973922902494331e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4387 }, { "completion_length": 272.5714416503906, "epoch": 0.27860317460317463, "grad_norm": 0.030264105647802353, "kl": 0.6761388182640076, "learning_rate": 4.975056689342403e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4388 }, { "completion_length": 270.2857360839844, "epoch": 0.2786666666666667, "grad_norm": 0.024474265053868294, "kl": 0.6551933288574219, "learning_rate": 4.976190476190476e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4389 }, { "completion_length": 301.14288330078125, "epoch": 0.2787301587301587, "grad_norm": 0.023556556552648544, "kl": 0.5982284545898438, "learning_rate": 4.977324263038549e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4390 }, { "completion_length": 290.5714416503906, "epoch": 0.2787936507936508, "grad_norm": 0.023849237710237503, "kl": 0.6106185913085938, "learning_rate": 4.978458049886621e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4391 }, { "completion_length": 248.71429443359375, "epoch": 0.27885714285714286, "grad_norm": 0.03813817352056503, "kl": 0.7257576584815979, "learning_rate": 4.979591836734693e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4392 }, { "completion_length": 252.9285888671875, "epoch": 0.2789206349206349, "grad_norm": 1.6247739791870117, "kl": 0.7339186072349548, "learning_rate": 4.980725623582766e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4393 }, { "completion_length": 288.21429443359375, "epoch": 0.278984126984127, "grad_norm": 1.3416836261749268, "kl": 0.5812774896621704, "learning_rate": 4.981859410430839e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4394 }, { "completion_length": 243.7857208251953, "epoch": 0.27904761904761904, "grad_norm": 0.025932811200618744, "kl": 0.7085345387458801, "learning_rate": 4.982993197278911e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4395 }, { "completion_length": 273.0714416503906, "epoch": 0.2791111111111111, "grad_norm": 0.030051928013563156, "kl": 0.6642073392868042, "learning_rate": 4.984126984126983e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4396 }, { "completion_length": 258.71429443359375, "epoch": 0.2791746031746032, "grad_norm": 0.027590136975049973, "kl": 0.7174277901649475, "learning_rate": 4.985260770975057e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4397 }, { "completion_length": 241.00001525878906, "epoch": 0.27923809523809523, "grad_norm": 0.026515763252973557, "kl": 0.7350285649299622, "learning_rate": 4.986394557823129e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4398 }, { "completion_length": 238.21429443359375, "epoch": 0.27930158730158733, "grad_norm": 0.08331138640642166, "kl": 0.7460821866989136, "learning_rate": 4.987528344671201e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4399 }, { "completion_length": 248.71429443359375, "epoch": 0.27936507936507937, "grad_norm": 0.02865569293498993, "kl": 0.7845973372459412, "learning_rate": 4.988662131519274e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4400 }, { "completion_length": 258.0, "epoch": 0.2794285714285714, "grad_norm": 0.036968130618333817, "kl": 0.7835261821746826, "learning_rate": 4.989795918367347e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4401 }, { "completion_length": 250.50001525878906, "epoch": 0.2794920634920635, "grad_norm": 1.7837224006652832, "kl": 0.6615871787071228, "learning_rate": 4.990929705215419e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4402 }, { "completion_length": 261.14288330078125, "epoch": 0.27955555555555556, "grad_norm": 0.04493631422519684, "kl": 0.6930686235427856, "learning_rate": 4.992063492063492e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4403 }, { "completion_length": 204.71429443359375, "epoch": 0.2796190476190476, "grad_norm": 2.135301113128662, "kl": 0.8645660281181335, "learning_rate": 4.993197278911565e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4404 }, { "completion_length": 230.1428680419922, "epoch": 0.2796825396825397, "grad_norm": 3.202638864517212, "kl": 0.7483285069465637, "learning_rate": 4.994331065759637e-07, "loss": 0.0007, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4405 }, { "completion_length": 239.4285888671875, "epoch": 0.27974603174603174, "grad_norm": 1.8720481395721436, "kl": 0.7888578176498413, "learning_rate": 4.99546485260771e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4406 }, { "completion_length": 260.2857360839844, "epoch": 0.2798095238095238, "grad_norm": 0.02230123244225979, "kl": 0.5679978132247925, "learning_rate": 4.996598639455782e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4407 }, { "completion_length": 242.6428680419922, "epoch": 0.2798730158730159, "grad_norm": 0.019831042736768723, "kl": 0.7363025546073914, "learning_rate": 4.997732426303854e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4408 }, { "completion_length": 267.0, "epoch": 0.27993650793650793, "grad_norm": 1.0199261903762817, "kl": 0.6123690009117126, "learning_rate": 4.998866213151927e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4409 }, { "completion_length": 273.5, "epoch": 0.28, "grad_norm": 0.017150800675153732, "kl": 0.5425620675086975, "learning_rate": 5e-07, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4410 }, { "completion_length": 198.1428680419922, "epoch": 0.28006349206349207, "grad_norm": 2.4703855514526367, "kl": 0.8189154267311096, "learning_rate": 4.999999996406127e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4411 }, { "completion_length": 220.21429443359375, "epoch": 0.2801269841269841, "grad_norm": 2.215134620666504, "kl": 0.7742483019828796, "learning_rate": 4.999999985624507e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4412 }, { "completion_length": 227.2857208251953, "epoch": 0.2801904761904762, "grad_norm": 0.01739530824124813, "kl": 0.6851274967193604, "learning_rate": 4.99999996765514e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4413 }, { "completion_length": 217.00001525878906, "epoch": 0.28025396825396826, "grad_norm": 1.9203367233276367, "kl": 0.6418840885162354, "learning_rate": 4.999999942498027e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4414 }, { "completion_length": 178.42857360839844, "epoch": 0.2803174603174603, "grad_norm": 1.4891351461410522, "kl": 0.9987488389015198, "learning_rate": 4.999999910153167e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4415 }, { "completion_length": 218.1428680419922, "epoch": 0.2803809523809524, "grad_norm": 0.02443927712738514, "kl": 0.7467435598373413, "learning_rate": 4.999999870620562e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4416 }, { "completion_length": 212.85714721679688, "epoch": 0.28044444444444444, "grad_norm": 2.1970715522766113, "kl": 0.690747857093811, "learning_rate": 4.99999982390021e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4417 }, { "completion_length": 235.35714721679688, "epoch": 0.2805079365079365, "grad_norm": 0.924934446811676, "kl": 0.6366738080978394, "learning_rate": 4.999999769992112e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4418 }, { "completion_length": 214.92857360839844, "epoch": 0.2805714285714286, "grad_norm": 1.549172043800354, "kl": 0.7455323338508606, "learning_rate": 4.999999708896268e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4419 }, { "completion_length": 171.35714721679688, "epoch": 0.2806349206349206, "grad_norm": 1.7845524549484253, "kl": 0.7706581354141235, "learning_rate": 4.999999640612679e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4420 }, { "completion_length": 163.7857208251953, "epoch": 0.2806984126984127, "grad_norm": 2.7937872409820557, "kl": 0.9715994000434875, "learning_rate": 4.999999565141343e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4421 }, { "completion_length": 209.7857208251953, "epoch": 0.28076190476190477, "grad_norm": 1.94904363155365, "kl": 0.714136004447937, "learning_rate": 4.999999482482262e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4422 }, { "completion_length": 216.21429443359375, "epoch": 0.2808253968253968, "grad_norm": 0.049088045954704285, "kl": 0.8942247629165649, "learning_rate": 4.999999392635437e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4423 }, { "completion_length": 221.71429443359375, "epoch": 0.2808888888888889, "grad_norm": 1.9195659160614014, "kl": 0.7894815802574158, "learning_rate": 4.999999295600866e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4424 }, { "completion_length": 199.2857208251953, "epoch": 0.28095238095238095, "grad_norm": 1.8731895685195923, "kl": 0.6859953999519348, "learning_rate": 4.99999919137855e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4425 }, { "completion_length": 217.21429443359375, "epoch": 0.281015873015873, "grad_norm": 0.9562215209007263, "kl": 0.664715588092804, "learning_rate": 4.999999079968491e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4426 }, { "completion_length": 171.42857360839844, "epoch": 0.2810793650793651, "grad_norm": 0.01892365701496601, "kl": 0.9608849287033081, "learning_rate": 4.999998961370688e-07, "loss": 0.001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4427 }, { "completion_length": 201.00001525878906, "epoch": 0.28114285714285714, "grad_norm": 1.084532618522644, "kl": 0.7768070101737976, "learning_rate": 4.999998835585141e-07, "loss": 0.0008, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.6428571939468384, "step": 4428 }, { "completion_length": 189.2857208251953, "epoch": 0.2812063492063492, "grad_norm": 2.57936429977417, "kl": 0.8299790024757385, "learning_rate": 4.999998702611851e-07, "loss": 0.0008, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4429 }, { "completion_length": 185.42857360839844, "epoch": 0.2812698412698413, "grad_norm": 0.026027221232652664, "kl": 0.8008578419685364, "learning_rate": 4.999998562450817e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4430 }, { "completion_length": 237.07144165039062, "epoch": 0.2813333333333333, "grad_norm": 0.01640039123594761, "kl": 0.6537240147590637, "learning_rate": 4.999998415102041e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4431 }, { "completion_length": 212.35714721679688, "epoch": 0.2813968253968254, "grad_norm": 0.02027413435280323, "kl": 0.7859070897102356, "learning_rate": 4.999998260565523e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4432 }, { "completion_length": 206.71429443359375, "epoch": 0.28146031746031747, "grad_norm": 1.9803797006607056, "kl": 0.7321235537528992, "learning_rate": 4.999998098841265e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4433 }, { "completion_length": 223.85714721679688, "epoch": 0.2815238095238095, "grad_norm": 0.022440601140260696, "kl": 0.6554068922996521, "learning_rate": 4.999997929929264e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4434 }, { "completion_length": 257.2857360839844, "epoch": 0.2815873015873016, "grad_norm": 0.018770260736346245, "kl": 0.5817533731460571, "learning_rate": 4.999997753829523e-07, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4435 }, { "completion_length": 173.50001525878906, "epoch": 0.28165079365079365, "grad_norm": 1.7088754177093506, "kl": 0.8362554907798767, "learning_rate": 4.999997570542041e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4436 }, { "completion_length": 182.57144165039062, "epoch": 0.2817142857142857, "grad_norm": 2.133542537689209, "kl": 1.0369895696640015, "learning_rate": 4.99999738006682e-07, "loss": 0.001, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4437 }, { "completion_length": 185.57144165039062, "epoch": 0.2817777777777778, "grad_norm": 1.0815449953079224, "kl": 0.7671431303024292, "learning_rate": 4.999997182403862e-07, "loss": 0.0008, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4438 }, { "completion_length": 189.7857208251953, "epoch": 0.28184126984126984, "grad_norm": 1.7601900100708008, "kl": 0.769948422908783, "learning_rate": 4.999996977553163e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4439 }, { "completion_length": 166.0, "epoch": 0.2819047619047619, "grad_norm": 2.0394022464752197, "kl": 0.9592848420143127, "learning_rate": 4.999996765514726e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4440 }, { "completion_length": 179.85714721679688, "epoch": 0.281968253968254, "grad_norm": 2.3493998050689697, "kl": 0.8624454736709595, "learning_rate": 4.999996546288553e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4441 }, { "completion_length": 187.50001525878906, "epoch": 0.282031746031746, "grad_norm": 3.271735906600952, "kl": 0.9082843661308289, "learning_rate": 4.999996319874642e-07, "loss": 0.0009, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.785714328289032, "step": 4442 }, { "completion_length": 128.92857360839844, "epoch": 0.2820952380952381, "grad_norm": 6.584936141967773, "kl": 1.1989147663116455, "learning_rate": 4.999996086272996e-07, "loss": 0.0012, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_originality_func": 0.7142857313156128, "step": 4443 }, { "completion_length": 107.21428680419922, "epoch": 0.28215873015873016, "grad_norm": 2.915012836456299, "kl": 1.4545950889587402, "learning_rate": 4.999995845483615e-07, "loss": 0.0015, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.2857142984867096, "step": 4444 }, { "completion_length": 133.6428680419922, "epoch": 0.2822222222222222, "grad_norm": 3.35174822807312, "kl": 1.285132884979248, "learning_rate": 4.999995597506498e-07, "loss": 0.0013, "reward": 0.3571428656578064, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.3571428656578064, "step": 4445 }, { "completion_length": 79.14286041259766, "epoch": 0.2822857142857143, "grad_norm": 0.12569551169872284, "kl": 2.2796549797058105, "learning_rate": 4.999995342341648e-07, "loss": 0.0023, "reward": 0.1428571492433548, "reward_std": 0.0, "rewards/check_originality_func": 0.1428571492433548, "step": 4446 }, { "completion_length": 64.64286041259766, "epoch": 0.28234920634920635, "grad_norm": 7.4594879150390625, "kl": 2.758185625076294, "learning_rate": 4.999995079989065e-07, "loss": 0.0028, "reward": 0.2857142984867096, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.2857142984867096, "step": 4447 }, { "completion_length": 81.42857360839844, "epoch": 0.2824126984126984, "grad_norm": 1.5306241512298584, "kl": 2.2814764976501465, "learning_rate": 4.999994810448749e-07, "loss": 0.0023, "reward": 0.0714285746216774, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.0714285746216774, "step": 4448 }, { "completion_length": 65.85714721679688, "epoch": 0.2824761904761905, "grad_norm": 4.308773040771484, "kl": 2.8368990421295166, "learning_rate": 4.9999945337207e-07, "loss": 0.0028, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4449 }, { "completion_length": 75.78572082519531, "epoch": 0.28253968253968254, "grad_norm": 3.584317684173584, "kl": 2.3759236335754395, "learning_rate": 4.999994249804922e-07, "loss": 0.0024, "reward": 0.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.2142857313156128, "step": 4450 }, { "completion_length": 92.28572082519531, "epoch": 0.2826031746031746, "grad_norm": 3.3903987407684326, "kl": 1.7823585271835327, "learning_rate": 4.999993958701414e-07, "loss": 0.0018, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4451 }, { "completion_length": 87.21428680419922, "epoch": 0.2826666666666667, "grad_norm": 4.94028902053833, "kl": 2.064136028289795, "learning_rate": 4.999993660410176e-07, "loss": 0.0021, "reward": 0.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.2142857313156128, "step": 4452 }, { "completion_length": 102.92857360839844, "epoch": 0.2827301587301587, "grad_norm": 4.003997802734375, "kl": 1.9248145818710327, "learning_rate": 4.99999335493121e-07, "loss": 0.0019, "reward": 0.1428571492433548, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.1428571492433548, "step": 4453 }, { "completion_length": 146.42857360839844, "epoch": 0.2827936507936508, "grad_norm": 4.338335990905762, "kl": 1.2722240686416626, "learning_rate": 4.999993042264516e-07, "loss": 0.0013, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_originality_func": 0.6428571939468384, "step": 4454 }, { "completion_length": 189.00001525878906, "epoch": 0.28285714285714286, "grad_norm": 1.5395607948303223, "kl": 1.0843901634216309, "learning_rate": 4.999992722410096e-07, "loss": 0.0011, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.5, "step": 4455 }, { "completion_length": 97.50000762939453, "epoch": 0.2829206349206349, "grad_norm": 8.979925155639648, "kl": 1.7512298822402954, "learning_rate": 4.99999239536795e-07, "loss": 0.0018, "reward": 0.3571428656578064, "reward_std": 0.5050762891769409, "rewards/check_originality_func": 0.3571428656578064, "step": 4456 }, { "completion_length": 141.1428680419922, "epoch": 0.282984126984127, "grad_norm": 1.8883625268936157, "kl": 1.2294174432754517, "learning_rate": 4.999992061138079e-07, "loss": 0.0012, "reward": 0.5, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.5, "step": 4457 }, { "completion_length": 144.71429443359375, "epoch": 0.28304761904761905, "grad_norm": 3.3201446533203125, "kl": 1.275491714477539, "learning_rate": 4.999991719720485e-07, "loss": 0.0013, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.7142857313156128, "step": 4458 }, { "completion_length": 117.64286041259766, "epoch": 0.2831111111111111, "grad_norm": 3.1244702339172363, "kl": 1.6001579761505127, "learning_rate": 4.999991371115168e-07, "loss": 0.0016, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4459 }, { "completion_length": 177.07144165039062, "epoch": 0.2831746031746032, "grad_norm": 0.3150768280029297, "kl": 1.4711209535598755, "learning_rate": 4.999991015322128e-07, "loss": 0.0015, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4460 }, { "completion_length": 215.92857360839844, "epoch": 0.28323809523809523, "grad_norm": 1.2373732328414917, "kl": 0.8632524013519287, "learning_rate": 4.999990652341368e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4461 }, { "completion_length": 167.2857208251953, "epoch": 0.2833015873015873, "grad_norm": 2.0573251247406006, "kl": 1.1610201597213745, "learning_rate": 4.999990282172889e-07, "loss": 0.0012, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4462 }, { "completion_length": 243.21429443359375, "epoch": 0.2833650793650794, "grad_norm": 0.027530524879693985, "kl": 0.758414626121521, "learning_rate": 4.99998990481669e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4463 }, { "completion_length": 197.92857360839844, "epoch": 0.2834285714285714, "grad_norm": 2.1369171142578125, "kl": 0.9735791683197021, "learning_rate": 4.999989520272775e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4464 }, { "completion_length": 206.35714721679688, "epoch": 0.2834920634920635, "grad_norm": 2.1091673374176025, "kl": 0.9567190408706665, "learning_rate": 4.999989128541143e-07, "loss": 0.001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4465 }, { "completion_length": 222.4285888671875, "epoch": 0.28355555555555556, "grad_norm": 0.07566037029027939, "kl": 1.0976189374923706, "learning_rate": 4.999988729621795e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4466 }, { "completion_length": 246.6428680419922, "epoch": 0.2836190476190476, "grad_norm": 1.7923539876937866, "kl": 0.7459099888801575, "learning_rate": 4.999988323514733e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4467 }, { "completion_length": 176.1428680419922, "epoch": 0.2836825396825397, "grad_norm": 0.3059016168117523, "kl": 1.6707875728607178, "learning_rate": 4.999987910219958e-07, "loss": 0.0017, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4468 }, { "completion_length": 196.00001525878906, "epoch": 0.28374603174603175, "grad_norm": 1.7033623456954956, "kl": 0.888179361820221, "learning_rate": 4.999987489737472e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4469 }, { "completion_length": 260.21429443359375, "epoch": 0.2838095238095238, "grad_norm": 1.3330957889556885, "kl": 0.6028925180435181, "learning_rate": 4.999987062067274e-07, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4470 }, { "completion_length": 182.57144165039062, "epoch": 0.2838730158730159, "grad_norm": 1.7494559288024902, "kl": 0.994409441947937, "learning_rate": 4.999986627209368e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4471 }, { "completion_length": 193.85714721679688, "epoch": 0.28393650793650793, "grad_norm": 0.03170151636004448, "kl": 0.9999013543128967, "learning_rate": 4.999986185163754e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4472 }, { "completion_length": 193.21429443359375, "epoch": 0.284, "grad_norm": 2.14817214012146, "kl": 0.9976801872253418, "learning_rate": 4.999985735930431e-07, "loss": 0.001, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4473 }, { "completion_length": 200.42857360839844, "epoch": 0.2840634920634921, "grad_norm": 0.020394472405314445, "kl": 0.8779566884040833, "learning_rate": 4.999985279509404e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4474 }, { "completion_length": 249.21429443359375, "epoch": 0.2841269841269841, "grad_norm": 1.4325164556503296, "kl": 0.7566850185394287, "learning_rate": 4.999984815900673e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4475 }, { "completion_length": 236.1428680419922, "epoch": 0.2841904761904762, "grad_norm": 0.04351560026407242, "kl": 0.8789218068122864, "learning_rate": 4.999984345104238e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4476 }, { "completion_length": 211.92857360839844, "epoch": 0.28425396825396826, "grad_norm": 2.701155662536621, "kl": 0.9182178378105164, "learning_rate": 4.999983867120102e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4477 }, { "completion_length": 156.2857208251953, "epoch": 0.2843174603174603, "grad_norm": 1.4926198720932007, "kl": 1.1563421487808228, "learning_rate": 4.999983381948266e-07, "loss": 0.0012, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4478 }, { "completion_length": 235.7857208251953, "epoch": 0.2843809523809524, "grad_norm": 0.023365939036011696, "kl": 0.8358680009841919, "learning_rate": 4.999982889588731e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4479 }, { "completion_length": 229.35714721679688, "epoch": 0.28444444444444444, "grad_norm": 0.020289400592446327, "kl": 0.8556203246116638, "learning_rate": 4.999982390041498e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4480 }, { "completion_length": 227.57144165039062, "epoch": 0.2845079365079365, "grad_norm": 1.6329084634780884, "kl": 0.870163083076477, "learning_rate": 4.99998188330657e-07, "loss": 0.0009, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4481 }, { "completion_length": 257.21429443359375, "epoch": 0.2845714285714286, "grad_norm": 1.263142704963684, "kl": 0.7432628870010376, "learning_rate": 4.999981369383947e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4482 }, { "completion_length": 218.92857360839844, "epoch": 0.28463492063492063, "grad_norm": 0.02079668454825878, "kl": 0.8703539371490479, "learning_rate": 4.999980848273631e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4483 }, { "completion_length": 247.1428680419922, "epoch": 0.2846984126984127, "grad_norm": 1.22498619556427, "kl": 0.925827145576477, "learning_rate": 4.999980319975623e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4484 }, { "completion_length": 260.2857360839844, "epoch": 0.28476190476190477, "grad_norm": 0.024334663525223732, "kl": 0.8779839277267456, "learning_rate": 4.999979784489925e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4485 }, { "completion_length": 183.42857360839844, "epoch": 0.2848253968253968, "grad_norm": 0.0234366524964571, "kl": 0.9824706315994263, "learning_rate": 4.999979241816539e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4486 }, { "completion_length": 223.85714721679688, "epoch": 0.2848888888888889, "grad_norm": 1.996868371963501, "kl": 0.8176884055137634, "learning_rate": 4.999978691955465e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4487 }, { "completion_length": 249.00001525878906, "epoch": 0.28495238095238096, "grad_norm": 1.9761477708816528, "kl": 0.7517998814582825, "learning_rate": 4.999978134906707e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4488 }, { "completion_length": 169.6428680419922, "epoch": 0.285015873015873, "grad_norm": 1.5224049091339111, "kl": 1.1060622930526733, "learning_rate": 4.999977570670266e-07, "loss": 0.0011, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.785714328289032, "step": 4489 }, { "completion_length": 203.42857360839844, "epoch": 0.2850793650793651, "grad_norm": 1.7548918724060059, "kl": 0.9042261838912964, "learning_rate": 4.999976999246141e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4490 }, { "completion_length": 190.6428680419922, "epoch": 0.28514285714285714, "grad_norm": 0.05296851694583893, "kl": 1.0560868978500366, "learning_rate": 4.999976420634337e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4491 }, { "completion_length": 214.1428680419922, "epoch": 0.2852063492063492, "grad_norm": 1.5946625471115112, "kl": 0.9795113205909729, "learning_rate": 4.999975834834853e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4492 }, { "completion_length": 226.9285888671875, "epoch": 0.2852698412698413, "grad_norm": 2.0699822902679443, "kl": 0.8090237379074097, "learning_rate": 4.999975241847692e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4493 }, { "completion_length": 211.71429443359375, "epoch": 0.2853333333333333, "grad_norm": 0.02814127318561077, "kl": 0.9220308065414429, "learning_rate": 4.999974641672856e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4494 }, { "completion_length": 235.57144165039062, "epoch": 0.28539682539682537, "grad_norm": 1.3847934007644653, "kl": 0.7866901755332947, "learning_rate": 4.999974034310346e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4495 }, { "completion_length": 220.2857208251953, "epoch": 0.28546031746031747, "grad_norm": 0.023404335603117943, "kl": 1.0103700160980225, "learning_rate": 4.999973419760164e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4496 }, { "completion_length": 222.71429443359375, "epoch": 0.2855238095238095, "grad_norm": 2.020958185195923, "kl": 0.8555546402931213, "learning_rate": 4.999972798022313e-07, "loss": 0.0009, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4497 }, { "completion_length": 219.71429443359375, "epoch": 0.2855873015873016, "grad_norm": 0.026342378929257393, "kl": 0.9134226441383362, "learning_rate": 4.999972169096792e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4498 }, { "completion_length": 224.85714721679688, "epoch": 0.28565079365079366, "grad_norm": 0.031093865633010864, "kl": 1.0508060455322266, "learning_rate": 4.999971532983606e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4499 }, { "completion_length": 241.7857208251953, "epoch": 0.2857142857142857, "grad_norm": 1.8032456636428833, "kl": 0.7860925793647766, "learning_rate": 4.999970889682754e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4500 }, { "completion_length": 257.3571472167969, "epoch": 0.2857777777777778, "grad_norm": 1.7621595859527588, "kl": 0.8258322477340698, "learning_rate": 4.99997023919424e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4501 }, { "completion_length": 243.2857208251953, "epoch": 0.28584126984126984, "grad_norm": 0.03528570756316185, "kl": 1.0388100147247314, "learning_rate": 4.999969581518065e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4502 }, { "completion_length": 277.14288330078125, "epoch": 0.2859047619047619, "grad_norm": 0.02712418884038925, "kl": 0.756600022315979, "learning_rate": 4.999968916654229e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4503 }, { "completion_length": 259.21429443359375, "epoch": 0.285968253968254, "grad_norm": 0.022698327898979187, "kl": 0.7719879746437073, "learning_rate": 4.999968244602738e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4504 }, { "completion_length": 239.7857208251953, "epoch": 0.286031746031746, "grad_norm": 0.03129749000072479, "kl": 0.9552915096282959, "learning_rate": 4.99996756536359e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4505 }, { "completion_length": 244.7857208251953, "epoch": 0.28609523809523807, "grad_norm": 0.08837380260229111, "kl": 1.0219881534576416, "learning_rate": 4.99996687893679e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4506 }, { "completion_length": 240.50001525878906, "epoch": 0.28615873015873017, "grad_norm": 1.3665642738342285, "kl": 0.8306944966316223, "learning_rate": 4.999966185322338e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4507 }, { "completion_length": 261.8571472167969, "epoch": 0.2862222222222222, "grad_norm": 0.898582935333252, "kl": 0.9158215522766113, "learning_rate": 4.999965484520236e-07, "loss": 0.0009, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4508 }, { "completion_length": 225.4285888671875, "epoch": 0.2862857142857143, "grad_norm": 0.027126239612698555, "kl": 0.9513834714889526, "learning_rate": 4.999964776530488e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4509 }, { "completion_length": 220.2857208251953, "epoch": 0.28634920634920635, "grad_norm": 1.120225429534912, "kl": 1.0000441074371338, "learning_rate": 4.999964061353093e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4510 }, { "completion_length": 197.1428680419922, "epoch": 0.2864126984126984, "grad_norm": 0.03177597001194954, "kl": 1.1088695526123047, "learning_rate": 4.999963338988056e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4511 }, { "completion_length": 253.7857208251953, "epoch": 0.2864761904761905, "grad_norm": 0.04863520339131355, "kl": 0.9253824949264526, "learning_rate": 4.999962609435377e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4512 }, { "completion_length": 244.7857208251953, "epoch": 0.28653968253968254, "grad_norm": 0.027049655094742775, "kl": 0.8501050472259521, "learning_rate": 4.99996187269506e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4513 }, { "completion_length": 234.6428680419922, "epoch": 0.2866031746031746, "grad_norm": 0.03195007145404816, "kl": 0.9145139455795288, "learning_rate": 4.999961128767106e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4514 }, { "completion_length": 243.21429443359375, "epoch": 0.2866666666666667, "grad_norm": 0.03663218393921852, "kl": 0.8619134426116943, "learning_rate": 4.999960377651516e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4515 }, { "completion_length": 222.50001525878906, "epoch": 0.2867301587301587, "grad_norm": 0.024337567389011383, "kl": 0.9223791360855103, "learning_rate": 4.999959619348294e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4516 }, { "completion_length": 310.71429443359375, "epoch": 0.28679365079365077, "grad_norm": 0.02006271854043007, "kl": 0.7327356934547424, "learning_rate": 4.999958853857442e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4517 }, { "completion_length": 174.1428680419922, "epoch": 0.28685714285714287, "grad_norm": 1.7363731861114502, "kl": 1.1965022087097168, "learning_rate": 4.999958081178961e-07, "loss": 0.0012, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4518 }, { "completion_length": 283.0714416503906, "epoch": 0.2869206349206349, "grad_norm": 0.02149876207113266, "kl": 0.8237995505332947, "learning_rate": 4.999957301312854e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4519 }, { "completion_length": 213.1428680419922, "epoch": 0.286984126984127, "grad_norm": 0.035212621092796326, "kl": 1.0255260467529297, "learning_rate": 4.999956514259123e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4520 }, { "completion_length": 212.6428680419922, "epoch": 0.28704761904761905, "grad_norm": 0.027422158047556877, "kl": 1.004692792892456, "learning_rate": 4.999955720017771e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4521 }, { "completion_length": 185.2857208251953, "epoch": 0.2871111111111111, "grad_norm": 0.035001762211322784, "kl": 1.0877684354782104, "learning_rate": 4.999954918588799e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4522 }, { "completion_length": 220.71429443359375, "epoch": 0.2871746031746032, "grad_norm": 0.06685949116945267, "kl": 0.98175448179245, "learning_rate": 4.999954109972209e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4523 }, { "completion_length": 209.85714721679688, "epoch": 0.28723809523809524, "grad_norm": 0.027558114379644394, "kl": 1.0949718952178955, "learning_rate": 4.999953294168006e-07, "loss": 0.0011, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4524 }, { "completion_length": 189.50001525878906, "epoch": 0.2873015873015873, "grad_norm": 2.5839598178863525, "kl": 1.1776885986328125, "learning_rate": 4.999952471176191e-07, "loss": 0.0012, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4525 }, { "completion_length": 230.57144165039062, "epoch": 0.2873650793650794, "grad_norm": 0.026758138090372086, "kl": 0.9315088987350464, "learning_rate": 4.999951640996765e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4526 }, { "completion_length": 236.21429443359375, "epoch": 0.2874285714285714, "grad_norm": 0.05046243593096733, "kl": 0.9260435700416565, "learning_rate": 4.999950803629732e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4527 }, { "completion_length": 213.1428680419922, "epoch": 0.28749206349206347, "grad_norm": 0.03527776524424553, "kl": 0.9855396151542664, "learning_rate": 4.999949959075093e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4528 }, { "completion_length": 222.07144165039062, "epoch": 0.28755555555555556, "grad_norm": 0.03408358618617058, "kl": 0.9433889985084534, "learning_rate": 4.999949107332853e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4529 }, { "completion_length": 226.7857208251953, "epoch": 0.2876190476190476, "grad_norm": 0.02504662238061428, "kl": 0.8616340160369873, "learning_rate": 4.999948248403012e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4530 }, { "completion_length": 267.14288330078125, "epoch": 0.2876825396825397, "grad_norm": 0.018906569108366966, "kl": 0.6774017214775085, "learning_rate": 4.999947382285573e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4531 }, { "completion_length": 225.1428680419922, "epoch": 0.28774603174603175, "grad_norm": 0.019184045493602753, "kl": 0.8646345734596252, "learning_rate": 4.999946508980538e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4532 }, { "completion_length": 235.07144165039062, "epoch": 0.2878095238095238, "grad_norm": 0.020676277577877045, "kl": 0.8563345670700073, "learning_rate": 4.99994562848791e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4533 }, { "completion_length": 183.00001525878906, "epoch": 0.2878730158730159, "grad_norm": 1.5373308658599854, "kl": 1.0253437757492065, "learning_rate": 4.999944740807693e-07, "loss": 0.001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4534 }, { "completion_length": 226.07144165039062, "epoch": 0.28793650793650793, "grad_norm": 0.024552971124649048, "kl": 0.7885817289352417, "learning_rate": 4.999943845939888e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4535 }, { "completion_length": 220.2857208251953, "epoch": 0.288, "grad_norm": 0.0315864160656929, "kl": 0.9213454723358154, "learning_rate": 4.999942943884498e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4536 }, { "completion_length": 224.85714721679688, "epoch": 0.2880634920634921, "grad_norm": 0.022550296038389206, "kl": 0.7521594166755676, "learning_rate": 4.999942034641525e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4537 }, { "completion_length": 178.71429443359375, "epoch": 0.2881269841269841, "grad_norm": 1.2428683042526245, "kl": 1.0887004137039185, "learning_rate": 4.999941118210972e-07, "loss": 0.0011, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4538 }, { "completion_length": 217.6428680419922, "epoch": 0.28819047619047616, "grad_norm": 1.2943187952041626, "kl": 0.7872563004493713, "learning_rate": 4.999940194592841e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4539 }, { "completion_length": 236.1428680419922, "epoch": 0.28825396825396826, "grad_norm": 0.021161938086152077, "kl": 0.7521269917488098, "learning_rate": 4.999939263787136e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4540 }, { "completion_length": 250.35714721679688, "epoch": 0.2883174603174603, "grad_norm": 0.02105868235230446, "kl": 0.7070871591567993, "learning_rate": 4.999938325793859e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4541 }, { "completion_length": 236.21429443359375, "epoch": 0.2883809523809524, "grad_norm": 1.2300406694412231, "kl": 0.7194188237190247, "learning_rate": 4.999937380613014e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4542 }, { "completion_length": 220.85714721679688, "epoch": 0.28844444444444445, "grad_norm": 1.083013892173767, "kl": 0.833113968372345, "learning_rate": 4.999936428244601e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4543 }, { "completion_length": 214.7857208251953, "epoch": 0.2885079365079365, "grad_norm": 0.02341564930975437, "kl": 0.7921010255813599, "learning_rate": 4.999935468688624e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4544 }, { "completion_length": 268.4285888671875, "epoch": 0.2885714285714286, "grad_norm": 0.024792054668068886, "kl": 0.6754072308540344, "learning_rate": 4.999934501945087e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4545 }, { "completion_length": 256.64288330078125, "epoch": 0.28863492063492063, "grad_norm": 0.028403913602232933, "kl": 0.7243621349334717, "learning_rate": 4.99993352801399e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4546 }, { "completion_length": 229.1428680419922, "epoch": 0.2886984126984127, "grad_norm": 0.022031130269169807, "kl": 0.7392446398735046, "learning_rate": 4.999932546895339e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4547 }, { "completion_length": 234.07144165039062, "epoch": 0.2887619047619048, "grad_norm": 0.01617853157222271, "kl": 0.6832769513130188, "learning_rate": 4.999931558589136e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4548 }, { "completion_length": 221.6428680419922, "epoch": 0.2888253968253968, "grad_norm": 0.020655106753110886, "kl": 0.7518410086631775, "learning_rate": 4.999930563095381e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4549 }, { "completion_length": 217.07144165039062, "epoch": 0.28888888888888886, "grad_norm": 0.020552415400743484, "kl": 0.7515419721603394, "learning_rate": 4.999929560414081e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4550 }, { "completion_length": 221.7857208251953, "epoch": 0.28895238095238096, "grad_norm": 0.01786130666732788, "kl": 0.7170286774635315, "learning_rate": 4.999928550545236e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4551 }, { "completion_length": 217.35714721679688, "epoch": 0.289015873015873, "grad_norm": 1.266353726387024, "kl": 0.8085719347000122, "learning_rate": 4.99992753348885e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4552 }, { "completion_length": 238.85714721679688, "epoch": 0.2890793650793651, "grad_norm": 0.022397099062800407, "kl": 0.683515191078186, "learning_rate": 4.999926509244925e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4553 }, { "completion_length": 224.9285888671875, "epoch": 0.28914285714285715, "grad_norm": 1.672434687614441, "kl": 0.8076632618904114, "learning_rate": 4.999925477813465e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4554 }, { "completion_length": 166.2857208251953, "epoch": 0.2892063492063492, "grad_norm": 0.024332914501428604, "kl": 0.9684084057807922, "learning_rate": 4.999924439194474e-07, "loss": 0.001, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4555 }, { "completion_length": 236.71429443359375, "epoch": 0.2892698412698413, "grad_norm": 0.021625975146889687, "kl": 0.685332715511322, "learning_rate": 4.999923393387952e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4556 }, { "completion_length": 261.0, "epoch": 0.28933333333333333, "grad_norm": 0.024591388180851936, "kl": 0.6883918642997742, "learning_rate": 4.999922340393905e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4557 }, { "completion_length": 226.7857208251953, "epoch": 0.2893968253968254, "grad_norm": 0.019582469016313553, "kl": 0.6691152453422546, "learning_rate": 4.999921280212333e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4558 }, { "completion_length": 234.00001525878906, "epoch": 0.2894603174603175, "grad_norm": 0.01999831199645996, "kl": 0.714259684085846, "learning_rate": 4.999920212843242e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4559 }, { "completion_length": 234.7857208251953, "epoch": 0.2895238095238095, "grad_norm": 0.01651042513549328, "kl": 0.7111231684684753, "learning_rate": 4.999919138286633e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4560 }, { "completion_length": 205.57144165039062, "epoch": 0.28958730158730156, "grad_norm": 0.020107440650463104, "kl": 0.7530276775360107, "learning_rate": 4.999918056542511e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4561 }, { "completion_length": 267.0714416503906, "epoch": 0.28965079365079366, "grad_norm": 0.780341625213623, "kl": 0.7648287415504456, "learning_rate": 4.999916967610877e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4562 }, { "completion_length": 272.0714416503906, "epoch": 0.2897142857142857, "grad_norm": 0.01711159199476242, "kl": 0.5921506881713867, "learning_rate": 4.999915871491736e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4563 }, { "completion_length": 211.07144165039062, "epoch": 0.2897777777777778, "grad_norm": 0.020329652354121208, "kl": 0.7417401075363159, "learning_rate": 4.99991476818509e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4564 }, { "completion_length": 201.07144165039062, "epoch": 0.28984126984126984, "grad_norm": 0.019474251195788383, "kl": 0.7700560092926025, "learning_rate": 4.999913657690942e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4565 }, { "completion_length": 235.35714721679688, "epoch": 0.2899047619047619, "grad_norm": 0.015260864980518818, "kl": 0.6405893564224243, "learning_rate": 4.999912540009295e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4566 }, { "completion_length": 260.21429443359375, "epoch": 0.289968253968254, "grad_norm": 0.013792893849313259, "kl": 0.5877964496612549, "learning_rate": 4.999911415140154e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4567 }, { "completion_length": 163.1428680419922, "epoch": 0.29003174603174603, "grad_norm": 0.019075507298111916, "kl": 0.9297053813934326, "learning_rate": 4.999910283083521e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4568 }, { "completion_length": 244.1428680419922, "epoch": 0.2900952380952381, "grad_norm": 0.01613607443869114, "kl": 0.6019994020462036, "learning_rate": 4.9999091438394e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4569 }, { "completion_length": 225.4285888671875, "epoch": 0.29015873015873017, "grad_norm": 1.72795569896698, "kl": 0.6909247636795044, "learning_rate": 4.999907997407793e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4570 }, { "completion_length": 208.07144165039062, "epoch": 0.2902222222222222, "grad_norm": 0.013647167012095451, "kl": 0.6706145405769348, "learning_rate": 4.999906843788703e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4571 }, { "completion_length": 197.35714721679688, "epoch": 0.29028571428571426, "grad_norm": 0.018158623948693275, "kl": 0.708670973777771, "learning_rate": 4.999905682982135e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4572 }, { "completion_length": 204.07144165039062, "epoch": 0.29034920634920636, "grad_norm": 1.6650420427322388, "kl": 0.7227233052253723, "learning_rate": 4.999904514988092e-07, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_originality_func": 0.8571429252624512, "step": 4573 }, { "completion_length": 213.1428680419922, "epoch": 0.2904126984126984, "grad_norm": 1.303656816482544, "kl": 0.7018543481826782, "learning_rate": 4.999903339806576e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4574 }, { "completion_length": 239.1428680419922, "epoch": 0.2904761904761905, "grad_norm": 0.8987554907798767, "kl": 0.5869736075401306, "learning_rate": 4.999902157437591e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4575 }, { "completion_length": 203.6428680419922, "epoch": 0.29053968253968254, "grad_norm": 0.019829554483294487, "kl": 0.8226071000099182, "learning_rate": 4.999900967881142e-07, "loss": 0.0008, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4576 }, { "completion_length": 225.4285888671875, "epoch": 0.2906031746031746, "grad_norm": 0.013086114078760147, "kl": 0.6628443598747253, "learning_rate": 4.99989977113723e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4577 }, { "completion_length": 222.21429443359375, "epoch": 0.2906666666666667, "grad_norm": 1.4449797868728638, "kl": 0.7067251801490784, "learning_rate": 4.999898567205861e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4578 }, { "completion_length": 249.9285888671875, "epoch": 0.2907301587301587, "grad_norm": 0.013703769072890282, "kl": 0.6110812425613403, "learning_rate": 4.999897356087036e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4579 }, { "completion_length": 228.00001525878906, "epoch": 0.29079365079365077, "grad_norm": 0.023254873231053352, "kl": 0.6508129835128784, "learning_rate": 4.99989613778076e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4580 }, { "completion_length": 216.42857360839844, "epoch": 0.29085714285714287, "grad_norm": 0.01586894504725933, "kl": 0.6630246639251709, "learning_rate": 4.999894912287035e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4581 }, { "completion_length": 184.21429443359375, "epoch": 0.2909206349206349, "grad_norm": 0.016700485721230507, "kl": 0.8179503083229065, "learning_rate": 4.999893679605867e-07, "loss": 0.0008, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_originality_func": 0.8571429252624512, "step": 4582 }, { "completion_length": 226.7857208251953, "epoch": 0.290984126984127, "grad_norm": 0.019282931461930275, "kl": 0.6849563121795654, "learning_rate": 4.999892439737257e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4583 }, { "completion_length": 192.85714721679688, "epoch": 0.29104761904761905, "grad_norm": 1.2589662075042725, "kl": 0.8266213536262512, "learning_rate": 4.99989119268121e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4584 }, { "completion_length": 240.21429443359375, "epoch": 0.2911111111111111, "grad_norm": 1.0729355812072754, "kl": 0.6302468776702881, "learning_rate": 4.999889938437729e-07, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4585 }, { "completion_length": 206.2857208251953, "epoch": 0.2911746031746032, "grad_norm": 1.4106168746948242, "kl": 0.8089088201522827, "learning_rate": 4.999888677006818e-07, "loss": 0.0008, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4586 }, { "completion_length": 219.21429443359375, "epoch": 0.29123809523809524, "grad_norm": 1.7675431966781616, "kl": 0.7041475176811218, "learning_rate": 4.99988740838848e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4587 }, { "completion_length": 253.1428680419922, "epoch": 0.2913015873015873, "grad_norm": 0.013730177655816078, "kl": 0.6595959067344666, "learning_rate": 4.999886132582721e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4588 }, { "completion_length": 205.57144165039062, "epoch": 0.2913650793650794, "grad_norm": 0.09986013174057007, "kl": 0.747195839881897, "learning_rate": 4.999884849589541e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4589 }, { "completion_length": 228.2857208251953, "epoch": 0.2914285714285714, "grad_norm": 0.01671597734093666, "kl": 0.7372283339500427, "learning_rate": 4.999883559408944e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4590 }, { "completion_length": 166.92857360839844, "epoch": 0.29149206349206347, "grad_norm": 0.023339593783020973, "kl": 0.9171629548072815, "learning_rate": 4.999882262040939e-07, "loss": 0.0009, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4591 }, { "completion_length": 227.00001525878906, "epoch": 0.29155555555555557, "grad_norm": 1.2568261623382568, "kl": 0.6502583622932434, "learning_rate": 4.999880957485523e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4592 }, { "completion_length": 218.42857360839844, "epoch": 0.2916190476190476, "grad_norm": 0.015203270129859447, "kl": 0.6820968985557556, "learning_rate": 4.999879645742704e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4593 }, { "completion_length": 210.85714721679688, "epoch": 0.2916825396825397, "grad_norm": 0.016200793907046318, "kl": 0.7076600193977356, "learning_rate": 4.999878326812484e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4594 }, { "completion_length": 267.5, "epoch": 0.29174603174603175, "grad_norm": 0.0120950136333704, "kl": 0.5941957831382751, "learning_rate": 4.999877000694867e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4595 }, { "completion_length": 237.35714721679688, "epoch": 0.2918095238095238, "grad_norm": 0.01566070131957531, "kl": 0.6173586845397949, "learning_rate": 4.999875667389857e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4596 }, { "completion_length": 231.50001525878906, "epoch": 0.2918730158730159, "grad_norm": 1.046263575553894, "kl": 0.6764680743217468, "learning_rate": 4.999874326897458e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4597 }, { "completion_length": 248.2857208251953, "epoch": 0.29193650793650794, "grad_norm": 0.05823395028710365, "kl": 0.6653379201889038, "learning_rate": 4.999872979217675e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4598 }, { "completion_length": 265.0714416503906, "epoch": 0.292, "grad_norm": 1.7151906490325928, "kl": 0.6594507098197937, "learning_rate": 4.999871624350509e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_originality_func": 0.9285714626312256, "step": 4599 }, { "completion_length": 253.35714721679688, "epoch": 0.2920634920634921, "grad_norm": 0.020651085302233696, "kl": 0.7282385230064392, "learning_rate": 4.999870262295966e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_originality_func": 1.0, "step": 4600 } ], "logging_steps": 1, "max_steps": 63000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }