| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "episode": 9264, |
| "epoch": 0.254128490700609, |
| "eval_steps": 500, |
| "global_step": 579, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "episode": 16, |
| "epoch": 0.00043890931036374606, |
| "loss/policy_avg": -0.012268156744539738, |
| "lr": 3e-05, |
| "objective/entropy": 167.71548461914062, |
| "objective/kl": 0.21667994558811188, |
| "objective/non_score_reward": -0.021668005734682083, |
| "objective/rlhf_reward": 4.313327980786562, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7804256677627563, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7981860041618347, |
| "step": 0, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.002265691757202 |
| }, |
| { |
| "episode": 32, |
| "epoch": 0.0008778186207274921, |
| "loss/policy_avg": 0.28600460290908813, |
| "lr": 2.9973684210526316e-05, |
| "objective/entropy": 178.8330535888672, |
| "objective/kl": 12.701642990112305, |
| "objective/non_score_reward": -1.2701644897460938, |
| "objective/rlhf_reward": -0.6806579291820523, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 300.53009033203125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7146598100662231, |
| "step": 1, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.010622978210449 |
| }, |
| { |
| "episode": 48, |
| "epoch": 0.0013167279310912384, |
| "loss/policy_avg": -0.22723183035850525, |
| "lr": 2.994736842105263e-05, |
| "objective/entropy": -15.617034912109375, |
| "objective/kl": 6.93002986907959, |
| "objective/non_score_reward": -0.6930029392242432, |
| "objective/rlhf_reward": -2.37201172709465, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 79.79334259033203, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7464854717254639, |
| "step": 2, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9983209371566772 |
| }, |
| { |
| "episode": 64, |
| "epoch": 0.0017556372414549843, |
| "loss/policy_avg": 0.8261077404022217, |
| "lr": 2.992105263157895e-05, |
| "objective/entropy": 117.23881530761719, |
| "objective/kl": 12.038005828857422, |
| "objective/non_score_reward": -1.2038006782531738, |
| "objective/rlhf_reward": -0.4152026534080502, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 105.13363647460938, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.49201399087905884, |
| "step": 3, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9965429306030273 |
| }, |
| { |
| "episode": 80, |
| "epoch": 0.0021945465518187304, |
| "loss/policy_avg": 0.11680221557617188, |
| "lr": 2.9894736842105264e-05, |
| "objective/entropy": 126.83998107910156, |
| "objective/kl": 22.160707473754883, |
| "objective/non_score_reward": -2.2160706520080566, |
| "objective/rlhf_reward": -4.46428314447403, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 121.82243347167969, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.680046796798706, |
| "step": 4, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9965834617614746 |
| }, |
| { |
| "episode": 96, |
| "epoch": 0.0026334558621824767, |
| "loss/policy_avg": 0.6160796284675598, |
| "lr": 2.986842105263158e-05, |
| "objective/entropy": 109.90502166748047, |
| "objective/kl": 18.486812591552734, |
| "objective/non_score_reward": -1.8486812114715576, |
| "objective/rlhf_reward": -2.9947244882583615, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 164.01425170898438, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6090853810310364, |
| "step": 5, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9926069974899292 |
| }, |
| { |
| "episode": 112, |
| "epoch": 0.0030723651725462226, |
| "loss/policy_avg": 0.4473002552986145, |
| "lr": 2.9842105263157894e-05, |
| "objective/entropy": 171.5350341796875, |
| "objective/kl": 17.367782592773438, |
| "objective/non_score_reward": -1.7367782592773438, |
| "objective/rlhf_reward": -6.547113275527954, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 34.7076301574707, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.65798020362854, |
| "step": 6, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9968626499176025 |
| }, |
| { |
| "episode": 128, |
| "epoch": 0.0035112744829099685, |
| "loss/policy_avg": 0.19109274446964264, |
| "lr": 2.9815789473684212e-05, |
| "objective/entropy": 134.9733428955078, |
| "objective/kl": 25.905290603637695, |
| "objective/non_score_reward": -2.590528964996338, |
| "objective/rlhf_reward": -8.414705256895957, |
| "objective/scores": 0.4868528072345416, |
| "policy/approxkl_avg": 122.29130554199219, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5049231052398682, |
| "step": 7, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9954898357391357 |
| }, |
| { |
| "episode": 144, |
| "epoch": 0.003950183793273714, |
| "loss/policy_avg": -0.12436092644929886, |
| "lr": 2.9789473684210527e-05, |
| "objective/entropy": 199.00608825683594, |
| "objective/kl": 45.45051574707031, |
| "objective/non_score_reward": -4.545051574707031, |
| "objective/rlhf_reward": -13.78020534515381, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.8954527378082275, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7475897073745728, |
| "step": 8, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.000213623046875 |
| }, |
| { |
| "episode": 160, |
| "epoch": 0.004389093103637461, |
| "loss/policy_avg": 0.7675253748893738, |
| "lr": 2.9763157894736842e-05, |
| "objective/entropy": 142.65106201171875, |
| "objective/kl": 58.533485412597656, |
| "objective/non_score_reward": -5.853349208831787, |
| "objective/rlhf_reward": -23.01339683532715, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 63.6219596862793, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5712227821350098, |
| "step": 9, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9968998432159424 |
| }, |
| { |
| "episode": 176, |
| "epoch": 0.004828002414001207, |
| "loss/policy_avg": 0.6077067852020264, |
| "lr": 2.9736842105263157e-05, |
| "objective/entropy": 187.40093994140625, |
| "objective/kl": 55.569950103759766, |
| "objective/non_score_reward": -5.556994915008545, |
| "objective/rlhf_reward": -17.82797966003418, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 6.850739002227783, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7330949902534485, |
| "step": 10, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9972755908966064 |
| }, |
| { |
| "episode": 192, |
| "epoch": 0.005266911724364953, |
| "loss/policy_avg": 0.9427204132080078, |
| "lr": 2.9710526315789472e-05, |
| "objective/entropy": 169.49903869628906, |
| "objective/kl": 43.81541442871094, |
| "objective/non_score_reward": -4.3815412521362305, |
| "objective/rlhf_reward": -19.526166915893555, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.3010352849960327, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.637176513671875, |
| "step": 11, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.003171920776367 |
| }, |
| { |
| "episode": 208, |
| "epoch": 0.005705821034728699, |
| "loss/policy_avg": 0.3998699188232422, |
| "lr": 2.968421052631579e-05, |
| "objective/entropy": 136.76266479492188, |
| "objective/kl": 48.54095458984375, |
| "objective/non_score_reward": -4.854095935821533, |
| "objective/rlhf_reward": -15.016384339332582, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 33.5024528503418, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5481820106506348, |
| "step": 12, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9996936321258545 |
| }, |
| { |
| "episode": 224, |
| "epoch": 0.006144730345092445, |
| "loss/policy_avg": 0.1956150382757187, |
| "lr": 2.9657894736842106e-05, |
| "objective/entropy": 136.42724609375, |
| "objective/kl": 57.16869354248047, |
| "objective/non_score_reward": -5.716869354248047, |
| "objective/rlhf_reward": -18.46747741699219, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 7.101135730743408, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5565441250801086, |
| "step": 13, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.000586748123169 |
| }, |
| { |
| "episode": 240, |
| "epoch": 0.0065836396554561916, |
| "loss/policy_avg": 1.4479000568389893, |
| "lr": 2.963157894736842e-05, |
| "objective/entropy": 180.2125244140625, |
| "objective/kl": 33.086238861083984, |
| "objective/non_score_reward": -3.308624029159546, |
| "objective/rlhf_reward": -15.234495162963867, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 3.269646406173706, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.726157546043396, |
| "step": 14, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.0014841556549072 |
| }, |
| { |
| "episode": 256, |
| "epoch": 0.007022548965819937, |
| "loss/policy_avg": -0.2384345531463623, |
| "lr": 2.9605263157894735e-05, |
| "objective/entropy": 148.7078399658203, |
| "objective/kl": 47.15727996826172, |
| "objective/non_score_reward": -4.715727806091309, |
| "objective/rlhf_reward": -16.740206422583135, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 148.09915161132812, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5780054330825806, |
| "step": 15, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.997875690460205 |
| }, |
| { |
| "episode": 272, |
| "epoch": 0.007461458276183683, |
| "loss/policy_avg": 0.8147653341293335, |
| "lr": 2.957894736842105e-05, |
| "objective/entropy": 171.16403198242188, |
| "objective/kl": 43.11994934082031, |
| "objective/non_score_reward": -4.311994552612305, |
| "objective/rlhf_reward": -19.24797821044922, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 8.860128402709961, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6768981218338013, |
| "step": 16, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9993420839309692 |
| }, |
| { |
| "episode": 288, |
| "epoch": 0.007900367586547429, |
| "loss/policy_avg": 0.5893005728721619, |
| "lr": 2.955263157894737e-05, |
| "objective/entropy": 123.02507781982422, |
| "objective/kl": 49.891517639160156, |
| "objective/non_score_reward": -4.9891510009765625, |
| "objective/rlhf_reward": -17.032885228039, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 6.687857151031494, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.4659116864204407, |
| "step": 17, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.002612352371216 |
| }, |
| { |
| "episode": 304, |
| "epoch": 0.008339276896911175, |
| "loss/policy_avg": 2.1211345195770264, |
| "lr": 2.9526315789473684e-05, |
| "objective/entropy": -174.25625610351562, |
| "objective/kl": 35.093326568603516, |
| "objective/non_score_reward": -3.5093321800231934, |
| "objective/rlhf_reward": -16.037328720092773, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.4923981428146362, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6755027770996094, |
| "step": 18, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9988504648208618 |
| }, |
| { |
| "episode": 320, |
| "epoch": 0.008778186207274922, |
| "loss/policy_avg": 4.73185396194458, |
| "lr": 2.95e-05, |
| "objective/entropy": 155.82408142089844, |
| "objective/kl": 64.35771179199219, |
| "objective/non_score_reward": -6.4357709884643555, |
| "objective/rlhf_reward": -25.343085741996767, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 19.07648468017578, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6366579532623291, |
| "step": 19, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.0000834465026855 |
| }, |
| { |
| "episode": 336, |
| "epoch": 0.009217095517638668, |
| "loss/policy_avg": 0.7567238211631775, |
| "lr": 2.9473684210526314e-05, |
| "objective/entropy": 134.41641235351562, |
| "objective/kl": 29.073949813842773, |
| "objective/non_score_reward": -2.9073948860168457, |
| "objective/rlhf_reward": -13.629579544067383, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 550.8251953125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5258337259292603, |
| "step": 20, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9921021461486816 |
| }, |
| { |
| "episode": 352, |
| "epoch": 0.009656004828002414, |
| "loss/policy_avg": 0.7278516888618469, |
| "lr": 2.9447368421052635e-05, |
| "objective/entropy": 156.93792724609375, |
| "objective/kl": 41.105464935302734, |
| "objective/non_score_reward": -4.110546588897705, |
| "objective/rlhf_reward": -18.44218635559082, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 242.4786376953125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6374942064285278, |
| "step": 21, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9990363121032715 |
| }, |
| { |
| "episode": 368, |
| "epoch": 0.01009491413836616, |
| "loss/policy_avg": -0.12158381938934326, |
| "lr": 2.942105263157895e-05, |
| "objective/entropy": 162.37814331054688, |
| "objective/kl": 47.4278450012207, |
| "objective/non_score_reward": -4.74278450012207, |
| "objective/rlhf_reward": -20.97113800048828, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.3796932697296143, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6018426418304443, |
| "step": 22, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.00181245803833 |
| }, |
| { |
| "episode": 384, |
| "epoch": 0.010533823448729907, |
| "loss/policy_avg": 1.9867658615112305, |
| "lr": 2.9394736842105265e-05, |
| "objective/entropy": 164.5052947998047, |
| "objective/kl": 66.38179779052734, |
| "objective/non_score_reward": -6.638179779052734, |
| "objective/rlhf_reward": -22.1527184009552, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 35.980594635009766, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6241673231124878, |
| "step": 23, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9957327842712402 |
| }, |
| { |
| "episode": 400, |
| "epoch": 0.010972732759093651, |
| "loss/policy_avg": -0.5419085025787354, |
| "lr": 2.936842105263158e-05, |
| "objective/entropy": 145.57208251953125, |
| "objective/kl": 43.03409957885742, |
| "objective/non_score_reward": -4.303410053253174, |
| "objective/rlhf_reward": -15.551780229032623, |
| "objective/scores": 0.41546487678572874, |
| "policy/approxkl_avg": 163.27047729492188, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.547484815120697, |
| "step": 24, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.0120882987976074 |
| }, |
| { |
| "episode": 416, |
| "epoch": 0.011411642069457398, |
| "loss/policy_avg": -0.24992617964744568, |
| "lr": 2.9342105263157895e-05, |
| "objective/entropy": 127.15754699707031, |
| "objective/kl": 41.97152328491211, |
| "objective/non_score_reward": -4.197152137756348, |
| "objective/rlhf_reward": -18.78860855102539, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 3.1116750240325928, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5002591609954834, |
| "step": 25, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.001451015472412 |
| }, |
| { |
| "episode": 432, |
| "epoch": 0.011850551379821144, |
| "loss/policy_avg": 1.5105552673339844, |
| "lr": 2.9315789473684214e-05, |
| "objective/entropy": 197.1464080810547, |
| "objective/kl": 56.54367446899414, |
| "objective/non_score_reward": -5.654367923736572, |
| "objective/rlhf_reward": -24.61747169494629, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 3.033173084259033, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7858256101608276, |
| "step": 26, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9994834661483765 |
| }, |
| { |
| "episode": 448, |
| "epoch": 0.01228946069018489, |
| "loss/policy_avg": 1.556326985359192, |
| "lr": 2.928947368421053e-05, |
| "objective/entropy": 142.51620483398438, |
| "objective/kl": 45.598716735839844, |
| "objective/non_score_reward": -4.559871673583984, |
| "objective/rlhf_reward": -13.83948621749878, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 282.253173828125, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6140550374984741, |
| "step": 27, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.998161792755127 |
| }, |
| { |
| "episode": 464, |
| "epoch": 0.012728370000548637, |
| "loss/policy_avg": 0.05400470644235611, |
| "lr": 2.9263157894736844e-05, |
| "objective/entropy": 133.95001220703125, |
| "objective/kl": 27.514503479003906, |
| "objective/non_score_reward": -2.751450538635254, |
| "objective/rlhf_reward": -13.005802154541016, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.0789940357208252, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6318522691726685, |
| "step": 28, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.0006635189056396 |
| }, |
| { |
| "episode": 480, |
| "epoch": 0.013167279310912383, |
| "loss/policy_avg": -0.2949943244457245, |
| "lr": 2.923684210526316e-05, |
| "objective/entropy": 147.1396942138672, |
| "objective/kl": 54.152565002441406, |
| "objective/non_score_reward": -5.415256500244141, |
| "objective/rlhf_reward": -21.261026477813722, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 14.0246000289917, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5865544676780701, |
| "step": 29, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.000847339630127 |
| }, |
| { |
| "episode": 496, |
| "epoch": 0.01360618862127613, |
| "loss/policy_avg": 0.7968569397926331, |
| "lr": 2.9210526315789474e-05, |
| "objective/entropy": 171.741943359375, |
| "objective/kl": 41.975685119628906, |
| "objective/non_score_reward": -4.197568893432617, |
| "objective/rlhf_reward": -18.79027557373047, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 186.8196563720703, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6779192090034485, |
| "step": 30, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.001958131790161 |
| }, |
| { |
| "episode": 512, |
| "epoch": 0.014045097931639874, |
| "loss/policy_avg": 0.456326961517334, |
| "lr": 2.9184210526315792e-05, |
| "objective/entropy": 131.41111755371094, |
| "objective/kl": 44.016998291015625, |
| "objective/non_score_reward": -4.401699542999268, |
| "objective/rlhf_reward": -13.206798887252809, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.3007960319519043, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6488356590270996, |
| "step": 31, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 1.9991300106048584 |
| }, |
| { |
| "episode": 528, |
| "epoch": 0.01448400724200362, |
| "loss/policy_avg": 0.5430006980895996, |
| "lr": 2.9157894736842107e-05, |
| "objective/entropy": 190.74668884277344, |
| "objective/kl": 42.17557907104492, |
| "objective/non_score_reward": -4.217557907104492, |
| "objective/rlhf_reward": -18.87023162841797, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 224.27468872070312, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6937891840934753, |
| "step": 32, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 1.999834656715393 |
| }, |
| { |
| "episode": 544, |
| "epoch": 0.014922916552367367, |
| "loss/policy_avg": 0.9642138481140137, |
| "lr": 2.9131578947368422e-05, |
| "objective/entropy": 192.7371826171875, |
| "objective/kl": 72.31754302978516, |
| "objective/non_score_reward": -7.231754302978516, |
| "objective/rlhf_reward": -24.527017211914064, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.005494117736816, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7295612692832947, |
| "step": 33, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 1.9996287822723389 |
| }, |
| { |
| "episode": 560, |
| "epoch": 0.015361825862731113, |
| "loss/policy_avg": 0.09143030643463135, |
| "lr": 2.9105263157894737e-05, |
| "objective/entropy": 226.00430297851562, |
| "objective/kl": 47.70861053466797, |
| "objective/non_score_reward": -4.7708611488342285, |
| "objective/rlhf_reward": -21.083444595336914, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 8.21639633178711, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8707884550094604, |
| "step": 34, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 0, |
| "val/ratio": 2.0034539699554443 |
| }, |
| { |
| "episode": 576, |
| "epoch": 0.015800735173094858, |
| "loss/policy_avg": 0.8646840453147888, |
| "lr": 2.9078947368421055e-05, |
| "objective/entropy": 120.56684875488281, |
| "objective/kl": 43.689491271972656, |
| "objective/non_score_reward": -4.3689494132995605, |
| "objective/rlhf_reward": -19.475799560546875, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 3.5479981899261475, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6465896964073181, |
| "step": 35, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9970377683639526 |
| }, |
| { |
| "episode": 592, |
| "epoch": 0.016239644483458604, |
| "loss/policy_avg": -0.38367098569869995, |
| "lr": 2.905263157894737e-05, |
| "objective/entropy": 154.9678955078125, |
| "objective/kl": 35.52610397338867, |
| "objective/non_score_reward": -3.5526108741760254, |
| "objective/rlhf_reward": -9.810442781448366, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.522809982299805, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6246684789657593, |
| "step": 36, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.0017809867858887 |
| }, |
| { |
| "episode": 608, |
| "epoch": 0.01667855379382235, |
| "loss/policy_avg": 0.15106165409088135, |
| "lr": 2.9026315789473685e-05, |
| "objective/entropy": 146.68177795410156, |
| "objective/kl": 53.049625396728516, |
| "objective/non_score_reward": -5.304962635040283, |
| "objective/rlhf_reward": -16.819850540161134, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.3007020950317383, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7446657419204712, |
| "step": 37, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0011558532714844 |
| }, |
| { |
| "episode": 624, |
| "epoch": 0.017117463104186097, |
| "loss/policy_avg": -0.546560525894165, |
| "lr": 2.9e-05, |
| "objective/entropy": 173.34613037109375, |
| "objective/kl": 52.43684005737305, |
| "objective/non_score_reward": -5.243683815002441, |
| "objective/rlhf_reward": -22.974735260009766, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 169.8699188232422, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.6890407800674438, |
| "step": 38, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.007899761199951 |
| }, |
| { |
| "episode": 640, |
| "epoch": 0.017556372414549843, |
| "loss/policy_avg": 0.015053331851959229, |
| "lr": 2.8973684210526315e-05, |
| "objective/entropy": 173.69979858398438, |
| "objective/kl": 40.57459259033203, |
| "objective/non_score_reward": -4.057459354400635, |
| "objective/rlhf_reward": -18.22983741760254, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 7.52875280380249, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.9174096584320068, |
| "step": 39, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9973030090332031 |
| }, |
| { |
| "episode": 656, |
| "epoch": 0.01799528172491359, |
| "loss/policy_avg": -0.5114358067512512, |
| "lr": 2.8947368421052634e-05, |
| "objective/entropy": -104.62008666992188, |
| "objective/kl": 22.244895935058594, |
| "objective/non_score_reward": -2.224490165710449, |
| "objective/rlhf_reward": -4.49795994758606, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 138.92034912109375, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7481294274330139, |
| "step": 40, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.997785210609436 |
| }, |
| { |
| "episode": 672, |
| "epoch": 0.018434191035277336, |
| "loss/policy_avg": 0.7354981899261475, |
| "lr": 2.892105263157895e-05, |
| "objective/entropy": 135.70901489257812, |
| "objective/kl": 39.70330047607422, |
| "objective/non_score_reward": -3.970329999923706, |
| "objective/rlhf_reward": -15.481319880485536, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 192.59478759765625, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7106625437736511, |
| "step": 41, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9986684322357178 |
| }, |
| { |
| "episode": 688, |
| "epoch": 0.018873100345641082, |
| "loss/policy_avg": -0.014256155118346214, |
| "lr": 2.8894736842105263e-05, |
| "objective/entropy": 128.75439453125, |
| "objective/kl": 49.399986267089844, |
| "objective/non_score_reward": -4.939998626708984, |
| "objective/rlhf_reward": -19.359994983673097, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.645370364189148, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7817822098731995, |
| "step": 42, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.00180983543396 |
| }, |
| { |
| "episode": 704, |
| "epoch": 0.01931200965600483, |
| "loss/policy_avg": 1.0903524160385132, |
| "lr": 2.886842105263158e-05, |
| "objective/entropy": 124.43045043945312, |
| "objective/kl": 33.735557556152344, |
| "objective/non_score_reward": -3.3735556602478027, |
| "objective/rlhf_reward": -9.094222164154052, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.955012559890747, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6910259127616882, |
| "step": 43, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.001138210296631 |
| }, |
| { |
| "episode": 720, |
| "epoch": 0.019750918966368575, |
| "loss/policy_avg": 12.43572998046875, |
| "lr": 2.8842105263157897e-05, |
| "objective/entropy": 2.1484909057617188, |
| "objective/kl": 39.105247497558594, |
| "objective/non_score_reward": -3.910524845123291, |
| "objective/rlhf_reward": -15.242099142074586, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 32.037132263183594, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6875019669532776, |
| "step": 44, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.000812530517578 |
| }, |
| { |
| "episode": 736, |
| "epoch": 0.02018982827673232, |
| "loss/policy_avg": 2.7653656005859375, |
| "lr": 2.8815789473684212e-05, |
| "objective/entropy": 151.14181518554688, |
| "objective/kl": 39.08449935913086, |
| "objective/non_score_reward": -3.9084503650665283, |
| "objective/rlhf_reward": -17.633800506591797, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 12.080581665039062, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6674988865852356, |
| "step": 45, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.001593589782715 |
| }, |
| { |
| "episode": 752, |
| "epoch": 0.020628737587096067, |
| "loss/policy_avg": 0.26068630814552307, |
| "lr": 2.8789473684210527e-05, |
| "objective/entropy": 131.2846221923828, |
| "objective/kl": 46.933719635009766, |
| "objective/non_score_reward": -4.6933722496032715, |
| "objective/rlhf_reward": -14.37348852157593, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.599510669708252, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6592761874198914, |
| "step": 46, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.000056028366089 |
| }, |
| { |
| "episode": 768, |
| "epoch": 0.021067646897459814, |
| "loss/policy_avg": 0.36868762969970703, |
| "lr": 2.876315789473684e-05, |
| "objective/entropy": 168.22177124023438, |
| "objective/kl": 59.16243362426758, |
| "objective/non_score_reward": -5.916243076324463, |
| "objective/rlhf_reward": -25.66497230529785, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 22.434894561767578, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7536077499389648, |
| "step": 47, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 1.9961597919464111 |
| }, |
| { |
| "episode": 784, |
| "epoch": 0.02150655620782356, |
| "loss/policy_avg": -0.34539520740509033, |
| "lr": 2.8736842105263157e-05, |
| "objective/entropy": 28.4791259765625, |
| "objective/kl": 55.672943115234375, |
| "objective/non_score_reward": -5.567294597625732, |
| "objective/rlhf_reward": -24.269176483154297, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 92.67494201660156, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.768832802772522, |
| "step": 48, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.0010273456573486 |
| }, |
| { |
| "episode": 800, |
| "epoch": 0.021945465518187303, |
| "loss/policy_avg": 2.7739362716674805, |
| "lr": 2.8710526315789475e-05, |
| "objective/entropy": 152.84979248046875, |
| "objective/kl": 74.48204803466797, |
| "objective/non_score_reward": -7.44820499420166, |
| "objective/rlhf_reward": -27.67011326767591, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 20.374004364013672, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7515650987625122, |
| "step": 49, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0020508766174316 |
| }, |
| { |
| "episode": 816, |
| "epoch": 0.02238437482855105, |
| "loss/policy_avg": 0.8626595735549927, |
| "lr": 2.868421052631579e-05, |
| "objective/entropy": 91.1478271484375, |
| "objective/kl": 45.17556381225586, |
| "objective/non_score_reward": -4.517556667327881, |
| "objective/rlhf_reward": -15.146507178188536, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 10.44746208190918, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6176143884658813, |
| "step": 50, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.000572681427002 |
| }, |
| { |
| "episode": 832, |
| "epoch": 0.022823284138914796, |
| "loss/policy_avg": 3.834225654602051, |
| "lr": 2.8657894736842105e-05, |
| "objective/entropy": 79.80493927001953, |
| "objective/kl": 56.04777526855469, |
| "objective/non_score_reward": -5.6047773361206055, |
| "objective/rlhf_reward": -20.68577505747477, |
| "objective/scores": 0.43333333333333335, |
| "policy/approxkl_avg": 16.849597930908203, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7000266909599304, |
| "step": 51, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9981000423431396 |
| }, |
| { |
| "episode": 848, |
| "epoch": 0.023262193449278542, |
| "loss/policy_avg": 1.604856252670288, |
| "lr": 2.863157894736842e-05, |
| "objective/entropy": 91.78755187988281, |
| "objective/kl": 62.33799362182617, |
| "objective/non_score_reward": -6.233799934387207, |
| "objective/rlhf_reward": -26.935199737548828, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 156.7439422607422, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6846381425857544, |
| "step": 52, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9976816177368164 |
| }, |
| { |
| "episode": 864, |
| "epoch": 0.023701102759642288, |
| "loss/policy_avg": 0.5365209579467773, |
| "lr": 2.8605263157894735e-05, |
| "objective/entropy": -6.100943565368652, |
| "objective/kl": 42.75518798828125, |
| "objective/non_score_reward": -4.275519371032715, |
| "objective/rlhf_reward": -16.70207724571228, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 314.5965270996094, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6669777631759644, |
| "step": 53, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.99760103225708 |
| }, |
| { |
| "episode": 880, |
| "epoch": 0.024140012070006035, |
| "loss/policy_avg": -0.05100756883621216, |
| "lr": 2.8578947368421053e-05, |
| "objective/entropy": 120.99382781982422, |
| "objective/kl": 45.327186584472656, |
| "objective/non_score_reward": -4.532718658447266, |
| "objective/rlhf_reward": -13.730874633789064, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7438147068023682, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8100802898406982, |
| "step": 54, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0016448497772217 |
| }, |
| { |
| "episode": 896, |
| "epoch": 0.02457892138036978, |
| "loss/policy_avg": 0.9957195520401001, |
| "lr": 2.8552631578947368e-05, |
| "objective/entropy": 171.9380645751953, |
| "objective/kl": 78.47545623779297, |
| "objective/non_score_reward": -7.847545623779297, |
| "objective/rlhf_reward": -30.99018201828003, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 112.00227355957031, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6961182355880737, |
| "step": 55, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 1.9984160661697388 |
| }, |
| { |
| "episode": 912, |
| "epoch": 0.025017830690733527, |
| "loss/policy_avg": 0.5854477286338806, |
| "lr": 2.8526315789473683e-05, |
| "objective/entropy": 101.01473236083984, |
| "objective/kl": 57.770263671875, |
| "objective/non_score_reward": -5.777026176452637, |
| "objective/rlhf_reward": -25.108104705810547, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 10.684706687927246, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8344231843948364, |
| "step": 56, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.999055027961731 |
| }, |
| { |
| "episode": 928, |
| "epoch": 0.025456740001097274, |
| "loss/policy_avg": 1.005487322807312, |
| "lr": 2.8499999999999998e-05, |
| "objective/entropy": -35.803977966308594, |
| "objective/kl": 30.102699279785156, |
| "objective/non_score_reward": -3.010270118713379, |
| "objective/rlhf_reward": -11.641080474853517, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 27.260387420654297, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9162927865982056, |
| "step": 57, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9970905780792236 |
| }, |
| { |
| "episode": 944, |
| "epoch": 0.02589564931146102, |
| "loss/policy_avg": -0.5104620456695557, |
| "lr": 2.8473684210526317e-05, |
| "objective/entropy": 131.767822265625, |
| "objective/kl": 40.71240234375, |
| "objective/non_score_reward": -4.071239948272705, |
| "objective/rlhf_reward": -11.884960508346559, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.580596685409546, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8555915355682373, |
| "step": 58, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0032856464385986 |
| }, |
| { |
| "episode": 960, |
| "epoch": 0.026334558621824766, |
| "loss/policy_avg": -0.0323227159678936, |
| "lr": 2.844736842105263e-05, |
| "objective/entropy": 95.74278259277344, |
| "objective/kl": 50.46125793457031, |
| "objective/non_score_reward": -5.046125888824463, |
| "objective/rlhf_reward": -19.784503555297853, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 346.01953125, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7172888517379761, |
| "step": 59, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.000260591506958 |
| }, |
| { |
| "episode": 976, |
| "epoch": 0.026773467932188513, |
| "loss/policy_avg": 0.30933094024658203, |
| "lr": 2.8421052631578946e-05, |
| "objective/entropy": -6.390421390533447, |
| "objective/kl": 42.69683074951172, |
| "objective/non_score_reward": -4.269682884216309, |
| "objective/rlhf_reward": -12.678731775283813, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.9299302101135254, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7608700394630432, |
| "step": 60, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9992871284484863 |
| }, |
| { |
| "episode": 992, |
| "epoch": 0.02721237724255226, |
| "loss/policy_avg": 0.9537198543548584, |
| "lr": 2.839473684210526e-05, |
| "objective/entropy": 144.63778686523438, |
| "objective/kl": 42.113922119140625, |
| "objective/non_score_reward": -4.211391925811768, |
| "objective/rlhf_reward": -18.84556770324707, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 14.351249694824219, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6079916954040527, |
| "step": 61, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 1.9990580081939697 |
| }, |
| { |
| "episode": 1008, |
| "epoch": 0.027651286552916005, |
| "loss/policy_avg": -0.13979224860668182, |
| "lr": 2.836842105263158e-05, |
| "objective/entropy": 52.73601531982422, |
| "objective/kl": 42.785003662109375, |
| "objective/non_score_reward": -4.278500556945801, |
| "objective/rlhf_reward": -12.714001274108888, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.0111756324768066, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5955897569656372, |
| "step": 62, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0026631355285645 |
| }, |
| { |
| "episode": 1024, |
| "epoch": 0.028090195863279748, |
| "loss/policy_avg": 0.7119593620300293, |
| "lr": 2.8342105263157898e-05, |
| "objective/entropy": 37.45541763305664, |
| "objective/kl": 48.38474655151367, |
| "objective/non_score_reward": -4.838474750518799, |
| "objective/rlhf_reward": -17.62056566874186, |
| "objective/scores": 0.43333333333333335, |
| "policy/approxkl_avg": 1.2988009452819824, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7282307147979736, |
| "step": 63, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9981870651245117 |
| }, |
| { |
| "episode": 1040, |
| "epoch": 0.028529105173643494, |
| "loss/policy_avg": 0.22343069314956665, |
| "lr": 2.8315789473684213e-05, |
| "objective/entropy": -0.5227775573730469, |
| "objective/kl": 44.62450408935547, |
| "objective/non_score_reward": -4.4624505043029785, |
| "objective/rlhf_reward": -17.449802970886232, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 8.680784225463867, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.47706353664398193, |
| "step": 64, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9991333484649658 |
| }, |
| { |
| "episode": 1056, |
| "epoch": 0.02896801448400724, |
| "loss/policy_avg": 2.4624288082122803, |
| "lr": 2.8289473684210528e-05, |
| "objective/entropy": 44.281429290771484, |
| "objective/kl": 56.82603454589844, |
| "objective/non_score_reward": -5.682603359222412, |
| "objective/rlhf_reward": -20.33041343688965, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 190.14102172851562, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6275349259376526, |
| "step": 65, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9954755306243896 |
| }, |
| { |
| "episode": 1072, |
| "epoch": 0.029406923794370987, |
| "loss/policy_avg": 3.7873077392578125, |
| "lr": 2.8263157894736843e-05, |
| "objective/entropy": 21.29256820678711, |
| "objective/kl": 61.06785202026367, |
| "objective/non_score_reward": -6.106784820556641, |
| "objective/rlhf_reward": -24.02714011669159, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 13.861494064331055, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5628820657730103, |
| "step": 66, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9979462623596191 |
| }, |
| { |
| "episode": 1088, |
| "epoch": 0.029845833104734733, |
| "loss/policy_avg": 1.122948169708252, |
| "lr": 2.823684210526316e-05, |
| "objective/entropy": 199.56639099121094, |
| "objective/kl": 67.14576721191406, |
| "objective/non_score_reward": -6.714576244354248, |
| "objective/rlhf_reward": -22.458304977416994, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 260.06036376953125, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9761279225349426, |
| "step": 67, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9947412014007568 |
| }, |
| { |
| "episode": 1104, |
| "epoch": 0.03028474241509848, |
| "loss/policy_avg": -0.40396255254745483, |
| "lr": 2.8210526315789476e-05, |
| "objective/entropy": 143.4803924560547, |
| "objective/kl": 73.37672424316406, |
| "objective/non_score_reward": -7.337671279907227, |
| "objective/rlhf_reward": -31.350685119628906, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 65.98458862304688, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8562291860580444, |
| "step": 68, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0034923553466797 |
| }, |
| { |
| "episode": 1120, |
| "epoch": 0.030723651725462226, |
| "loss/policy_avg": -0.6217052936553955, |
| "lr": 2.818421052631579e-05, |
| "objective/entropy": 61.76605224609375, |
| "objective/kl": 55.31629180908203, |
| "objective/non_score_reward": -5.53162956237793, |
| "objective/rlhf_reward": -19.202798043132994, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 102.99107360839844, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7685708999633789, |
| "step": 69, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0019445419311523 |
| }, |
| { |
| "episode": 1136, |
| "epoch": 0.031162561035825972, |
| "loss/policy_avg": 0.9746075868606567, |
| "lr": 2.8157894736842106e-05, |
| "objective/entropy": 182.8162078857422, |
| "objective/kl": 73.46858215332031, |
| "objective/non_score_reward": -7.346858501434326, |
| "objective/rlhf_reward": -28.987434005737306, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 309.7725830078125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.9557604193687439, |
| "step": 70, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.999297857284546 |
| }, |
| { |
| "episode": 1152, |
| "epoch": 0.031601470346189715, |
| "loss/policy_avg": 0.011278927326202393, |
| "lr": 2.813157894736842e-05, |
| "objective/entropy": 177.80795288085938, |
| "objective/kl": 79.76069641113281, |
| "objective/non_score_reward": -7.976069927215576, |
| "objective/rlhf_reward": -31.504280185699464, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 8.640438079833984, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7453687787055969, |
| "step": 71, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 1, |
| "val/ratio": 2.0047388076782227 |
| }, |
| { |
| "episode": 1168, |
| "epoch": 0.032040379656553465, |
| "loss/policy_avg": 0.2741953730583191, |
| "lr": 2.810526315789474e-05, |
| "objective/entropy": 8.855690002441406, |
| "objective/kl": 39.5793342590332, |
| "objective/non_score_reward": -3.9579336643218994, |
| "objective/rlhf_reward": -15.43173418045044, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.3167859315872192, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6513254642486572, |
| "step": 72, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9987623691558838 |
| }, |
| { |
| "episode": 1184, |
| "epoch": 0.03247928896691721, |
| "loss/policy_avg": 2.404010772705078, |
| "lr": 2.8078947368421055e-05, |
| "objective/entropy": 104.23138427734375, |
| "objective/kl": 71.9098892211914, |
| "objective/non_score_reward": -7.1909894943237305, |
| "objective/rlhf_reward": -30.763957977294922, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 6.468600273132324, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6281869411468506, |
| "step": 73, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9987789392471313 |
| }, |
| { |
| "episode": 1200, |
| "epoch": 0.03291819827728096, |
| "loss/policy_avg": 1.7633247375488281, |
| "lr": 2.805263157894737e-05, |
| "objective/entropy": 90.97634887695312, |
| "objective/kl": 81.6705322265625, |
| "objective/non_score_reward": -8.16705322265625, |
| "objective/rlhf_reward": -29.744492922664854, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 318.6219482421875, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7491735219955444, |
| "step": 74, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9995994567871094 |
| }, |
| { |
| "episode": 1216, |
| "epoch": 0.0333571075876447, |
| "loss/policy_avg": 0.7622026801109314, |
| "lr": 2.8026315789473685e-05, |
| "objective/entropy": 109.73323059082031, |
| "objective/kl": 59.60847854614258, |
| "objective/non_score_reward": -5.960847854614258, |
| "objective/rlhf_reward": -25.84339141845703, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 54.609107971191406, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6811318397521973, |
| "step": 75, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9944725036621094 |
| }, |
| { |
| "episode": 1232, |
| "epoch": 0.03379601689800845, |
| "loss/policy_avg": -0.2917378544807434, |
| "lr": 2.8e-05, |
| "objective/entropy": 185.174072265625, |
| "objective/kl": 68.6063232421875, |
| "objective/non_score_reward": -6.860632419586182, |
| "objective/rlhf_reward": -29.442529678344727, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 4.8592119216918945, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.8129308223724365, |
| "step": 76, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9995498657226562 |
| }, |
| { |
| "episode": 1248, |
| "epoch": 0.03423492620837219, |
| "loss/policy_avg": -0.41292351484298706, |
| "lr": 2.7973684210526318e-05, |
| "objective/entropy": 140.15478515625, |
| "objective/kl": 64.91577911376953, |
| "objective/non_score_reward": -6.491578102111816, |
| "objective/rlhf_reward": -25.56631193161011, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.9902713298797607, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5978768467903137, |
| "step": 77, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0020546913146973 |
| }, |
| { |
| "episode": 1264, |
| "epoch": 0.03467383551873594, |
| "loss/policy_avg": 0.9187192916870117, |
| "lr": 2.7947368421052633e-05, |
| "objective/entropy": 35.76869201660156, |
| "objective/kl": 52.17973327636719, |
| "objective/non_score_reward": -5.217973709106445, |
| "objective/rlhf_reward": -20.471893405914308, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 117.89864349365234, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6329915523529053, |
| "step": 78, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9939618110656738 |
| }, |
| { |
| "episode": 1280, |
| "epoch": 0.035112744829099686, |
| "loss/policy_avg": 0.5058541297912598, |
| "lr": 2.7921052631578948e-05, |
| "objective/entropy": 125.1489028930664, |
| "objective/kl": 48.87464904785156, |
| "objective/non_score_reward": -4.88746452331543, |
| "objective/rlhf_reward": -21.54985809326172, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 9.59840202331543, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7014065384864807, |
| "step": 79, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9995161294937134 |
| }, |
| { |
| "episode": 1296, |
| "epoch": 0.035551654139463436, |
| "loss/policy_avg": 0.38564199209213257, |
| "lr": 2.7894736842105263e-05, |
| "objective/entropy": 20.80208969116211, |
| "objective/kl": 59.019775390625, |
| "objective/non_score_reward": -5.901978015899658, |
| "objective/rlhf_reward": -19.207911586761476, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.5623955726623535, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6976651549339294, |
| "step": 80, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9989688396453857 |
| }, |
| { |
| "episode": 1312, |
| "epoch": 0.03599056344982718, |
| "loss/policy_avg": 0.5554705858230591, |
| "lr": 2.786842105263158e-05, |
| "objective/entropy": -90.24222564697266, |
| "objective/kl": 38.30194854736328, |
| "objective/non_score_reward": -3.8301947116851807, |
| "objective/rlhf_reward": -17.320777893066406, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 201.749267578125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7411468029022217, |
| "step": 81, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9972386360168457 |
| }, |
| { |
| "episode": 1328, |
| "epoch": 0.03642947276019093, |
| "loss/policy_avg": 0.0963105857372284, |
| "lr": 2.7842105263157896e-05, |
| "objective/entropy": -7.672168731689453, |
| "objective/kl": 51.06816101074219, |
| "objective/non_score_reward": -5.106816291809082, |
| "objective/rlhf_reward": -16.02726492881775, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.051983833312988, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7723426818847656, |
| "step": 82, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9965218305587769 |
| }, |
| { |
| "episode": 1344, |
| "epoch": 0.03686838207055467, |
| "loss/policy_avg": 0.019892290234565735, |
| "lr": 2.781578947368421e-05, |
| "objective/entropy": 64.86529541015625, |
| "objective/kl": 33.41741943359375, |
| "objective/non_score_reward": -3.341742515563965, |
| "objective/rlhf_reward": -8.966969466209413, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7174110412597656, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6913268566131592, |
| "step": 83, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9992713928222656 |
| }, |
| { |
| "episode": 1360, |
| "epoch": 0.03730729138091842, |
| "loss/policy_avg": -0.16097909212112427, |
| "lr": 2.7789473684210526e-05, |
| "objective/entropy": 100.59518432617188, |
| "objective/kl": 54.423255920410156, |
| "objective/non_score_reward": -5.442325592041016, |
| "objective/rlhf_reward": -17.36930379867554, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.42956307530403137, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7463341951370239, |
| "step": 84, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0013723373413086 |
| }, |
| { |
| "episode": 1376, |
| "epoch": 0.037746200691282164, |
| "loss/policy_avg": 1.31124746799469, |
| "lr": 2.776315789473684e-05, |
| "objective/entropy": 29.62479591369629, |
| "objective/kl": 52.713645935058594, |
| "objective/non_score_reward": -5.271364688873291, |
| "objective/rlhf_reward": -20.685458755493165, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 17.273426055908203, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9209033250808716, |
| "step": 85, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000173568725586 |
| }, |
| { |
| "episode": 1392, |
| "epoch": 0.03818511000164591, |
| "loss/policy_avg": 0.1676180362701416, |
| "lr": 2.773684210526316e-05, |
| "objective/entropy": 9.287511825561523, |
| "objective/kl": 51.08624267578125, |
| "objective/non_score_reward": -5.108624458312988, |
| "objective/rlhf_reward": -18.91872652748459, |
| "objective/scores": 0.37894294565112985, |
| "policy/approxkl_avg": 202.3461456298828, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7541092038154602, |
| "step": 86, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999753475189209 |
| }, |
| { |
| "episode": 1408, |
| "epoch": 0.03862401931200966, |
| "loss/policy_avg": 0.9255491495132446, |
| "lr": 2.7710526315789474e-05, |
| "objective/entropy": 17.094314575195312, |
| "objective/kl": 48.69655990600586, |
| "objective/non_score_reward": -4.869655609130859, |
| "objective/rlhf_reward": -19.078624105453493, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 88.98983764648438, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8517658114433289, |
| "step": 87, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9959460496902466 |
| }, |
| { |
| "episode": 1424, |
| "epoch": 0.0390629286223734, |
| "loss/policy_avg": 2.683220624923706, |
| "lr": 2.768421052631579e-05, |
| "objective/entropy": 122.18838500976562, |
| "objective/kl": 57.215179443359375, |
| "objective/non_score_reward": -5.721518516540527, |
| "objective/rlhf_reward": -22.486073350906373, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 8.430204391479492, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6825472116470337, |
| "step": 88, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9960262775421143 |
| }, |
| { |
| "episode": 1440, |
| "epoch": 0.03950183793273715, |
| "loss/policy_avg": 1.1903173923492432, |
| "lr": 2.7657894736842104e-05, |
| "objective/entropy": 56.404361724853516, |
| "objective/kl": 67.56021118164062, |
| "objective/non_score_reward": -6.756021499633789, |
| "objective/rlhf_reward": -26.624085998535158, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 13.910991668701172, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6265389919281006, |
| "step": 89, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9977799654006958 |
| }, |
| { |
| "episode": 1456, |
| "epoch": 0.03994074724310089, |
| "loss/policy_avg": 0.586733877658844, |
| "lr": 2.7631578947368423e-05, |
| "objective/entropy": -15.051795959472656, |
| "objective/kl": 43.27064514160156, |
| "objective/non_score_reward": -4.327064514160156, |
| "objective/rlhf_reward": -16.908258533477785, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.233497142791748, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7934204936027527, |
| "step": 90, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0031657218933105 |
| }, |
| { |
| "episode": 1472, |
| "epoch": 0.04037965655346464, |
| "loss/policy_avg": 1.8827378749847412, |
| "lr": 2.7605263157894738e-05, |
| "objective/entropy": 49.19090270996094, |
| "objective/kl": 60.559024810791016, |
| "objective/non_score_reward": -6.055902481079102, |
| "objective/rlhf_reward": -19.823610877990724, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 17.631580352783203, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 1.0130832195281982, |
| "step": 91, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9948384761810303 |
| }, |
| { |
| "episode": 1488, |
| "epoch": 0.040818565863828385, |
| "loss/policy_avg": -0.12379920482635498, |
| "lr": 2.7578947368421053e-05, |
| "objective/entropy": -266.6684265136719, |
| "objective/kl": 19.84296417236328, |
| "objective/non_score_reward": -1.9842965602874756, |
| "objective/rlhf_reward": -7.537186002731324, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.5475006103515625, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7140687704086304, |
| "step": 92, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9999473094940186 |
| }, |
| { |
| "episode": 1504, |
| "epoch": 0.041257475174192135, |
| "loss/policy_avg": 2.4287641048431396, |
| "lr": 2.7552631578947368e-05, |
| "objective/entropy": 61.6163444519043, |
| "objective/kl": 75.09857940673828, |
| "objective/non_score_reward": -7.509858131408691, |
| "objective/rlhf_reward": -29.63943181037903, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 205.6100616455078, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7793152928352356, |
| "step": 93, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9955990314483643 |
| }, |
| { |
| "episode": 1520, |
| "epoch": 0.04169638448455588, |
| "loss/policy_avg": 0.1086156815290451, |
| "lr": 2.7526315789473683e-05, |
| "objective/entropy": -23.803211212158203, |
| "objective/kl": 41.80360412597656, |
| "objective/non_score_reward": -4.180360794067383, |
| "objective/rlhf_reward": -12.321443176269533, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.97320556640625, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5993040800094604, |
| "step": 94, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9978277683258057 |
| }, |
| { |
| "episode": 1536, |
| "epoch": 0.04213529379491963, |
| "loss/policy_avg": -0.9118296504020691, |
| "lr": 2.75e-05, |
| "objective/entropy": -129.44325256347656, |
| "objective/kl": 56.03433609008789, |
| "objective/non_score_reward": -5.603433609008789, |
| "objective/rlhf_reward": -20.680401102701822, |
| "objective/scores": 0.43333333333333335, |
| "policy/approxkl_avg": 161.11831665039062, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8422608375549316, |
| "step": 95, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0000882148742676 |
| }, |
| { |
| "episode": 1552, |
| "epoch": 0.04257420310528337, |
| "loss/policy_avg": 0.2865448296070099, |
| "lr": 2.7473684210526316e-05, |
| "objective/entropy": 9.751440048217773, |
| "objective/kl": 50.09703063964844, |
| "objective/non_score_reward": -5.009703159332275, |
| "objective/rlhf_reward": -17.916106405035528, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 297.1142272949219, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8490245342254639, |
| "step": 96, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9978770017623901 |
| }, |
| { |
| "episode": 1568, |
| "epoch": 0.04301311241564712, |
| "loss/policy_avg": 1.2864903211593628, |
| "lr": 2.744736842105263e-05, |
| "objective/entropy": -15.848587036132812, |
| "objective/kl": 72.71647644042969, |
| "objective/non_score_reward": -7.271647930145264, |
| "objective/rlhf_reward": -26.162872706295225, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 9.334358215332031, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7372955679893494, |
| "step": 97, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0022706985473633 |
| }, |
| { |
| "episode": 1584, |
| "epoch": 0.04345202172601086, |
| "loss/policy_avg": -0.0980822816491127, |
| "lr": 2.7421052631578946e-05, |
| "objective/entropy": -245.53875732421875, |
| "objective/kl": 20.317668914794922, |
| "objective/non_score_reward": -2.031766891479492, |
| "objective/rlhf_reward": -3.7270678043365475, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7220726013183594, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.9007136821746826, |
| "step": 98, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9996488094329834 |
| }, |
| { |
| "episode": 1600, |
| "epoch": 0.043890931036374606, |
| "loss/policy_avg": -0.025203801691532135, |
| "lr": 2.739473684210526e-05, |
| "objective/entropy": -198.08126831054688, |
| "objective/kl": 38.61351776123047, |
| "objective/non_score_reward": -3.86135196685791, |
| "objective/rlhf_reward": -11.04540786743164, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 74.81521606445312, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9700057506561279, |
| "step": 99, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9973889589309692 |
| }, |
| { |
| "episode": 1616, |
| "epoch": 0.044329840346738356, |
| "loss/policy_avg": 0.768902063369751, |
| "lr": 2.736842105263158e-05, |
| "objective/entropy": 72.47509765625, |
| "objective/kl": 53.5683479309082, |
| "objective/non_score_reward": -5.35683536529541, |
| "objective/rlhf_reward": -17.02733979225159, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.6070337295532227, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8009264469146729, |
| "step": 100, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0045204162597656 |
| }, |
| { |
| "episode": 1632, |
| "epoch": 0.0447687496571021, |
| "loss/policy_avg": 0.12479447573423386, |
| "lr": 2.7342105263157894e-05, |
| "objective/entropy": -71.50438690185547, |
| "objective/kl": 46.817623138427734, |
| "objective/non_score_reward": -4.681761741638184, |
| "objective/rlhf_reward": -18.32704839706421, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.7676063776016235, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6292232275009155, |
| "step": 101, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.99995756149292 |
| }, |
| { |
| "episode": 1648, |
| "epoch": 0.04520765896746585, |
| "loss/policy_avg": -0.7869347929954529, |
| "lr": 2.7315789473684213e-05, |
| "objective/entropy": -67.84761047363281, |
| "objective/kl": 38.6337890625, |
| "objective/non_score_reward": -3.8633787631988525, |
| "objective/rlhf_reward": -11.053514814376832, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 65.58836364746094, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6918781399726868, |
| "step": 102, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002877712249756 |
| }, |
| { |
| "episode": 1664, |
| "epoch": 0.04564656827782959, |
| "loss/policy_avg": 1.888732671737671, |
| "lr": 2.7289473684210528e-05, |
| "objective/entropy": 45.654727935791016, |
| "objective/kl": 65.4364013671875, |
| "objective/non_score_reward": -6.54364013671875, |
| "objective/rlhf_reward": -25.77456102371216, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 158.3827667236328, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7598006725311279, |
| "step": 103, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.995233416557312 |
| }, |
| { |
| "episode": 1680, |
| "epoch": 0.04608547758819334, |
| "loss/policy_avg": 0.5027464032173157, |
| "lr": 2.7263157894736846e-05, |
| "objective/entropy": -67.26053619384766, |
| "objective/kl": 46.42061996459961, |
| "objective/non_score_reward": -4.642061710357666, |
| "objective/rlhf_reward": -14.168247556686403, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 409.32861328125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7045955657958984, |
| "step": 104, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9950696229934692 |
| }, |
| { |
| "episode": 1696, |
| "epoch": 0.046524386898557084, |
| "loss/policy_avg": 1.1565163135528564, |
| "lr": 2.723684210526316e-05, |
| "objective/entropy": 21.27078628540039, |
| "objective/kl": 54.91717529296875, |
| "objective/non_score_reward": -5.491717338562012, |
| "objective/rlhf_reward": -17.566870307922365, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 169.4134521484375, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.817981481552124, |
| "step": 105, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.997206449508667 |
| }, |
| { |
| "episode": 1712, |
| "epoch": 0.046963296208920834, |
| "loss/policy_avg": 1.359951138496399, |
| "lr": 2.7210526315789476e-05, |
| "objective/entropy": 73.11656951904297, |
| "objective/kl": 42.99860382080078, |
| "objective/non_score_reward": -4.29986047744751, |
| "objective/rlhf_reward": -16.79944190979004, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 160.0785369873047, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7364996671676636, |
| "step": 106, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.999701976776123 |
| }, |
| { |
| "episode": 1728, |
| "epoch": 0.047402205519284576, |
| "loss/policy_avg": 0.6170438528060913, |
| "lr": 2.718421052631579e-05, |
| "objective/entropy": 18.827709197998047, |
| "objective/kl": 59.75545120239258, |
| "objective/non_score_reward": -5.975545406341553, |
| "objective/rlhf_reward": -25.90218162536621, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 12.630553245544434, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8481165170669556, |
| "step": 107, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0005557537078857 |
| }, |
| { |
| "episode": 1744, |
| "epoch": 0.047841114829648326, |
| "loss/policy_avg": 0.6389247179031372, |
| "lr": 2.7157894736842106e-05, |
| "objective/entropy": -15.529239654541016, |
| "objective/kl": 29.512508392333984, |
| "objective/non_score_reward": -2.9512510299682617, |
| "objective/rlhf_reward": -7.405003643035888, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.9877429008483887, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.9178982973098755, |
| "step": 108, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0009372234344482 |
| }, |
| { |
| "episode": 1760, |
| "epoch": 0.04828002414001207, |
| "loss/policy_avg": 0.5083436965942383, |
| "lr": 2.7131578947368424e-05, |
| "objective/entropy": -31.206829071044922, |
| "objective/kl": 58.42845916748047, |
| "objective/non_score_reward": -5.842845916748047, |
| "objective/rlhf_reward": -18.971384143829347, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 246.38031005859375, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8161805868148804, |
| "step": 109, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9968533515930176 |
| }, |
| { |
| "episode": 1776, |
| "epoch": 0.04871893345037582, |
| "loss/policy_avg": -0.40812578797340393, |
| "lr": 2.710526315789474e-05, |
| "objective/entropy": 31.53291130065918, |
| "objective/kl": 29.56689453125, |
| "objective/non_score_reward": -2.9566893577575684, |
| "objective/rlhf_reward": -13.826757431030273, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 10.323970794677734, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7529888153076172, |
| "step": 110, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0055394172668457 |
| }, |
| { |
| "episode": 1792, |
| "epoch": 0.04915784276073956, |
| "loss/policy_avg": 1.3316415548324585, |
| "lr": 2.7078947368421054e-05, |
| "objective/entropy": 12.768815994262695, |
| "objective/kl": 54.86576461791992, |
| "objective/non_score_reward": -5.486576557159424, |
| "objective/rlhf_reward": -17.546306228637697, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.3613033294677734, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7278101444244385, |
| "step": 111, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9984724521636963 |
| }, |
| { |
| "episode": 1808, |
| "epoch": 0.04959675207110331, |
| "loss/policy_avg": -0.11871880292892456, |
| "lr": 2.705263157894737e-05, |
| "objective/entropy": -7.827598571777344, |
| "objective/kl": 39.736026763916016, |
| "objective/non_score_reward": -3.973602771759033, |
| "objective/rlhf_reward": -11.494411563873292, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.851396560668945, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7495956420898438, |
| "step": 112, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9964585304260254 |
| }, |
| { |
| "episode": 1824, |
| "epoch": 0.050035661381467054, |
| "loss/policy_avg": 1.761976957321167, |
| "lr": 2.7026315789473684e-05, |
| "objective/entropy": 128.9440460205078, |
| "objective/kl": 119.60091400146484, |
| "objective/non_score_reward": -11.960092544555664, |
| "objective/rlhf_reward": -47.440368270874025, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 141.6197967529297, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8293796181678772, |
| "step": 113, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9977341890335083 |
| }, |
| { |
| "episode": 1840, |
| "epoch": 0.0504745706918308, |
| "loss/policy_avg": -0.7025829553604126, |
| "lr": 2.7000000000000002e-05, |
| "objective/entropy": 41.96453857421875, |
| "objective/kl": 52.472015380859375, |
| "objective/non_score_reward": -5.247201919555664, |
| "objective/rlhf_reward": -18.06508771026251, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 4.035026550292969, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7776132822036743, |
| "step": 114, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0039517879486084 |
| }, |
| { |
| "episode": 1856, |
| "epoch": 0.05091348000219455, |
| "loss/policy_avg": 0.7849704623222351, |
| "lr": 2.6973684210526317e-05, |
| "objective/entropy": -171.7941131591797, |
| "objective/kl": 46.76850128173828, |
| "objective/non_score_reward": -4.676850318908691, |
| "objective/rlhf_reward": -16.30740032196045, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 233.18243408203125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7888088822364807, |
| "step": 115, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987388849258423 |
| }, |
| { |
| "episode": 1872, |
| "epoch": 0.05135238931255829, |
| "loss/policy_avg": -0.09497790038585663, |
| "lr": 2.6947368421052632e-05, |
| "objective/entropy": -106.61396789550781, |
| "objective/kl": 29.52130699157715, |
| "objective/non_score_reward": -2.9521307945251465, |
| "objective/rlhf_reward": -7.4085229396820065, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.43795135617256165, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8204243183135986, |
| "step": 116, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000943183898926 |
| }, |
| { |
| "episode": 1888, |
| "epoch": 0.05179129862292204, |
| "loss/policy_avg": 0.32255253195762634, |
| "lr": 2.6921052631578947e-05, |
| "objective/entropy": -91.3974838256836, |
| "objective/kl": 43.5883903503418, |
| "objective/non_score_reward": -4.35883903503418, |
| "objective/rlhf_reward": -13.035355901718141, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.264688968658447, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7290156483650208, |
| "step": 117, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0012741088867188 |
| }, |
| { |
| "episode": 1904, |
| "epoch": 0.05223020793328578, |
| "loss/policy_avg": -0.016802702099084854, |
| "lr": 2.6894736842105266e-05, |
| "objective/entropy": -53.469398498535156, |
| "objective/kl": 59.04955291748047, |
| "objective/non_score_reward": -5.9049553871154785, |
| "objective/rlhf_reward": -19.219821548461915, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.983826756477356, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.742132306098938, |
| "step": 118, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000786542892456 |
| }, |
| { |
| "episode": 1920, |
| "epoch": 0.05266911724364953, |
| "loss/policy_avg": 1.3971296548843384, |
| "lr": 2.686842105263158e-05, |
| "objective/entropy": -29.92015266418457, |
| "objective/kl": 50.994483947753906, |
| "objective/non_score_reward": -5.099448204040527, |
| "objective/rlhf_reward": -15.99779305458069, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 112.21920776367188, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.884390652179718, |
| "step": 119, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9959502220153809 |
| }, |
| { |
| "episode": 1936, |
| "epoch": 0.053108026554013275, |
| "loss/policy_avg": 0.22061039507389069, |
| "lr": 2.6842105263157896e-05, |
| "objective/entropy": -107.27847290039062, |
| "objective/kl": 49.88347244262695, |
| "objective/non_score_reward": -4.988347053527832, |
| "objective/rlhf_reward": -15.553388690948488, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.4191887378692627, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7550037503242493, |
| "step": 120, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9977598190307617 |
| }, |
| { |
| "episode": 1952, |
| "epoch": 0.053546935864377025, |
| "loss/policy_avg": 0.3377835750579834, |
| "lr": 2.681578947368421e-05, |
| "objective/entropy": -58.49903869628906, |
| "objective/kl": 37.71872329711914, |
| "objective/non_score_reward": -3.7718722820281982, |
| "objective/rlhf_reward": -10.687489128112793, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.646287441253662, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6332191228866577, |
| "step": 121, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9996297359466553 |
| }, |
| { |
| "episode": 1968, |
| "epoch": 0.05398584517474077, |
| "loss/policy_avg": -0.24410778284072876, |
| "lr": 2.6789473684210526e-05, |
| "objective/entropy": -20.791318893432617, |
| "objective/kl": 42.270294189453125, |
| "objective/non_score_reward": -4.227029800415039, |
| "objective/rlhf_reward": -15.08328973797233, |
| "objective/scores": 0.4562071871080222, |
| "policy/approxkl_avg": 2.9298739433288574, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8968937397003174, |
| "step": 122, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0011134147644043 |
| }, |
| { |
| "episode": 1984, |
| "epoch": 0.05442475448510452, |
| "loss/policy_avg": -0.020455077290534973, |
| "lr": 2.6763157894736844e-05, |
| "objective/entropy": 53.92317199707031, |
| "objective/kl": 44.21133804321289, |
| "objective/non_score_reward": -4.421133518218994, |
| "objective/rlhf_reward": -14.760815297008726, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 146.8616943359375, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8489948511123657, |
| "step": 123, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9991151094436646 |
| }, |
| { |
| "episode": 2000, |
| "epoch": 0.05486366379546826, |
| "loss/policy_avg": -0.6532711982727051, |
| "lr": 2.673684210526316e-05, |
| "objective/entropy": 95.89879608154297, |
| "objective/kl": 70.10568237304688, |
| "objective/non_score_reward": -7.0105671882629395, |
| "objective/rlhf_reward": -25.118550692440245, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 1.0512518882751465, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7362265586853027, |
| "step": 124, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0022377967834473 |
| }, |
| { |
| "episode": 2016, |
| "epoch": 0.05530257310583201, |
| "loss/policy_avg": -0.532859206199646, |
| "lr": 2.6710526315789474e-05, |
| "objective/entropy": -134.69949340820312, |
| "objective/kl": 26.79535675048828, |
| "objective/non_score_reward": -2.6795358657836914, |
| "objective/rlhf_reward": -6.318143224716187, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 131.03668212890625, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7895087599754333, |
| "step": 125, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000466823577881 |
| }, |
| { |
| "episode": 2032, |
| "epoch": 0.05574148241619575, |
| "loss/policy_avg": -0.6944832801818848, |
| "lr": 2.668421052631579e-05, |
| "objective/entropy": -70.2201919555664, |
| "objective/kl": 39.88362121582031, |
| "objective/non_score_reward": -3.9883623123168945, |
| "objective/rlhf_reward": -17.953449249267578, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 142.29962158203125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7750124335289001, |
| "step": 126, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9993183612823486 |
| }, |
| { |
| "episode": 2048, |
| "epoch": 0.056180391726559496, |
| "loss/policy_avg": 0.4301793575286865, |
| "lr": 2.6657894736842107e-05, |
| "objective/entropy": 41.7333984375, |
| "objective/kl": 50.46965789794922, |
| "objective/non_score_reward": -5.046966075897217, |
| "objective/rlhf_reward": -19.787864542007448, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 8.3101167678833, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7716444730758667, |
| "step": 127, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000278949737549 |
| }, |
| { |
| "episode": 2064, |
| "epoch": 0.056619301036923246, |
| "loss/policy_avg": 0.20978103578090668, |
| "lr": 2.6631578947368422e-05, |
| "objective/entropy": -39.1937370300293, |
| "objective/kl": 60.34259033203125, |
| "objective/non_score_reward": -6.034258842468262, |
| "objective/rlhf_reward": -23.737036323547365, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 6.2546772956848145, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6956591010093689, |
| "step": 128, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0086145401000977 |
| }, |
| { |
| "episode": 2080, |
| "epoch": 0.05705821034728699, |
| "loss/policy_avg": 1.5271130800247192, |
| "lr": 2.6605263157894737e-05, |
| "objective/entropy": -15.993308067321777, |
| "objective/kl": 42.31775665283203, |
| "objective/non_score_reward": -4.231775760650635, |
| "objective/rlhf_reward": -12.527102565765382, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8814833164215088, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.839857280254364, |
| "step": 129, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0023293495178223 |
| }, |
| { |
| "episode": 2096, |
| "epoch": 0.05749711965765074, |
| "loss/policy_avg": 1.5832982063293457, |
| "lr": 2.6578947368421052e-05, |
| "objective/entropy": 23.444358825683594, |
| "objective/kl": 67.05261993408203, |
| "objective/non_score_reward": -6.705262184143066, |
| "objective/rlhf_reward": -26.421048736572267, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 343.2151184082031, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6558287143707275, |
| "step": 130, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.003596067428589 |
| }, |
| { |
| "episode": 2112, |
| "epoch": 0.05793602896801448, |
| "loss/policy_avg": 4.708208084106445, |
| "lr": 2.6552631578947367e-05, |
| "objective/entropy": -47.06990432739258, |
| "objective/kl": 54.011878967285156, |
| "objective/non_score_reward": -5.401188373565674, |
| "objective/rlhf_reward": -21.204753494262697, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 10.141319274902344, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7859508991241455, |
| "step": 131, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.996903896331787 |
| }, |
| { |
| "episode": 2128, |
| "epoch": 0.05837493827837823, |
| "loss/policy_avg": 0.5339508056640625, |
| "lr": 2.6526315789473685e-05, |
| "objective/entropy": 1.606778621673584, |
| "objective/kl": 49.70311737060547, |
| "objective/non_score_reward": -4.970311641693115, |
| "objective/rlhf_reward": -15.481246566772462, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.387594223022461, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8247156143188477, |
| "step": 132, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987622499465942 |
| }, |
| { |
| "episode": 2144, |
| "epoch": 0.058813847588741974, |
| "loss/policy_avg": 0.5222880840301514, |
| "lr": 2.65e-05, |
| "objective/entropy": -71.43392944335938, |
| "objective/kl": 67.5316162109375, |
| "objective/non_score_reward": -6.753161907196045, |
| "objective/rlhf_reward": -26.612646675109865, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.4301528930664062, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6953130960464478, |
| "step": 133, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9979286193847656 |
| }, |
| { |
| "episode": 2160, |
| "epoch": 0.059252756899105724, |
| "loss/policy_avg": 0.3055562674999237, |
| "lr": 2.6473684210526315e-05, |
| "objective/entropy": -49.49654769897461, |
| "objective/kl": 45.72821044921875, |
| "objective/non_score_reward": -4.572821140289307, |
| "objective/rlhf_reward": -17.891284561157228, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.059330463409424, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5824675559997559, |
| "step": 134, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0010061264038086 |
| }, |
| { |
| "episode": 2176, |
| "epoch": 0.05969166620946947, |
| "loss/policy_avg": 0.23712539672851562, |
| "lr": 2.644736842105263e-05, |
| "objective/entropy": -80.8898696899414, |
| "objective/kl": 64.44762420654297, |
| "objective/non_score_reward": -6.44476318359375, |
| "objective/rlhf_reward": -25.379051303863527, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 200.35958862304688, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8632474541664124, |
| "step": 135, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9975115060806274 |
| }, |
| { |
| "episode": 2192, |
| "epoch": 0.06013057551983322, |
| "loss/policy_avg": -0.052902236580848694, |
| "lr": 2.6421052631578945e-05, |
| "objective/entropy": -6.771919250488281, |
| "objective/kl": 57.73787307739258, |
| "objective/non_score_reward": -5.773787498474121, |
| "objective/rlhf_reward": -18.695149517059328, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 10.29161262512207, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7512908577919006, |
| "step": 136, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9991791248321533 |
| }, |
| { |
| "episode": 2208, |
| "epoch": 0.06056948483019696, |
| "loss/policy_avg": -0.9740344285964966, |
| "lr": 2.6394736842105264e-05, |
| "objective/entropy": -20.03300666809082, |
| "objective/kl": 39.889991760253906, |
| "objective/non_score_reward": -3.988999366760254, |
| "objective/rlhf_reward": -14.008585999684271, |
| "objective/scores": 0.4868528072345416, |
| "policy/approxkl_avg": 1.1283724308013916, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7417764663696289, |
| "step": 137, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0027053356170654 |
| }, |
| { |
| "episode": 2224, |
| "epoch": 0.06100839414056071, |
| "loss/policy_avg": 0.14382201433181763, |
| "lr": 2.636842105263158e-05, |
| "objective/entropy": -43.97955322265625, |
| "objective/kl": 37.197532653808594, |
| "objective/non_score_reward": -3.7197535037994385, |
| "objective/rlhf_reward": -10.479013776779176, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.3562726974487305, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7394824028015137, |
| "step": 138, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999990463256836 |
| }, |
| { |
| "episode": 2240, |
| "epoch": 0.06144730345092445, |
| "loss/policy_avg": 1.9168037176132202, |
| "lr": 2.6342105263157894e-05, |
| "objective/entropy": 7.118698596954346, |
| "objective/kl": 42.77621078491211, |
| "objective/non_score_reward": -4.277621269226074, |
| "objective/rlhf_reward": -12.71048483848572, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 7.454709053039551, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7237066030502319, |
| "step": 139, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9988505840301514 |
| }, |
| { |
| "episode": 2256, |
| "epoch": 0.0618862127612882, |
| "loss/policy_avg": 1.2066419124603271, |
| "lr": 2.631578947368421e-05, |
| "objective/entropy": 4.71856689453125, |
| "objective/kl": 43.93148422241211, |
| "objective/non_score_reward": -4.393148422241211, |
| "objective/rlhf_reward": -13.172594642639162, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 6.769146919250488, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7349320650100708, |
| "step": 140, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9952998161315918 |
| }, |
| { |
| "episode": 2272, |
| "epoch": 0.062325122071651945, |
| "loss/policy_avg": -0.01632899045944214, |
| "lr": 2.6289473684210527e-05, |
| "objective/entropy": 23.46413230895996, |
| "objective/kl": 56.494224548339844, |
| "objective/non_score_reward": -5.649422645568848, |
| "objective/rlhf_reward": -18.19769105911255, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.6812927722930908, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6015833616256714, |
| "step": 141, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0010690689086914 |
| }, |
| { |
| "episode": 2288, |
| "epoch": 0.0627640313820157, |
| "loss/policy_avg": -0.19681307673454285, |
| "lr": 2.6263157894736842e-05, |
| "objective/entropy": -24.22689437866211, |
| "objective/kl": 40.63903045654297, |
| "objective/non_score_reward": -4.063903331756592, |
| "objective/rlhf_reward": -11.85561261177063, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.7771735191345215, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.675006628036499, |
| "step": 142, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0051536560058594 |
| }, |
| { |
| "episode": 2304, |
| "epoch": 0.06320294069237943, |
| "loss/policy_avg": -0.12234362959861755, |
| "lr": 2.623684210526316e-05, |
| "objective/entropy": -47.442970275878906, |
| "objective/kl": 35.8116455078125, |
| "objective/non_score_reward": -3.581164836883545, |
| "objective/rlhf_reward": -9.9246591091156, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1870553493499756, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8034932017326355, |
| "step": 143, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990580081939697 |
| }, |
| { |
| "episode": 2320, |
| "epoch": 0.06364185000274318, |
| "loss/policy_avg": 2.628115177154541, |
| "lr": 2.6210526315789475e-05, |
| "objective/entropy": -15.868587493896484, |
| "objective/kl": 60.868995666503906, |
| "objective/non_score_reward": -6.086899280548096, |
| "objective/rlhf_reward": -19.947597599029542, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 13.940417289733887, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5825701951980591, |
| "step": 144, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9940749406814575 |
| }, |
| { |
| "episode": 2336, |
| "epoch": 0.06408075931310693, |
| "loss/policy_avg": 1.0517263412475586, |
| "lr": 2.618421052631579e-05, |
| "objective/entropy": -58.0733528137207, |
| "objective/kl": 43.175994873046875, |
| "objective/non_score_reward": -4.317599296569824, |
| "objective/rlhf_reward": -12.870397424697877, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 285.7511901855469, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6195497512817383, |
| "step": 145, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9964275360107422 |
| }, |
| { |
| "episode": 2352, |
| "epoch": 0.06451966862347068, |
| "loss/policy_avg": 0.2179277539253235, |
| "lr": 2.615789473684211e-05, |
| "objective/entropy": 30.72907829284668, |
| "objective/kl": 58.64939880371094, |
| "objective/non_score_reward": -5.864940643310547, |
| "objective/rlhf_reward": -19.059761619567873, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 10.522979736328125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.739693820476532, |
| "step": 146, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9976598024368286 |
| }, |
| { |
| "episode": 2368, |
| "epoch": 0.06495857793383442, |
| "loss/policy_avg": 0.25930777192115784, |
| "lr": 2.6131578947368424e-05, |
| "objective/entropy": -27.965259552001953, |
| "objective/kl": 49.60761642456055, |
| "objective/non_score_reward": -4.960761070251465, |
| "objective/rlhf_reward": -16.919326697231504, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 1.7648733854293823, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5860817432403564, |
| "step": 147, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9973572492599487 |
| }, |
| { |
| "episode": 2384, |
| "epoch": 0.06539748724419817, |
| "loss/policy_avg": 1.8243403434753418, |
| "lr": 2.610526315789474e-05, |
| "objective/entropy": -18.670982360839844, |
| "objective/kl": 60.018455505371094, |
| "objective/non_score_reward": -6.001845836639404, |
| "objective/rlhf_reward": -23.60738286972046, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.463629126548767, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6600210070610046, |
| "step": 148, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0028176307678223 |
| }, |
| { |
| "episode": 2400, |
| "epoch": 0.06583639655456192, |
| "loss/policy_avg": 0.2627159059047699, |
| "lr": 2.6078947368421053e-05, |
| "objective/entropy": -24.919069290161133, |
| "objective/kl": 38.713436126708984, |
| "objective/non_score_reward": -3.8713436126708984, |
| "objective/rlhf_reward": -11.085374450683593, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.5450901985168457, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7100886702537537, |
| "step": 149, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.99765944480896 |
| }, |
| { |
| "episode": 2416, |
| "epoch": 0.06627530586492567, |
| "loss/policy_avg": -0.16113287210464478, |
| "lr": 2.605263157894737e-05, |
| "objective/entropy": -233.61676025390625, |
| "objective/kl": 25.49152183532715, |
| "objective/non_score_reward": -2.549152374267578, |
| "objective/rlhf_reward": -12.196609497070312, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.3775527477264404, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.572956919670105, |
| "step": 150, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0008134841918945 |
| }, |
| { |
| "episode": 2432, |
| "epoch": 0.0667142151752894, |
| "loss/policy_avg": 1.506213665008545, |
| "lr": 2.6026315789473687e-05, |
| "objective/entropy": 17.05105972290039, |
| "objective/kl": 88.19961547851562, |
| "objective/non_score_reward": -8.819961547851562, |
| "objective/rlhf_reward": -34.87984714508057, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 127.97711181640625, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8008217811584473, |
| "step": 151, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9965453147888184 |
| }, |
| { |
| "episode": 2448, |
| "epoch": 0.06715312448565315, |
| "loss/policy_avg": 0.3404674828052521, |
| "lr": 2.6000000000000002e-05, |
| "objective/entropy": -31.00487518310547, |
| "objective/kl": 70.51848602294922, |
| "objective/non_score_reward": -7.051849365234375, |
| "objective/rlhf_reward": -23.807396507263185, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 9.886850357055664, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7234030961990356, |
| "step": 152, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9982991218566895 |
| }, |
| { |
| "episode": 2464, |
| "epoch": 0.0675920337960169, |
| "loss/policy_avg": 1.5734370946884155, |
| "lr": 2.5973684210526317e-05, |
| "objective/entropy": -27.52907943725586, |
| "objective/kl": 56.56928634643555, |
| "objective/non_score_reward": -5.656929016113281, |
| "objective/rlhf_reward": -18.227714633941652, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.53957462310791, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5384600162506104, |
| "step": 153, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9993629455566406 |
| }, |
| { |
| "episode": 2480, |
| "epoch": 0.06803094310638065, |
| "loss/policy_avg": 0.3518854081630707, |
| "lr": 2.5947368421052632e-05, |
| "objective/entropy": -261.23638916015625, |
| "objective/kl": 18.554153442382812, |
| "objective/non_score_reward": -1.8554154634475708, |
| "objective/rlhf_reward": -3.021661853790283, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.608569860458374, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6520816087722778, |
| "step": 154, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.999368667602539 |
| }, |
| { |
| "episode": 2496, |
| "epoch": 0.06846985241674439, |
| "loss/policy_avg": -0.8293436169624329, |
| "lr": 2.592105263157895e-05, |
| "objective/entropy": -169.88430786132812, |
| "objective/kl": 61.74763488769531, |
| "objective/non_score_reward": -6.1747636795043945, |
| "objective/rlhf_reward": -24.29905471801758, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 108.35233306884766, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6774731278419495, |
| "step": 155, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9995229244232178 |
| }, |
| { |
| "episode": 2512, |
| "epoch": 0.06890876172710814, |
| "loss/policy_avg": 0.14700853824615479, |
| "lr": 2.5894736842105265e-05, |
| "objective/entropy": -85.36224365234375, |
| "objective/kl": 55.885929107666016, |
| "objective/non_score_reward": -5.588593482971191, |
| "objective/rlhf_reward": -17.95437297821045, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.13096284866333, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6663363575935364, |
| "step": 156, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0007362365722656 |
| }, |
| { |
| "episode": 2528, |
| "epoch": 0.06934767103747189, |
| "loss/policy_avg": -0.4546607434749603, |
| "lr": 2.586842105263158e-05, |
| "objective/entropy": -116.53895568847656, |
| "objective/kl": 44.87690734863281, |
| "objective/non_score_reward": -4.4876909255981445, |
| "objective/rlhf_reward": -13.550763225555421, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 135.5211944580078, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7589835524559021, |
| "step": 157, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.99733304977417 |
| }, |
| { |
| "episode": 2544, |
| "epoch": 0.06978658034783562, |
| "loss/policy_avg": 6.644524574279785, |
| "lr": 2.5842105263157895e-05, |
| "objective/entropy": 224.932373046875, |
| "objective/kl": 77.01321411132812, |
| "objective/non_score_reward": -7.701322078704834, |
| "objective/rlhf_reward": -30.405288791656496, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 16.4545955657959, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.9313648343086243, |
| "step": 158, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9967166185379028 |
| }, |
| { |
| "episode": 2560, |
| "epoch": 0.07022548965819937, |
| "loss/policy_avg": -0.7700405120849609, |
| "lr": 2.581578947368421e-05, |
| "objective/entropy": -299.10150146484375, |
| "objective/kl": 37.77906036376953, |
| "objective/non_score_reward": -3.7779064178466797, |
| "objective/rlhf_reward": -14.711626148223878, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6640864610671997, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7569674849510193, |
| "step": 159, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 8, |
| "val/ratio": 1.9998410940170288 |
| }, |
| { |
| "episode": 2576, |
| "epoch": 0.07066439896856312, |
| "loss/policy_avg": -1.5657492876052856, |
| "lr": 2.578947368421053e-05, |
| "objective/entropy": -37.76366424560547, |
| "objective/kl": 66.01406860351562, |
| "objective/non_score_reward": -6.601407051086426, |
| "objective/rlhf_reward": -22.005628204345705, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 97.5102310180664, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8600292205810547, |
| "step": 160, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.004817485809326 |
| }, |
| { |
| "episode": 2592, |
| "epoch": 0.07110330827892687, |
| "loss/policy_avg": 0.8615214228630066, |
| "lr": 2.5763157894736843e-05, |
| "objective/entropy": -48.53110885620117, |
| "objective/kl": 50.90060806274414, |
| "objective/non_score_reward": -5.090060234069824, |
| "objective/rlhf_reward": -19.960242366790773, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 271.98052978515625, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7262279987335205, |
| "step": 161, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9973949193954468 |
| }, |
| { |
| "episode": 2608, |
| "epoch": 0.07154221758929061, |
| "loss/policy_avg": 0.14006884396076202, |
| "lr": 2.5736842105263158e-05, |
| "objective/entropy": -87.22216796875, |
| "objective/kl": 48.421382904052734, |
| "objective/non_score_reward": -4.842138290405273, |
| "objective/rlhf_reward": -14.968552684783937, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7606146335601807, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6789655685424805, |
| "step": 162, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9983569383621216 |
| }, |
| { |
| "episode": 2624, |
| "epoch": 0.07198112689965436, |
| "loss/policy_avg": 0.11756910383701324, |
| "lr": 2.5710526315789473e-05, |
| "objective/entropy": -90.09169006347656, |
| "objective/kl": 48.95092010498047, |
| "objective/non_score_reward": -4.895092010498047, |
| "objective/rlhf_reward": -15.180367088317873, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.360994815826416, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.673121988773346, |
| "step": 163, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9987850189208984 |
| }, |
| { |
| "episode": 2640, |
| "epoch": 0.07242003621001811, |
| "loss/policy_avg": 0.6505356431007385, |
| "lr": 2.568421052631579e-05, |
| "objective/entropy": -105.59286499023438, |
| "objective/kl": 57.51460647583008, |
| "objective/non_score_reward": -5.751461029052734, |
| "objective/rlhf_reward": -18.605842685699464, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9482333064079285, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6159911751747131, |
| "step": 164, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.00016188621521 |
| }, |
| { |
| "episode": 2656, |
| "epoch": 0.07285894552038186, |
| "loss/policy_avg": 0.4072788655757904, |
| "lr": 2.5657894736842107e-05, |
| "objective/entropy": 30.31393814086914, |
| "objective/kl": 62.92792510986328, |
| "objective/non_score_reward": -6.292792320251465, |
| "objective/rlhf_reward": -20.77116928100586, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 41.36127471923828, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7414021492004395, |
| "step": 165, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.994492769241333 |
| }, |
| { |
| "episode": 2672, |
| "epoch": 0.07329785483074559, |
| "loss/policy_avg": 0.6451830863952637, |
| "lr": 2.563157894736842e-05, |
| "objective/entropy": -93.72520446777344, |
| "objective/kl": 68.08780670166016, |
| "objective/non_score_reward": -6.808781147003174, |
| "objective/rlhf_reward": -22.83512411117554, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 380.79705810546875, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.778990626335144, |
| "step": 166, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0007405281066895 |
| }, |
| { |
| "episode": 2688, |
| "epoch": 0.07373676414110934, |
| "loss/policy_avg": 0.30001240968704224, |
| "lr": 2.5605263157894737e-05, |
| "objective/entropy": -105.16179656982422, |
| "objective/kl": 41.9052619934082, |
| "objective/non_score_reward": -4.190526008605957, |
| "objective/rlhf_reward": -16.362104749679567, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.618799924850464, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7631393074989319, |
| "step": 167, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0000078678131104 |
| }, |
| { |
| "episode": 2704, |
| "epoch": 0.07417567345147309, |
| "loss/policy_avg": 0.49671614170074463, |
| "lr": 2.557894736842105e-05, |
| "objective/entropy": -53.44061279296875, |
| "objective/kl": 57.806434631347656, |
| "objective/non_score_reward": -5.780643463134766, |
| "objective/rlhf_reward": -18.722572898864748, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 6.265882968902588, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7590473294258118, |
| "step": 168, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990757703781128 |
| }, |
| { |
| "episode": 2720, |
| "epoch": 0.07461458276183684, |
| "loss/policy_avg": 1.5006479024887085, |
| "lr": 2.555263157894737e-05, |
| "objective/entropy": -62.24983596801758, |
| "objective/kl": 45.70757293701172, |
| "objective/non_score_reward": -4.570757865905762, |
| "objective/rlhf_reward": -13.883030509948732, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.5961132049560547, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7392877340316772, |
| "step": 169, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9993603229522705 |
| }, |
| { |
| "episode": 2736, |
| "epoch": 0.07505349207220058, |
| "loss/policy_avg": -0.012972153723239899, |
| "lr": 2.5526315789473685e-05, |
| "objective/entropy": -65.46833801269531, |
| "objective/kl": 66.47633361816406, |
| "objective/non_score_reward": -6.647633075714111, |
| "objective/rlhf_reward": -22.190532779693605, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.807072162628174, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.64030921459198, |
| "step": 170, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000433921813965 |
| }, |
| { |
| "episode": 2752, |
| "epoch": 0.07549240138256433, |
| "loss/policy_avg": 0.2781289219856262, |
| "lr": 2.55e-05, |
| "objective/entropy": 16.252883911132812, |
| "objective/kl": 39.9376106262207, |
| "objective/non_score_reward": -3.9937610626220703, |
| "objective/rlhf_reward": -15.575044488906862, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 123.21941375732422, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7487983107566833, |
| "step": 171, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.999727487564087 |
| }, |
| { |
| "episode": 2768, |
| "epoch": 0.07593131069292808, |
| "loss/policy_avg": 0.4936387538909912, |
| "lr": 2.5473684210526315e-05, |
| "objective/entropy": -25.21156883239746, |
| "objective/kl": 54.01347351074219, |
| "objective/non_score_reward": -5.4013471603393555, |
| "objective/rlhf_reward": -17.20538911819458, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 8.337639808654785, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.828056812286377, |
| "step": 172, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9998791217803955 |
| }, |
| { |
| "episode": 2784, |
| "epoch": 0.07637022000329181, |
| "loss/policy_avg": 0.9954125881195068, |
| "lr": 2.544736842105263e-05, |
| "objective/entropy": -17.563995361328125, |
| "objective/kl": 53.30859375, |
| "objective/non_score_reward": -5.330860137939453, |
| "objective/rlhf_reward": -20.923440074920656, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.786358118057251, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7179369926452637, |
| "step": 173, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9982266426086426 |
| }, |
| { |
| "episode": 2800, |
| "epoch": 0.07680912931365556, |
| "loss/policy_avg": 0.03012019395828247, |
| "lr": 2.5421052631578948e-05, |
| "objective/entropy": 5.954471588134766, |
| "objective/kl": 54.192142486572266, |
| "objective/non_score_reward": -5.419214248657227, |
| "objective/rlhf_reward": -21.27685651779175, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 13.544754028320312, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8271291255950928, |
| "step": 174, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.997292399406433 |
| }, |
| { |
| "episode": 2816, |
| "epoch": 0.07724803862401931, |
| "loss/policy_avg": -0.3252931833267212, |
| "lr": 2.5394736842105263e-05, |
| "objective/entropy": -62.24835205078125, |
| "objective/kl": 52.21865463256836, |
| "objective/non_score_reward": -5.221865653991699, |
| "objective/rlhf_reward": -16.48746213912964, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 83.18733215332031, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8194577693939209, |
| "step": 175, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9963982105255127 |
| }, |
| { |
| "episode": 2832, |
| "epoch": 0.07768694793438306, |
| "loss/policy_avg": -0.061586543917655945, |
| "lr": 2.5368421052631578e-05, |
| "objective/entropy": -232.89175415039062, |
| "objective/kl": 31.382434844970703, |
| "objective/non_score_reward": -3.1382434368133545, |
| "objective/rlhf_reward": -14.552973747253418, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.4224392175674438, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.638608455657959, |
| "step": 176, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998449683189392 |
| }, |
| { |
| "episode": 2848, |
| "epoch": 0.0781258572447468, |
| "loss/policy_avg": 2.2170538902282715, |
| "lr": 2.5342105263157893e-05, |
| "objective/entropy": 50.82858657836914, |
| "objective/kl": 76.44434356689453, |
| "objective/non_score_reward": -7.644434928894043, |
| "objective/rlhf_reward": -26.177738761901857, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 268.76092529296875, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7145341634750366, |
| "step": 177, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000556707382202 |
| }, |
| { |
| "episode": 2864, |
| "epoch": 0.07856476655511055, |
| "loss/policy_avg": 0.03309518098831177, |
| "lr": 2.531578947368421e-05, |
| "objective/entropy": -38.59127426147461, |
| "objective/kl": 41.92137908935547, |
| "objective/non_score_reward": -4.192137718200684, |
| "objective/rlhf_reward": -14.645845117346319, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 2.4393868446350098, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7346012592315674, |
| "step": 178, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000610828399658 |
| }, |
| { |
| "episode": 2880, |
| "epoch": 0.0790036758654743, |
| "loss/policy_avg": -0.08147536963224411, |
| "lr": 2.5289473684210526e-05, |
| "objective/entropy": 22.475852966308594, |
| "objective/kl": 39.314239501953125, |
| "objective/non_score_reward": -3.931424379348755, |
| "objective/rlhf_reward": -17.725696563720703, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 3.216597080230713, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6433309316635132, |
| "step": 179, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9997375011444092 |
| }, |
| { |
| "episode": 2896, |
| "epoch": 0.07944258517583805, |
| "loss/policy_avg": 1.5342857837677002, |
| "lr": 2.526315789473684e-05, |
| "objective/entropy": 12.662029266357422, |
| "objective/kl": 44.605552673339844, |
| "objective/non_score_reward": -4.460555553436279, |
| "objective/rlhf_reward": -17.44222221374512, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 5.570178031921387, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9360150098800659, |
| "step": 180, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990198612213135 |
| }, |
| { |
| "episode": 2912, |
| "epoch": 0.07988149448620178, |
| "loss/policy_avg": 1.0636322498321533, |
| "lr": 2.5236842105263156e-05, |
| "objective/entropy": -20.420055389404297, |
| "objective/kl": 43.089805603027344, |
| "objective/non_score_reward": -4.308980941772461, |
| "objective/rlhf_reward": -12.835923767089845, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 8.459303855895996, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6522048711776733, |
| "step": 181, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998940348625183 |
| }, |
| { |
| "episode": 2928, |
| "epoch": 0.08032040379656553, |
| "loss/policy_avg": 1.1111366748809814, |
| "lr": 2.521052631578947e-05, |
| "objective/entropy": -286.44207763671875, |
| "objective/kl": 25.992015838623047, |
| "objective/non_score_reward": -2.5992014408111572, |
| "objective/rlhf_reward": -5.9968057632446286, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.6512064933776855, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7038178443908691, |
| "step": 182, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 1.9989608526229858 |
| }, |
| { |
| "episode": 2944, |
| "epoch": 0.08075931310692928, |
| "loss/policy_avg": 0.3593832850456238, |
| "lr": 2.518421052631579e-05, |
| "objective/entropy": -23.417577743530273, |
| "objective/kl": 55.88574981689453, |
| "objective/non_score_reward": -5.58857536315918, |
| "objective/rlhf_reward": -17.954300975799562, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 7.021892070770264, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6839828491210938, |
| "step": 183, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998976230621338 |
| }, |
| { |
| "episode": 2960, |
| "epoch": 0.08119822241729302, |
| "loss/policy_avg": 1.0649125576019287, |
| "lr": 2.5157894736842108e-05, |
| "objective/entropy": -28.054828643798828, |
| "objective/kl": 52.482826232910156, |
| "objective/non_score_reward": -5.248283386230469, |
| "objective/rlhf_reward": -16.593132114410402, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 6.408839225769043, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6410462856292725, |
| "step": 184, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9986329078674316 |
| }, |
| { |
| "episode": 2976, |
| "epoch": 0.08163713172765677, |
| "loss/policy_avg": 0.09679454565048218, |
| "lr": 2.5131578947368423e-05, |
| "objective/entropy": 16.912199020385742, |
| "objective/kl": 57.1039924621582, |
| "objective/non_score_reward": -5.710399627685547, |
| "objective/rlhf_reward": -18.441597557067873, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.548750638961792, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7216329574584961, |
| "step": 185, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000427722930908 |
| }, |
| { |
| "episode": 2992, |
| "epoch": 0.08207604103802052, |
| "loss/policy_avg": 2.137105941772461, |
| "lr": 2.5105263157894738e-05, |
| "objective/entropy": 4.954294204711914, |
| "objective/kl": 45.99482727050781, |
| "objective/non_score_reward": -4.599482536315918, |
| "objective/rlhf_reward": -13.997929906845094, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.8815433979034424, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7191178798675537, |
| "step": 186, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9966540336608887 |
| }, |
| { |
| "episode": 3008, |
| "epoch": 0.08251495034838427, |
| "loss/policy_avg": 1.5447564125061035, |
| "lr": 2.5078947368421056e-05, |
| "objective/entropy": 72.04828643798828, |
| "objective/kl": 52.15823745727539, |
| "objective/non_score_reward": -5.215824127197266, |
| "objective/rlhf_reward": -16.463295555114748, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.3372530937194824, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7812240123748779, |
| "step": 187, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9983248710632324 |
| }, |
| { |
| "episode": 3024, |
| "epoch": 0.082953859658748, |
| "loss/policy_avg": -0.10642924904823303, |
| "lr": 2.505263157894737e-05, |
| "objective/entropy": -7.903879165649414, |
| "objective/kl": 40.129364013671875, |
| "objective/non_score_reward": -4.012936115264893, |
| "objective/rlhf_reward": -11.651745176315309, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.9247307777404785, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6996808052062988, |
| "step": 188, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999596118927002 |
| }, |
| { |
| "episode": 3040, |
| "epoch": 0.08339276896911176, |
| "loss/policy_avg": -0.6931325793266296, |
| "lr": 2.5026315789473686e-05, |
| "objective/entropy": -112.98082733154297, |
| "objective/kl": 45.408485412597656, |
| "objective/non_score_reward": -4.540848255157471, |
| "objective/rlhf_reward": -13.7633939743042, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 85.60306549072266, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9481509923934937, |
| "step": 189, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.99983811378479 |
| }, |
| { |
| "episode": 3056, |
| "epoch": 0.0838316782794755, |
| "loss/policy_avg": 0.543519139289856, |
| "lr": 2.5e-05, |
| "objective/entropy": -18.81544303894043, |
| "objective/kl": 71.94432830810547, |
| "objective/non_score_reward": -7.194432735443115, |
| "objective/rlhf_reward": -24.37773141860962, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.9382588863372803, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8083455562591553, |
| "step": 190, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9974308013916016 |
| }, |
| { |
| "episode": 3072, |
| "epoch": 0.08427058758983925, |
| "loss/policy_avg": 0.05518569424748421, |
| "lr": 2.4973684210526316e-05, |
| "objective/entropy": 60.97806167602539, |
| "objective/kl": 76.83258819580078, |
| "objective/non_score_reward": -7.6832594871521, |
| "objective/rlhf_reward": -26.3330379486084, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5206363797187805, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.67827308177948, |
| "step": 191, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.001342535018921 |
| }, |
| { |
| "episode": 3088, |
| "epoch": 0.08470949690020299, |
| "loss/policy_avg": 0.1450192928314209, |
| "lr": 2.4947368421052635e-05, |
| "objective/entropy": 81.1869125366211, |
| "objective/kl": 51.66636276245117, |
| "objective/non_score_reward": -5.1666364669799805, |
| "objective/rlhf_reward": -22.666545867919922, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 125.81236267089844, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8107389807701111, |
| "step": 192, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 2.0004963874816895 |
| }, |
| { |
| "episode": 3104, |
| "epoch": 0.08514840621056674, |
| "loss/policy_avg": -0.575951099395752, |
| "lr": 2.492105263157895e-05, |
| "objective/entropy": -129.48512268066406, |
| "objective/kl": 38.8513298034668, |
| "objective/non_score_reward": -3.8851335048675537, |
| "objective/rlhf_reward": -11.140533542633058, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 232.32797241210938, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6589675545692444, |
| "step": 193, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9972124099731445 |
| }, |
| { |
| "episode": 3120, |
| "epoch": 0.08558731552093049, |
| "loss/policy_avg": -0.8726249933242798, |
| "lr": 2.4894736842105264e-05, |
| "objective/entropy": -98.82183837890625, |
| "objective/kl": 38.976009368896484, |
| "objective/non_score_reward": -3.8976006507873535, |
| "objective/rlhf_reward": -11.190402841567995, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 231.56631469726562, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7292333245277405, |
| "step": 194, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000427722930908 |
| }, |
| { |
| "episode": 3136, |
| "epoch": 0.08602622483129424, |
| "loss/policy_avg": -0.5889065861701965, |
| "lr": 2.486842105263158e-05, |
| "objective/entropy": -3.191420078277588, |
| "objective/kl": 56.826507568359375, |
| "objective/non_score_reward": -5.682650566101074, |
| "objective/rlhf_reward": -19.80688277328131, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 8.074029922485352, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7311458587646484, |
| "step": 195, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999056100845337 |
| }, |
| { |
| "episode": 3152, |
| "epoch": 0.08646513414165798, |
| "loss/policy_avg": 0.0854165256023407, |
| "lr": 2.4842105263157894e-05, |
| "objective/entropy": 110.20230102539062, |
| "objective/kl": 75.08110046386719, |
| "objective/non_score_reward": -7.508110523223877, |
| "objective/rlhf_reward": -25.632442569732667, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1298476457595825, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7107164263725281, |
| "step": 196, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9999678134918213 |
| }, |
| { |
| "episode": 3168, |
| "epoch": 0.08690404345202173, |
| "loss/policy_avg": 0.2902490496635437, |
| "lr": 2.4815789473684213e-05, |
| "objective/entropy": 72.5558853149414, |
| "objective/kl": 70.04791259765625, |
| "objective/non_score_reward": -7.004791736602783, |
| "objective/rlhf_reward": -23.619165992736818, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1979445219039917, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7555328607559204, |
| "step": 197, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000892400741577 |
| }, |
| { |
| "episode": 3184, |
| "epoch": 0.08734295276238548, |
| "loss/policy_avg": -1.6277761459350586, |
| "lr": 2.4789473684210528e-05, |
| "objective/entropy": 7.621055603027344, |
| "objective/kl": 61.288002014160156, |
| "objective/non_score_reward": -6.128800868988037, |
| "objective/rlhf_reward": -26.51520347595215, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 187.9342041015625, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8816299438476562, |
| "step": 198, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9992430210113525 |
| }, |
| { |
| "episode": 3200, |
| "epoch": 0.08778186207274921, |
| "loss/policy_avg": 0.22336675226688385, |
| "lr": 2.4763157894736843e-05, |
| "objective/entropy": 140.80392456054688, |
| "objective/kl": 71.45215606689453, |
| "objective/non_score_reward": -7.145215034484863, |
| "objective/rlhf_reward": -24.180860137939455, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8151704668998718, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.844842791557312, |
| "step": 199, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999359846115112 |
| }, |
| { |
| "episode": 3216, |
| "epoch": 0.08822077138311296, |
| "loss/policy_avg": -0.7593020796775818, |
| "lr": 2.4736842105263158e-05, |
| "objective/entropy": -4.5475921630859375, |
| "objective/kl": 52.44508361816406, |
| "objective/non_score_reward": -5.244508743286133, |
| "objective/rlhf_reward": -16.57803592681885, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 52.09981155395508, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8364603519439697, |
| "step": 200, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000638484954834 |
| }, |
| { |
| "episode": 3232, |
| "epoch": 0.08865968069347671, |
| "loss/policy_avg": 0.44816046953201294, |
| "lr": 2.4710526315789476e-05, |
| "objective/entropy": 92.43020629882812, |
| "objective/kl": 97.43000793457031, |
| "objective/non_score_reward": -9.743000984191895, |
| "objective/rlhf_reward": -34.572002506256105, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 7.917519569396973, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6928500533103943, |
| "step": 201, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.000586986541748 |
| }, |
| { |
| "episode": 3248, |
| "epoch": 0.08909859000384046, |
| "loss/policy_avg": -0.15180611610412598, |
| "lr": 2.468421052631579e-05, |
| "objective/entropy": 115.11419677734375, |
| "objective/kl": 68.31903076171875, |
| "objective/non_score_reward": -6.831902980804443, |
| "objective/rlhf_reward": -29.327611923217773, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.6768676042556763, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7814561128616333, |
| "step": 202, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0007996559143066 |
| }, |
| { |
| "episode": 3264, |
| "epoch": 0.0895374993142042, |
| "loss/policy_avg": 2.1507365703582764, |
| "lr": 2.4657894736842106e-05, |
| "objective/entropy": 131.67218017578125, |
| "objective/kl": 87.50892639160156, |
| "objective/non_score_reward": -8.750892639160156, |
| "objective/rlhf_reward": -34.60357151031494, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 9.72176742553711, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8055911064147949, |
| "step": 203, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9976446628570557 |
| }, |
| { |
| "episode": 3280, |
| "epoch": 0.08997640862456795, |
| "loss/policy_avg": 0.9095442891120911, |
| "lr": 2.463157894736842e-05, |
| "objective/entropy": -201.7689208984375, |
| "objective/kl": 39.41615295410156, |
| "objective/non_score_reward": -3.941615581512451, |
| "objective/rlhf_reward": -17.766462326049805, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 2.0174264907836914, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.743715763092041, |
| "step": 204, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9988057613372803 |
| }, |
| { |
| "episode": 3296, |
| "epoch": 0.0904153179349317, |
| "loss/policy_avg": 2.7806615829467773, |
| "lr": 2.4605263157894736e-05, |
| "objective/entropy": 70.67823028564453, |
| "objective/kl": 75.30030822753906, |
| "objective/non_score_reward": -7.530030250549316, |
| "objective/rlhf_reward": -25.720120763778688, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 23.9725399017334, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8070772290229797, |
| "step": 205, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9965420961380005 |
| }, |
| { |
| "episode": 3312, |
| "epoch": 0.09085422724529545, |
| "loss/policy_avg": -0.4679667055606842, |
| "lr": 2.4578947368421054e-05, |
| "objective/entropy": -221.43614196777344, |
| "objective/kl": 38.17326736450195, |
| "objective/non_score_reward": -3.8173270225524902, |
| "objective/rlhf_reward": -10.86930856704712, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 7.382453918457031, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7196519374847412, |
| "step": 206, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0000643730163574 |
| }, |
| { |
| "episode": 3328, |
| "epoch": 0.09129313655565918, |
| "loss/policy_avg": 0.7069023847579956, |
| "lr": 2.455263157894737e-05, |
| "objective/entropy": 46.485408782958984, |
| "objective/kl": 52.93321228027344, |
| "objective/non_score_reward": -5.29332160949707, |
| "objective/rlhf_reward": -20.773285484313966, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.7949851155281067, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8590609431266785, |
| "step": 207, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002488374710083 |
| }, |
| { |
| "episode": 3344, |
| "epoch": 0.09173204586602293, |
| "loss/policy_avg": -0.6623063683509827, |
| "lr": 2.4526315789473684e-05, |
| "objective/entropy": 45.29491424560547, |
| "objective/kl": 68.79707336425781, |
| "objective/non_score_reward": -6.879707336425781, |
| "objective/rlhf_reward": -27.118828868865968, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 9.599810600280762, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7060703039169312, |
| "step": 208, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0040082931518555 |
| }, |
| { |
| "episode": 3360, |
| "epoch": 0.09217095517638668, |
| "loss/policy_avg": 0.8098376989364624, |
| "lr": 2.45e-05, |
| "objective/entropy": 71.23324584960938, |
| "objective/kl": 51.93505096435547, |
| "objective/non_score_reward": -5.193504810333252, |
| "objective/rlhf_reward": -16.37401876449585, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.758597373962402, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7107915878295898, |
| "step": 209, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.996649980545044 |
| }, |
| { |
| "episode": 3376, |
| "epoch": 0.09260986448675043, |
| "loss/policy_avg": -0.045729756355285645, |
| "lr": 2.4473684210526318e-05, |
| "objective/entropy": 68.11517333984375, |
| "objective/kl": 71.54877471923828, |
| "objective/non_score_reward": -7.154877662658691, |
| "objective/rlhf_reward": -24.219510650634767, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.4835081100463867, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.938834547996521, |
| "step": 210, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.003654956817627 |
| }, |
| { |
| "episode": 3392, |
| "epoch": 0.09304877379711417, |
| "loss/policy_avg": -0.398199200630188, |
| "lr": 2.4447368421052633e-05, |
| "objective/entropy": 22.62654685974121, |
| "objective/kl": 52.52642059326172, |
| "objective/non_score_reward": -5.252641677856445, |
| "objective/rlhf_reward": -16.6105676651001, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.71232008934021, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7540408372879028, |
| "step": 211, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0004401206970215 |
| }, |
| { |
| "episode": 3408, |
| "epoch": 0.09348768310747792, |
| "loss/policy_avg": 0.09533184766769409, |
| "lr": 2.4421052631578948e-05, |
| "objective/entropy": 80.2608642578125, |
| "objective/kl": 69.30525207519531, |
| "objective/non_score_reward": -6.930525779724121, |
| "objective/rlhf_reward": -23.32210216522217, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.8454056978225708, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.9530247449874878, |
| "step": 212, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998732566833496 |
| }, |
| { |
| "episode": 3424, |
| "epoch": 0.09392659241784167, |
| "loss/policy_avg": 0.5622943639755249, |
| "lr": 2.4394736842105262e-05, |
| "objective/entropy": 5.986083984375, |
| "objective/kl": 52.243106842041016, |
| "objective/non_score_reward": -5.224310398101807, |
| "objective/rlhf_reward": -18.77453631378797, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 185.1870880126953, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9663141965866089, |
| "step": 213, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9961705207824707 |
| }, |
| { |
| "episode": 3440, |
| "epoch": 0.0943655017282054, |
| "loss/policy_avg": 0.1358652114868164, |
| "lr": 2.4368421052631577e-05, |
| "objective/entropy": 42.141456604003906, |
| "objective/kl": 73.0504150390625, |
| "objective/non_score_reward": -7.305041313171387, |
| "objective/rlhf_reward": -28.82016525268555, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.950047254562378, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6339312791824341, |
| "step": 214, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002665519714355 |
| }, |
| { |
| "episode": 3456, |
| "epoch": 0.09480441103856915, |
| "loss/policy_avg": 0.0715370774269104, |
| "lr": 2.4342105263157896e-05, |
| "objective/entropy": 42.893985748291016, |
| "objective/kl": 65.23382568359375, |
| "objective/non_score_reward": -6.523383140563965, |
| "objective/rlhf_reward": -25.69353303909302, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 87.95655822753906, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7504739165306091, |
| "step": 215, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9980251789093018 |
| }, |
| { |
| "episode": 3472, |
| "epoch": 0.0952433203489329, |
| "loss/policy_avg": 8.101757049560547, |
| "lr": 2.431578947368421e-05, |
| "objective/entropy": 85.4518051147461, |
| "objective/kl": 72.85520935058594, |
| "objective/non_score_reward": -7.285521030426025, |
| "objective/rlhf_reward": -31.1420841217041, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 16.570175170898438, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7926974296569824, |
| "step": 216, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0041887760162354 |
| }, |
| { |
| "episode": 3488, |
| "epoch": 0.09568222965929665, |
| "loss/policy_avg": 0.8208998441696167, |
| "lr": 2.4289473684210526e-05, |
| "objective/entropy": 54.86122131347656, |
| "objective/kl": 60.157432556152344, |
| "objective/non_score_reward": -6.015743732452393, |
| "objective/rlhf_reward": -19.662973976135255, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.3636417388916016, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9655085206031799, |
| "step": 217, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0007495880126953 |
| }, |
| { |
| "episode": 3504, |
| "epoch": 0.09612113896966039, |
| "loss/policy_avg": -0.5492388010025024, |
| "lr": 2.426315789473684e-05, |
| "objective/entropy": -81.31658935546875, |
| "objective/kl": 44.720726013183594, |
| "objective/non_score_reward": -4.472072601318359, |
| "objective/rlhf_reward": -17.48829040527344, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 60.92509078979492, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.843732476234436, |
| "step": 218, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9994643926620483 |
| }, |
| { |
| "episode": 3520, |
| "epoch": 0.09656004828002414, |
| "loss/policy_avg": 1.4506752490997314, |
| "lr": 2.4236842105263156e-05, |
| "objective/entropy": 45.367652893066406, |
| "objective/kl": 65.80734252929688, |
| "objective/non_score_reward": -6.580735206604004, |
| "objective/rlhf_reward": -21.922941303253175, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.5029066801071167, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7406171560287476, |
| "step": 219, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987130165100098 |
| }, |
| { |
| "episode": 3536, |
| "epoch": 0.09699895759038789, |
| "loss/policy_avg": 0.04402471333742142, |
| "lr": 2.4210526315789474e-05, |
| "objective/entropy": -187.66488647460938, |
| "objective/kl": 39.494014739990234, |
| "objective/non_score_reward": -3.949401378631592, |
| "objective/rlhf_reward": -15.397605514526369, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.20192278921604156, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8446219563484192, |
| "step": 220, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0014162063598633 |
| }, |
| { |
| "episode": 3552, |
| "epoch": 0.09743786690075164, |
| "loss/policy_avg": -0.6722557544708252, |
| "lr": 2.418421052631579e-05, |
| "objective/entropy": 132.76797485351562, |
| "objective/kl": 84.23394775390625, |
| "objective/non_score_reward": -8.423394203186035, |
| "objective/rlhf_reward": -33.293578958511354, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 120.05321502685547, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.9575395584106445, |
| "step": 221, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.997431755065918 |
| }, |
| { |
| "episode": 3568, |
| "epoch": 0.09787677621111537, |
| "loss/policy_avg": -0.47086307406425476, |
| "lr": 2.4157894736842104e-05, |
| "objective/entropy": -79.6368408203125, |
| "objective/kl": 54.54154968261719, |
| "objective/non_score_reward": -5.454154968261719, |
| "objective/rlhf_reward": -17.416619873046876, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 54.655574798583984, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8305655717849731, |
| "step": 222, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.997011661529541 |
| }, |
| { |
| "episode": 3584, |
| "epoch": 0.09831568552147912, |
| "loss/policy_avg": -0.0004381835460662842, |
| "lr": 2.413157894736842e-05, |
| "objective/entropy": 61.53660583496094, |
| "objective/kl": 74.82394409179688, |
| "objective/non_score_reward": -7.482394218444824, |
| "objective/rlhf_reward": -25.529577827453615, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7358994483947754, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8543496131896973, |
| "step": 223, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9997599124908447 |
| }, |
| { |
| "episode": 3600, |
| "epoch": 0.09875459483184287, |
| "loss/policy_avg": 0.07636167109012604, |
| "lr": 2.410526315789474e-05, |
| "objective/entropy": 142.87892150878906, |
| "objective/kl": 79.087890625, |
| "objective/non_score_reward": -7.90878963470459, |
| "objective/rlhf_reward": -27.235158061981203, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 7.760828971862793, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.9195695519447327, |
| "step": 224, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002522468566895 |
| }, |
| { |
| "episode": 3616, |
| "epoch": 0.09919350414220662, |
| "loss/policy_avg": 0.6181021928787231, |
| "lr": 2.4078947368421056e-05, |
| "objective/entropy": 55.48746871948242, |
| "objective/kl": 55.08421325683594, |
| "objective/non_score_reward": -5.508421421051025, |
| "objective/rlhf_reward": -21.633685207366945, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.1698455810546875, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8132386207580566, |
| "step": 225, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9993584156036377 |
| }, |
| { |
| "episode": 3632, |
| "epoch": 0.09963241345257036, |
| "loss/policy_avg": 0.6648176908493042, |
| "lr": 2.405263157894737e-05, |
| "objective/entropy": -195.64773559570312, |
| "objective/kl": 44.247493743896484, |
| "objective/non_score_reward": -4.424749374389648, |
| "objective/rlhf_reward": -13.298997497558595, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9296014904975891, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.8615956902503967, |
| "step": 226, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9994629621505737 |
| }, |
| { |
| "episode": 3648, |
| "epoch": 0.10007132276293411, |
| "loss/policy_avg": 1.097282886505127, |
| "lr": 2.4026315789473686e-05, |
| "objective/entropy": 28.437664031982422, |
| "objective/kl": 63.49753952026367, |
| "objective/non_score_reward": -6.3497538566589355, |
| "objective/rlhf_reward": -20.999015426635744, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.303382635116577, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 1.031259298324585, |
| "step": 227, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9980473518371582 |
| }, |
| { |
| "episode": 3664, |
| "epoch": 0.10051023207329786, |
| "loss/policy_avg": 0.3590712547302246, |
| "lr": 2.4e-05, |
| "objective/entropy": -5.089465141296387, |
| "objective/kl": 73.07501983642578, |
| "objective/non_score_reward": -7.307501792907715, |
| "objective/rlhf_reward": -24.830005264282228, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.681659698486328, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8207137584686279, |
| "step": 228, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0043158531188965 |
| }, |
| { |
| "episode": 3680, |
| "epoch": 0.1009491413836616, |
| "loss/policy_avg": -0.26667794585227966, |
| "lr": 2.397368421052632e-05, |
| "objective/entropy": 4.93023681640625, |
| "objective/kl": 48.020545959472656, |
| "objective/non_score_reward": -4.802054405212402, |
| "objective/rlhf_reward": -14.808218097686769, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.0555940866470337, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 1.0309653282165527, |
| "step": 229, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000304698944092 |
| }, |
| { |
| "episode": 3696, |
| "epoch": 0.10138805069402534, |
| "loss/policy_avg": -1.0239133834838867, |
| "lr": 2.3947368421052634e-05, |
| "objective/entropy": -117.7593994140625, |
| "objective/kl": 66.29994201660156, |
| "objective/non_score_reward": -6.6299943923950195, |
| "objective/rlhf_reward": -22.11997756958008, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 126.52104187011719, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9115900993347168, |
| "step": 230, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9966094493865967 |
| }, |
| { |
| "episode": 3712, |
| "epoch": 0.1018269600043891, |
| "loss/policy_avg": -0.3394533395767212, |
| "lr": 2.392105263157895e-05, |
| "objective/entropy": -26.78044891357422, |
| "objective/kl": 74.928466796875, |
| "objective/non_score_reward": -7.492847442626953, |
| "objective/rlhf_reward": -25.571388816833498, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7065678834915161, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8705970048904419, |
| "step": 231, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999688863754272 |
| }, |
| { |
| "episode": 3728, |
| "epoch": 0.10226586931475284, |
| "loss/policy_avg": 0.10001493245363235, |
| "lr": 2.3894736842105264e-05, |
| "objective/entropy": -13.011394500732422, |
| "objective/kl": 45.67135238647461, |
| "objective/non_score_reward": -4.567135334014893, |
| "objective/rlhf_reward": -13.86854157447815, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.24752560257911682, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.810563325881958, |
| "step": 232, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000312566757202 |
| }, |
| { |
| "episode": 3744, |
| "epoch": 0.10270477862511658, |
| "loss/policy_avg": 0.6616812944412231, |
| "lr": 2.386842105263158e-05, |
| "objective/entropy": 7.605260848999023, |
| "objective/kl": 51.76225280761719, |
| "objective/non_score_reward": -5.176225662231445, |
| "objective/rlhf_reward": -16.304901218414308, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.828341007232666, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 1.038901448249817, |
| "step": 233, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0011584758758545 |
| }, |
| { |
| "episode": 3760, |
| "epoch": 0.10314368793548033, |
| "loss/policy_avg": 0.9933425784111023, |
| "lr": 2.3842105263157897e-05, |
| "objective/entropy": -255.31588745117188, |
| "objective/kl": 24.543170928955078, |
| "objective/non_score_reward": -2.454317092895508, |
| "objective/rlhf_reward": -8.336315753872753, |
| "objective/scores": 0.3702381544273198, |
| "policy/approxkl_avg": 3.459949493408203, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.9003795385360718, |
| "step": 234, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9975780248641968 |
| }, |
| { |
| "episode": 3776, |
| "epoch": 0.10358259724584408, |
| "loss/policy_avg": 2.148421049118042, |
| "lr": 2.3815789473684212e-05, |
| "objective/entropy": 90.86304473876953, |
| "objective/kl": 81.66354370117188, |
| "objective/non_score_reward": -8.166354179382324, |
| "objective/rlhf_reward": -32.265417671203615, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 23.5389347076416, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9693481922149658, |
| "step": 235, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.998524785041809 |
| }, |
| { |
| "episode": 3792, |
| "epoch": 0.10402150655620783, |
| "loss/policy_avg": 1.1006348133087158, |
| "lr": 2.3789473684210527e-05, |
| "objective/entropy": 13.458210945129395, |
| "objective/kl": 63.5147590637207, |
| "objective/non_score_reward": -6.351475715637207, |
| "objective/rlhf_reward": -25.005903816223146, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.5866320729255676, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.84203040599823, |
| "step": 236, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002055168151855 |
| }, |
| { |
| "episode": 3808, |
| "epoch": 0.10446041586657157, |
| "loss/policy_avg": 0.1537262499332428, |
| "lr": 2.3763157894736842e-05, |
| "objective/entropy": -280.88623046875, |
| "objective/kl": 25.253185272216797, |
| "objective/non_score_reward": -2.5253186225891113, |
| "objective/rlhf_reward": -5.701274490356445, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.43143346905708313, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7017552256584167, |
| "step": 237, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0003671646118164 |
| }, |
| { |
| "episode": 3824, |
| "epoch": 0.10489932517693532, |
| "loss/policy_avg": -1.110314130783081, |
| "lr": 2.373684210526316e-05, |
| "objective/entropy": -110.31336975097656, |
| "objective/kl": 62.17368698120117, |
| "objective/non_score_reward": -6.217369079589844, |
| "objective/rlhf_reward": -20.469476318359376, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 41.373291015625, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7687387466430664, |
| "step": 238, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9982497692108154 |
| }, |
| { |
| "episode": 3840, |
| "epoch": 0.10533823448729907, |
| "loss/policy_avg": 0.35918915271759033, |
| "lr": 2.3710526315789475e-05, |
| "objective/entropy": 8.541339874267578, |
| "objective/kl": 67.1448974609375, |
| "objective/non_score_reward": -6.714488983154297, |
| "objective/rlhf_reward": -26.457956886291505, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.951134204864502, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8946194052696228, |
| "step": 239, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990932941436768 |
| }, |
| { |
| "episode": 3856, |
| "epoch": 0.10577714379766281, |
| "loss/policy_avg": -0.18191561102867126, |
| "lr": 2.368421052631579e-05, |
| "objective/entropy": 26.545223236083984, |
| "objective/kl": 42.12986755371094, |
| "objective/non_score_reward": -4.212986946105957, |
| "objective/rlhf_reward": -12.451948022842407, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.1634092330932617, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.8075993657112122, |
| "step": 240, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0009357929229736 |
| }, |
| { |
| "episode": 3872, |
| "epoch": 0.10621605310802655, |
| "loss/policy_avg": -0.21272988617420197, |
| "lr": 2.3657894736842105e-05, |
| "objective/entropy": -43.799278259277344, |
| "objective/kl": 57.01869583129883, |
| "objective/non_score_reward": -5.701869487762451, |
| "objective/rlhf_reward": -18.407477951049806, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.31846147775650024, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.9049383401870728, |
| "step": 241, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.00126051902771 |
| }, |
| { |
| "episode": 3888, |
| "epoch": 0.1066549624183903, |
| "loss/policy_avg": 0.45139366388320923, |
| "lr": 2.363157894736842e-05, |
| "objective/entropy": -8.316286087036133, |
| "objective/kl": 66.7007064819336, |
| "objective/non_score_reward": -6.670070648193359, |
| "objective/rlhf_reward": -22.280281162261964, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.046037197113037, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8059285879135132, |
| "step": 242, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9966351985931396 |
| }, |
| { |
| "episode": 3904, |
| "epoch": 0.10709387172875405, |
| "loss/policy_avg": 0.02522527426481247, |
| "lr": 2.360526315789474e-05, |
| "objective/entropy": -59.79475402832031, |
| "objective/kl": 43.16062927246094, |
| "objective/non_score_reward": -4.316062927246094, |
| "objective/rlhf_reward": -16.864251947402956, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.9877862930297852, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7408619523048401, |
| "step": 243, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999268054962158 |
| }, |
| { |
| "episode": 3920, |
| "epoch": 0.10753278103911779, |
| "loss/policy_avg": -0.4486180543899536, |
| "lr": 2.3578947368421054e-05, |
| "objective/entropy": -58.7701530456543, |
| "objective/kl": 48.37897491455078, |
| "objective/non_score_reward": -4.837897300720215, |
| "objective/rlhf_reward": -14.95158920288086, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 108.83811950683594, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.847912073135376, |
| "step": 244, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9981459379196167 |
| }, |
| { |
| "episode": 3936, |
| "epoch": 0.10797169034948154, |
| "loss/policy_avg": 1.2412970066070557, |
| "lr": 2.355263157894737e-05, |
| "objective/entropy": -6.253645896911621, |
| "objective/kl": 60.986305236816406, |
| "objective/non_score_reward": -6.098630905151367, |
| "objective/rlhf_reward": -23.994523143768312, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 5.727560997009277, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8397929668426514, |
| "step": 245, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.997018814086914 |
| }, |
| { |
| "episode": 3952, |
| "epoch": 0.10841059965984529, |
| "loss/policy_avg": 0.993728518486023, |
| "lr": 2.3526315789473684e-05, |
| "objective/entropy": -7.986781120300293, |
| "objective/kl": 75.30459594726562, |
| "objective/non_score_reward": -7.530459880828857, |
| "objective/rlhf_reward": -25.72183952331543, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 207.10345458984375, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6667543649673462, |
| "step": 246, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9957817792892456 |
| }, |
| { |
| "episode": 3968, |
| "epoch": 0.10884950897020904, |
| "loss/policy_avg": -0.0747595876455307, |
| "lr": 2.3500000000000002e-05, |
| "objective/entropy": -87.14791870117188, |
| "objective/kl": 53.44728469848633, |
| "objective/non_score_reward": -5.344728469848633, |
| "objective/rlhf_reward": -20.978913164138795, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.7891916036605835, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.9153791069984436, |
| "step": 247, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9984283447265625 |
| }, |
| { |
| "episode": 3984, |
| "epoch": 0.10928841828057277, |
| "loss/policy_avg": 1.0588536262512207, |
| "lr": 2.3473684210526317e-05, |
| "objective/entropy": -253.19564819335938, |
| "objective/kl": 25.629247665405273, |
| "objective/non_score_reward": -2.562924861907959, |
| "objective/rlhf_reward": -9.851699447631837, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.8195428848266602, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7758696675300598, |
| "step": 248, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0022389888763428 |
| }, |
| { |
| "episode": 4000, |
| "epoch": 0.10972732759093652, |
| "loss/policy_avg": 0.5859658718109131, |
| "lr": 2.3447368421052632e-05, |
| "objective/entropy": -90.75294494628906, |
| "objective/kl": 59.36740493774414, |
| "objective/non_score_reward": -5.936740875244141, |
| "objective/rlhf_reward": -23.346962547302248, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.5736149549484253, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7139400243759155, |
| "step": 249, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.002814292907715 |
| }, |
| { |
| "episode": 4016, |
| "epoch": 0.11016623690130027, |
| "loss/policy_avg": 1.2698756456375122, |
| "lr": 2.3421052631578947e-05, |
| "objective/entropy": 8.61981201171875, |
| "objective/kl": 45.71005630493164, |
| "objective/non_score_reward": -4.571005821228027, |
| "objective/rlhf_reward": -13.884023761749269, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.179641246795654, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.9620411396026611, |
| "step": 250, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9981392621994019 |
| }, |
| { |
| "episode": 4032, |
| "epoch": 0.11060514621166402, |
| "loss/policy_avg": -0.1636083722114563, |
| "lr": 2.3394736842105262e-05, |
| "objective/entropy": -46.70410919189453, |
| "objective/kl": 52.554317474365234, |
| "objective/non_score_reward": -5.25543212890625, |
| "objective/rlhf_reward": -16.621727204322816, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 185.56044006347656, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7868727445602417, |
| "step": 251, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0018248558044434 |
| }, |
| { |
| "episode": 4048, |
| "epoch": 0.11104405552202776, |
| "loss/policy_avg": 0.20726332068443298, |
| "lr": 2.336842105263158e-05, |
| "objective/entropy": 42.283023834228516, |
| "objective/kl": 60.69983673095703, |
| "objective/non_score_reward": -6.06998348236084, |
| "objective/rlhf_reward": -23.879934883117677, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 299.7823486328125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8600380420684814, |
| "step": 252, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001103401184082 |
| }, |
| { |
| "episode": 4064, |
| "epoch": 0.1114829648323915, |
| "loss/policy_avg": -0.7184450030326843, |
| "lr": 2.3342105263157895e-05, |
| "objective/entropy": -167.64480590820312, |
| "objective/kl": 39.79289245605469, |
| "objective/non_score_reward": -3.9792890548706055, |
| "objective/rlhf_reward": -13.517155742645265, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 35.35560607910156, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7276349663734436, |
| "step": 253, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0005733966827393 |
| }, |
| { |
| "episode": 4080, |
| "epoch": 0.11192187414275526, |
| "loss/policy_avg": 0.17252863943576813, |
| "lr": 2.331578947368421e-05, |
| "objective/entropy": -28.098251342773438, |
| "objective/kl": 77.68197631835938, |
| "objective/non_score_reward": -7.768197536468506, |
| "objective/rlhf_reward": -30.67278919219971, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 190.54446411132812, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7322374582290649, |
| "step": 254, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.996679663658142 |
| }, |
| { |
| "episode": 4096, |
| "epoch": 0.11236078345311899, |
| "loss/policy_avg": 0.4479329288005829, |
| "lr": 2.3289473684210525e-05, |
| "objective/entropy": -36.64995193481445, |
| "objective/kl": 59.990604400634766, |
| "objective/non_score_reward": -5.999059677124023, |
| "objective/rlhf_reward": -21.07252112472174, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 2.55191969871521, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8982864618301392, |
| "step": 255, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9995880126953125 |
| }, |
| { |
| "episode": 4112, |
| "epoch": 0.11279969276348274, |
| "loss/policy_avg": 0.10965825617313385, |
| "lr": 2.326315789473684e-05, |
| "objective/entropy": -107.83656311035156, |
| "objective/kl": 57.40251922607422, |
| "objective/non_score_reward": -5.740252494812012, |
| "objective/rlhf_reward": -18.56100950241089, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5369773507118225, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9820707440376282, |
| "step": 256, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9997625350952148 |
| }, |
| { |
| "episode": 4128, |
| "epoch": 0.11323860207384649, |
| "loss/policy_avg": 1.0212502479553223, |
| "lr": 2.323684210526316e-05, |
| "objective/entropy": -31.868837356567383, |
| "objective/kl": 72.33842468261719, |
| "objective/non_score_reward": -7.233841896057129, |
| "objective/rlhf_reward": -24.535368537902833, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.164497375488281, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8438643217086792, |
| "step": 257, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9965769052505493 |
| }, |
| { |
| "episode": 4144, |
| "epoch": 0.11367751138421024, |
| "loss/policy_avg": 0.17471346259117126, |
| "lr": 2.3210526315789473e-05, |
| "objective/entropy": -30.013629913330078, |
| "objective/kl": 78.91902160644531, |
| "objective/non_score_reward": -7.891902923583984, |
| "objective/rlhf_reward": -31.16761121749878, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 4.068915367126465, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7715609073638916, |
| "step": 258, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999107003211975 |
| }, |
| { |
| "episode": 4160, |
| "epoch": 0.11411642069457398, |
| "loss/policy_avg": 0.03435403108596802, |
| "lr": 2.318421052631579e-05, |
| "objective/entropy": -285.7454833984375, |
| "objective/kl": 18.901094436645508, |
| "objective/non_score_reward": -1.8901095390319824, |
| "objective/rlhf_reward": -3.1604381561279293, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.1570599377155304, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.9358274936676025, |
| "step": 259, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.000042676925659 |
| }, |
| { |
| "episode": 4176, |
| "epoch": 0.11455533000493773, |
| "loss/policy_avg": 0.253903329372406, |
| "lr": 2.3157894736842103e-05, |
| "objective/entropy": -43.559417724609375, |
| "objective/kl": 62.325313568115234, |
| "objective/non_score_reward": -6.2325310707092285, |
| "objective/rlhf_reward": -24.530125236511232, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.8882020711898804, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 1.0222952365875244, |
| "step": 260, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.002045154571533 |
| }, |
| { |
| "episode": 4192, |
| "epoch": 0.11499423931530148, |
| "loss/policy_avg": -0.259467214345932, |
| "lr": 2.3131578947368422e-05, |
| "objective/entropy": -50.13906478881836, |
| "objective/kl": 56.37471008300781, |
| "objective/non_score_reward": -5.6374711990356445, |
| "objective/rlhf_reward": -22.149885749816896, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.73235023021698, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.9029992818832397, |
| "step": 261, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000865936279297 |
| }, |
| { |
| "episode": 4208, |
| "epoch": 0.11543314862566523, |
| "loss/policy_avg": -0.007709167897701263, |
| "lr": 2.3105263157894737e-05, |
| "objective/entropy": -27.86676788330078, |
| "objective/kl": 55.32978439331055, |
| "objective/non_score_reward": -5.532978534698486, |
| "objective/rlhf_reward": -21.731915092468263, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.949405312538147, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7955001592636108, |
| "step": 262, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0025434494018555 |
| }, |
| { |
| "episode": 4224, |
| "epoch": 0.11587205793602896, |
| "loss/policy_avg": 0.07850819826126099, |
| "lr": 2.3078947368421052e-05, |
| "objective/entropy": -71.25007629394531, |
| "objective/kl": 45.41839599609375, |
| "objective/non_score_reward": -4.541839599609375, |
| "objective/rlhf_reward": -17.767357921600343, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.0701725482940674, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.690671980381012, |
| "step": 263, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001460313796997 |
| }, |
| { |
| "episode": 4240, |
| "epoch": 0.11631096724639271, |
| "loss/policy_avg": -0.4324986934661865, |
| "lr": 2.3052631578947367e-05, |
| "objective/entropy": -28.658288955688477, |
| "objective/kl": 58.944786071777344, |
| "objective/non_score_reward": -5.894477844238281, |
| "objective/rlhf_reward": -23.177911853790285, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.1384832859039307, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7700298428535461, |
| "step": 264, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0013980865478516 |
| }, |
| { |
| "episode": 4256, |
| "epoch": 0.11674987655675646, |
| "loss/policy_avg": 0.03035491704940796, |
| "lr": 2.3026315789473685e-05, |
| "objective/entropy": -80.87973022460938, |
| "objective/kl": 47.47106170654297, |
| "objective/non_score_reward": -4.747106075286865, |
| "objective/rlhf_reward": -18.588424301147462, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.1451961100101471, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7782449722290039, |
| "step": 265, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0004281997680664 |
| }, |
| { |
| "episode": 4272, |
| "epoch": 0.11718878586712021, |
| "loss/policy_avg": 0.15458990633487701, |
| "lr": 2.3000000000000003e-05, |
| "objective/entropy": -125.90150451660156, |
| "objective/kl": 68.26533508300781, |
| "objective/non_score_reward": -6.826533317565918, |
| "objective/rlhf_reward": -22.90613374710083, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.2503981590270996, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6700058579444885, |
| "step": 266, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9996408224105835 |
| }, |
| { |
| "episode": 4288, |
| "epoch": 0.11762769517748395, |
| "loss/policy_avg": -0.07002025842666626, |
| "lr": 2.297368421052632e-05, |
| "objective/entropy": -39.36412811279297, |
| "objective/kl": 64.72374725341797, |
| "objective/non_score_reward": -6.47237491607666, |
| "objective/rlhf_reward": -21.489498233795167, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8438606262207031, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7219110727310181, |
| "step": 267, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002565860748291 |
| }, |
| { |
| "episode": 4304, |
| "epoch": 0.1180666044878477, |
| "loss/policy_avg": 2.583950996398926, |
| "lr": 2.2947368421052633e-05, |
| "objective/entropy": -39.74678421020508, |
| "objective/kl": 60.672142028808594, |
| "objective/non_score_reward": -6.067214012145996, |
| "objective/rlhf_reward": -19.868857002258302, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.409726142883301, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7915390729904175, |
| "step": 268, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0011162757873535 |
| }, |
| { |
| "episode": 4320, |
| "epoch": 0.11850551379821145, |
| "loss/policy_avg": -0.9337702989578247, |
| "lr": 2.292105263157895e-05, |
| "objective/entropy": -6.267377853393555, |
| "objective/kl": 55.53129959106445, |
| "objective/non_score_reward": -5.553130149841309, |
| "objective/rlhf_reward": -21.812520599365236, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.0464932918548584, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8756027221679688, |
| "step": 269, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002498626708984 |
| }, |
| { |
| "episode": 4336, |
| "epoch": 0.11894442310857518, |
| "loss/policy_avg": 0.41067397594451904, |
| "lr": 2.2894736842105263e-05, |
| "objective/entropy": -10.552501678466797, |
| "objective/kl": 59.17110824584961, |
| "objective/non_score_reward": -5.917110443115234, |
| "objective/rlhf_reward": -19.268443202972414, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.47551509737968445, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6988734006881714, |
| "step": 270, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0003862380981445 |
| }, |
| { |
| "episode": 4352, |
| "epoch": 0.11938333241893893, |
| "loss/policy_avg": 0.19564735889434814, |
| "lr": 2.286842105263158e-05, |
| "objective/entropy": -27.893587112426758, |
| "objective/kl": 63.20972442626953, |
| "objective/non_score_reward": -6.320972442626953, |
| "objective/rlhf_reward": -20.883888816833498, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.4529670476913452, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6984528303146362, |
| "step": 271, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0030579566955566 |
| }, |
| { |
| "episode": 4368, |
| "epoch": 0.11982224172930268, |
| "loss/policy_avg": -0.5308524370193481, |
| "lr": 2.2842105263157897e-05, |
| "objective/entropy": -49.50511169433594, |
| "objective/kl": 54.02558135986328, |
| "objective/non_score_reward": -5.402558326721191, |
| "objective/rlhf_reward": -17.21023235321045, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7996609210968018, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7930437326431274, |
| "step": 272, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001534938812256 |
| }, |
| { |
| "episode": 4384, |
| "epoch": 0.12026115103966643, |
| "loss/policy_avg": 0.7527059316635132, |
| "lr": 2.281578947368421e-05, |
| "objective/entropy": -54.087158203125, |
| "objective/kl": 74.53093719482422, |
| "objective/non_score_reward": -7.453094482421875, |
| "objective/rlhf_reward": -25.412376976013185, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.522714138031006, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.84335857629776, |
| "step": 273, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9970676898956299 |
| }, |
| { |
| "episode": 4400, |
| "epoch": 0.12070006035003017, |
| "loss/policy_avg": 2.354644536972046, |
| "lr": 2.2789473684210527e-05, |
| "objective/entropy": -57.67284393310547, |
| "objective/kl": 61.635345458984375, |
| "objective/non_score_reward": -6.163534164428711, |
| "objective/rlhf_reward": -20.25413808822632, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.8387179374694824, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7114452123641968, |
| "step": 274, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9973610639572144 |
| }, |
| { |
| "episode": 4416, |
| "epoch": 0.12113896966039392, |
| "loss/policy_avg": 0.33613622188568115, |
| "lr": 2.2763157894736845e-05, |
| "objective/entropy": -16.483137130737305, |
| "objective/kl": 80.16790771484375, |
| "objective/non_score_reward": -8.016791343688965, |
| "objective/rlhf_reward": -30.119753668980536, |
| "objective/scores": 0.4868528072345416, |
| "policy/approxkl_avg": 384.392578125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7436220645904541, |
| "step": 275, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999522089958191 |
| }, |
| { |
| "episode": 4432, |
| "epoch": 0.12157787897075767, |
| "loss/policy_avg": 1.4383784532546997, |
| "lr": 2.273684210526316e-05, |
| "objective/entropy": -57.518455505371094, |
| "objective/kl": 60.987579345703125, |
| "objective/non_score_reward": -6.098758220672607, |
| "objective/rlhf_reward": -22.27232569672254, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 4.061164855957031, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6441831588745117, |
| "step": 276, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9998172521591187 |
| }, |
| { |
| "episode": 4448, |
| "epoch": 0.12201678828112142, |
| "loss/policy_avg": 1.3634085655212402, |
| "lr": 2.2710526315789475e-05, |
| "objective/entropy": -76.76998901367188, |
| "objective/kl": 48.89331817626953, |
| "objective/non_score_reward": -4.889332294464111, |
| "objective/rlhf_reward": -15.157328701019289, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.423219919204712, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7695807814598083, |
| "step": 277, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9966275691986084 |
| }, |
| { |
| "episode": 4464, |
| "epoch": 0.12245569759148515, |
| "loss/policy_avg": 0.3571290373802185, |
| "lr": 2.268421052631579e-05, |
| "objective/entropy": -40.89424514770508, |
| "objective/kl": 55.5726432800293, |
| "objective/non_score_reward": -5.55726432800293, |
| "objective/rlhf_reward": -17.829056835174562, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.3838818073272705, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7596747875213623, |
| "step": 278, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000396728515625 |
| }, |
| { |
| "episode": 4480, |
| "epoch": 0.1228946069018489, |
| "loss/policy_avg": 0.5862584710121155, |
| "lr": 2.2657894736842105e-05, |
| "objective/entropy": -71.57767486572266, |
| "objective/kl": 64.86282348632812, |
| "objective/non_score_reward": -6.4862823486328125, |
| "objective/rlhf_reward": -21.545130825042726, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7939838171005249, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6686462163925171, |
| "step": 279, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001072883605957 |
| }, |
| { |
| "episode": 4496, |
| "epoch": 0.12333351621221265, |
| "loss/policy_avg": -0.3135361969470978, |
| "lr": 2.2631578947368423e-05, |
| "objective/entropy": -197.05056762695312, |
| "objective/kl": 54.46437072753906, |
| "objective/non_score_reward": -5.446436882019043, |
| "objective/rlhf_reward": -17.385747528076173, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 128.93673706054688, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8125574588775635, |
| "step": 280, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.997973918914795 |
| }, |
| { |
| "episode": 4512, |
| "epoch": 0.1237724255225764, |
| "loss/policy_avg": 0.19447273015975952, |
| "lr": 2.2605263157894738e-05, |
| "objective/entropy": -70.06492614746094, |
| "objective/kl": 65.292724609375, |
| "objective/non_score_reward": -6.52927303314209, |
| "objective/rlhf_reward": -23.71709213256836, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 4.314207553863525, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8678346872329712, |
| "step": 281, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.997523546218872 |
| }, |
| { |
| "episode": 4528, |
| "epoch": 0.12421133483294014, |
| "loss/policy_avg": 1.5640308856964111, |
| "lr": 2.2578947368421053e-05, |
| "objective/entropy": 5.301499366760254, |
| "objective/kl": 56.73728942871094, |
| "objective/non_score_reward": -5.673728942871094, |
| "objective/rlhf_reward": -24.694915771484375, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 221.279052734375, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.539535403251648, |
| "step": 282, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 2, |
| "val/ratio": 1.9981220960617065 |
| }, |
| { |
| "episode": 4544, |
| "epoch": 0.12465024414330389, |
| "loss/policy_avg": -0.15352113544940948, |
| "lr": 2.2552631578947368e-05, |
| "objective/entropy": -116.27902221679688, |
| "objective/kl": 70.43946838378906, |
| "objective/non_score_reward": -7.043946743011475, |
| "objective/rlhf_reward": -27.77578649520874, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.8236973881721497, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5488464832305908, |
| "step": 283, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0008316040039062 |
| }, |
| { |
| "episode": 4560, |
| "epoch": 0.12508915345366764, |
| "loss/policy_avg": -0.3702731728553772, |
| "lr": 2.2526315789473686e-05, |
| "objective/entropy": -167.31336975097656, |
| "objective/kl": 39.96023178100586, |
| "objective/non_score_reward": -3.996023178100586, |
| "objective/rlhf_reward": -15.584092235565187, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 56.35317611694336, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9425455331802368, |
| "step": 284, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9981685876846313 |
| }, |
| { |
| "episode": 4576, |
| "epoch": 0.1255280627640314, |
| "loss/policy_avg": 0.20314961671829224, |
| "lr": 2.25e-05, |
| "objective/entropy": -204.00357055664062, |
| "objective/kl": 47.920372009277344, |
| "objective/non_score_reward": -4.792037010192871, |
| "objective/rlhf_reward": -14.768148040771486, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 79.69387817382812, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8037855625152588, |
| "step": 285, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9953657388687134 |
| }, |
| { |
| "episode": 4592, |
| "epoch": 0.12596697207439514, |
| "loss/policy_avg": 1.1937806606292725, |
| "lr": 2.2473684210526316e-05, |
| "objective/entropy": -74.75482177734375, |
| "objective/kl": 61.91727828979492, |
| "objective/non_score_reward": -6.191727638244629, |
| "objective/rlhf_reward": -22.644205751196417, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 0.780440628528595, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9212397933006287, |
| "step": 286, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000117540359497 |
| }, |
| { |
| "episode": 4608, |
| "epoch": 0.12640588138475886, |
| "loss/policy_avg": -0.05165944993495941, |
| "lr": 2.244736842105263e-05, |
| "objective/entropy": -65.29678344726562, |
| "objective/kl": 58.81901168823242, |
| "objective/non_score_reward": -5.881901264190674, |
| "objective/rlhf_reward": -19.127605056762697, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.41273033618927, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5402939915657043, |
| "step": 287, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998548984527588 |
| }, |
| { |
| "episode": 4624, |
| "epoch": 0.1268447906951226, |
| "loss/policy_avg": 0.4261413514614105, |
| "lr": 2.2421052631578946e-05, |
| "objective/entropy": -260.0628662109375, |
| "objective/kl": 54.19302749633789, |
| "objective/non_score_reward": -5.419302940368652, |
| "objective/rlhf_reward": -21.27721176147461, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.7060222625732422, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6860594749450684, |
| "step": 288, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0012943744659424 |
| }, |
| { |
| "episode": 4640, |
| "epoch": 0.12728370000548636, |
| "loss/policy_avg": 0.21566912531852722, |
| "lr": 2.2394736842105265e-05, |
| "objective/entropy": -55.46482849121094, |
| "objective/kl": 62.80320739746094, |
| "objective/non_score_reward": -6.28032112121582, |
| "objective/rlhf_reward": -24.721283531188966, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.185427188873291, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9172104597091675, |
| "step": 289, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9994546175003052 |
| }, |
| { |
| "episode": 4656, |
| "epoch": 0.1277226093158501, |
| "loss/policy_avg": 0.4541619122028351, |
| "lr": 2.236842105263158e-05, |
| "objective/entropy": -61.278629302978516, |
| "objective/kl": 46.581844329833984, |
| "objective/non_score_reward": -4.658184051513672, |
| "objective/rlhf_reward": -14.232737159729005, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.593280076980591, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6413055658340454, |
| "step": 290, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9979616403579712 |
| }, |
| { |
| "episode": 4672, |
| "epoch": 0.12816151862621386, |
| "loss/policy_avg": 2.0460634231567383, |
| "lr": 2.2342105263157895e-05, |
| "objective/entropy": -106.77190399169922, |
| "objective/kl": 54.43701934814453, |
| "objective/non_score_reward": -5.443702220916748, |
| "objective/rlhf_reward": -17.374808883666994, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7687790393829346, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7337887287139893, |
| "step": 291, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9968886375427246 |
| }, |
| { |
| "episode": 4688, |
| "epoch": 0.1286004279365776, |
| "loss/policy_avg": 0.13285712897777557, |
| "lr": 2.231578947368421e-05, |
| "objective/entropy": -54.86618423461914, |
| "objective/kl": 64.16263580322266, |
| "objective/non_score_reward": -6.416263580322266, |
| "objective/rlhf_reward": -21.265054798126222, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.0963797569274902, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7351999282836914, |
| "step": 292, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9985344409942627 |
| }, |
| { |
| "episode": 4704, |
| "epoch": 0.12903933724694136, |
| "loss/policy_avg": 0.5066732168197632, |
| "lr": 2.2289473684210525e-05, |
| "objective/entropy": -5.0934295654296875, |
| "objective/kl": 55.561546325683594, |
| "objective/non_score_reward": -5.556154727935791, |
| "objective/rlhf_reward": -17.824618911743165, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9651141166687012, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.736973762512207, |
| "step": 293, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9991990327835083 |
| }, |
| { |
| "episode": 4720, |
| "epoch": 0.1294782465573051, |
| "loss/policy_avg": -0.49058061838150024, |
| "lr": 2.2263157894736843e-05, |
| "objective/entropy": -31.643150329589844, |
| "objective/kl": 91.68218994140625, |
| "objective/non_score_reward": -9.168219566345215, |
| "objective/rlhf_reward": -32.27287635803223, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 50.992034912109375, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8162568807601929, |
| "step": 294, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000028133392334 |
| }, |
| { |
| "episode": 4736, |
| "epoch": 0.12991715586766883, |
| "loss/policy_avg": 0.280429482460022, |
| "lr": 2.2236842105263158e-05, |
| "objective/entropy": -37.46349334716797, |
| "objective/kl": 56.00715637207031, |
| "objective/non_score_reward": -5.600715637207031, |
| "objective/rlhf_reward": -18.002863502502443, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.3156410455703735, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8110268115997314, |
| "step": 295, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9986287355422974 |
| }, |
| { |
| "episode": 4752, |
| "epoch": 0.13035606517803258, |
| "loss/policy_avg": 0.46459612250328064, |
| "lr": 2.2210526315789473e-05, |
| "objective/entropy": -301.3935241699219, |
| "objective/kl": 36.17216110229492, |
| "objective/non_score_reward": -3.617216110229492, |
| "objective/rlhf_reward": -14.06886444091797, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.247771978378296, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8073223233222961, |
| "step": 296, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9980659484863281 |
| }, |
| { |
| "episode": 4768, |
| "epoch": 0.13079497448839633, |
| "loss/policy_avg": -0.6589095592498779, |
| "lr": 2.2184210526315788e-05, |
| "objective/entropy": -87.08273315429688, |
| "objective/kl": 53.673248291015625, |
| "objective/non_score_reward": -5.3673248291015625, |
| "objective/rlhf_reward": -17.06929979324341, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 41.84059143066406, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8697096705436707, |
| "step": 297, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.999763011932373 |
| }, |
| { |
| "episode": 4784, |
| "epoch": 0.13123388379876008, |
| "loss/policy_avg": 0.5723211765289307, |
| "lr": 2.2157894736842106e-05, |
| "objective/entropy": -102.64724731445312, |
| "objective/kl": 60.75276184082031, |
| "objective/non_score_reward": -6.075276851654053, |
| "objective/rlhf_reward": -19.901106929779054, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.05747127532959, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7324357032775879, |
| "step": 298, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990253448486328 |
| }, |
| { |
| "episode": 4800, |
| "epoch": 0.13167279310912383, |
| "loss/policy_avg": 0.4579891562461853, |
| "lr": 2.213157894736842e-05, |
| "objective/entropy": -297.0541687011719, |
| "objective/kl": 35.25475311279297, |
| "objective/non_score_reward": -3.525475263595581, |
| "objective/rlhf_reward": -12.4977810717264, |
| "objective/scores": 0.40102999566398123, |
| "policy/approxkl_avg": 0.13317279517650604, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7126107811927795, |
| "step": 299, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 1.9996960163116455 |
| }, |
| { |
| "episode": 4816, |
| "epoch": 0.13211170241948758, |
| "loss/policy_avg": 5.555450439453125, |
| "lr": 2.2105263157894736e-05, |
| "objective/entropy": -8.623800277709961, |
| "objective/kl": 106.37024688720703, |
| "objective/non_score_reward": -10.637025833129883, |
| "objective/rlhf_reward": -42.148100471496576, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 105.52049255371094, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7556277513504028, |
| "step": 300, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987742900848389 |
| }, |
| { |
| "episode": 4832, |
| "epoch": 0.13255061172985133, |
| "loss/policy_avg": 1.14644193649292, |
| "lr": 2.207894736842105e-05, |
| "objective/entropy": -50.783111572265625, |
| "objective/kl": 64.76817321777344, |
| "objective/non_score_reward": -6.4768171310424805, |
| "objective/rlhf_reward": -21.507267570495607, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.36064863204956055, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7552889585494995, |
| "step": 301, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0014567375183105 |
| }, |
| { |
| "episode": 4848, |
| "epoch": 0.13298952104021505, |
| "loss/policy_avg": -1.4459398984909058, |
| "lr": 2.2052631578947366e-05, |
| "objective/entropy": -131.6910858154297, |
| "objective/kl": 60.966819763183594, |
| "objective/non_score_reward": -6.096682548522949, |
| "objective/rlhf_reward": -19.986729240417482, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 84.77787780761719, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6685715913772583, |
| "step": 302, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9999291896820068 |
| }, |
| { |
| "episode": 4864, |
| "epoch": 0.1334284303505788, |
| "loss/policy_avg": 2.2287280559539795, |
| "lr": 2.2026315789473684e-05, |
| "objective/entropy": -86.05450439453125, |
| "objective/kl": 63.66607666015625, |
| "objective/non_score_reward": -6.366608619689941, |
| "objective/rlhf_reward": -25.06643376350403, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 118.51611328125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8448888063430786, |
| "step": 303, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9962316751480103 |
| }, |
| { |
| "episode": 4880, |
| "epoch": 0.13386733966094255, |
| "loss/policy_avg": -0.04208461940288544, |
| "lr": 2.2e-05, |
| "objective/entropy": -136.94601440429688, |
| "objective/kl": 46.04341506958008, |
| "objective/non_score_reward": -4.604341506958008, |
| "objective/rlhf_reward": -14.01736650466919, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.618884325027466, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.6870962381362915, |
| "step": 304, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000941038131714 |
| }, |
| { |
| "episode": 4896, |
| "epoch": 0.1343062489713063, |
| "loss/policy_avg": 0.08847332000732422, |
| "lr": 2.1973684210526314e-05, |
| "objective/entropy": -113.82402801513672, |
| "objective/kl": 73.18362426757812, |
| "objective/non_score_reward": -7.318362236022949, |
| "objective/rlhf_reward": -24.873450374603273, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.0231006145477295, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8040682077407837, |
| "step": 305, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9995307922363281 |
| }, |
| { |
| "episode": 4912, |
| "epoch": 0.13474515828167005, |
| "loss/policy_avg": -0.6677903532981873, |
| "lr": 2.1947368421052633e-05, |
| "objective/entropy": -184.19952392578125, |
| "objective/kl": 45.321136474609375, |
| "objective/non_score_reward": -4.532113075256348, |
| "objective/rlhf_reward": -15.728453254699708, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 19.28260040283203, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8651016354560852, |
| "step": 306, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9984790086746216 |
| }, |
| { |
| "episode": 4928, |
| "epoch": 0.1351840675920338, |
| "loss/policy_avg": -0.6809933185577393, |
| "lr": 2.192105263157895e-05, |
| "objective/entropy": -110.36442565917969, |
| "objective/kl": 64.39363098144531, |
| "objective/non_score_reward": -6.4393630027771, |
| "objective/rlhf_reward": -21.3574520111084, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.8575007915496826, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7239686250686646, |
| "step": 307, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9992657899856567 |
| }, |
| { |
| "episode": 4944, |
| "epoch": 0.13562297690239755, |
| "loss/policy_avg": 0.12154096364974976, |
| "lr": 2.1894736842105266e-05, |
| "objective/entropy": -269.3300476074219, |
| "objective/kl": 36.127044677734375, |
| "objective/non_score_reward": -3.6127047538757324, |
| "objective/rlhf_reward": -11.527099524379942, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 0.3908675014972687, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8144656419754028, |
| "step": 308, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9992928504943848 |
| }, |
| { |
| "episode": 4960, |
| "epoch": 0.1360618862127613, |
| "loss/policy_avg": -0.45784494280815125, |
| "lr": 2.186842105263158e-05, |
| "objective/entropy": -53.075233459472656, |
| "objective/kl": 55.653831481933594, |
| "objective/non_score_reward": -5.565382957458496, |
| "objective/rlhf_reward": -17.861531829833986, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.0932250022888184, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6408039331436157, |
| "step": 309, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9994187355041504 |
| }, |
| { |
| "episode": 4976, |
| "epoch": 0.13650079552312502, |
| "loss/policy_avg": 1.0032461881637573, |
| "lr": 2.1842105263157896e-05, |
| "objective/entropy": -32.92076873779297, |
| "objective/kl": 68.55870056152344, |
| "objective/non_score_reward": -6.855870246887207, |
| "objective/rlhf_reward": -27.02348051071167, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.289876699447632, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8963852524757385, |
| "step": 310, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9983676671981812 |
| }, |
| { |
| "episode": 4992, |
| "epoch": 0.13693970483348877, |
| "loss/policy_avg": -0.9319555759429932, |
| "lr": 2.181578947368421e-05, |
| "objective/entropy": -155.17787170410156, |
| "objective/kl": 57.721900939941406, |
| "objective/non_score_reward": -5.772191047668457, |
| "objective/rlhf_reward": -18.68876419067383, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 104.70545959472656, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.9497686624526978, |
| "step": 311, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9971158504486084 |
| }, |
| { |
| "episode": 5008, |
| "epoch": 0.13737861414385252, |
| "loss/policy_avg": 4.808812141418457, |
| "lr": 2.178947368421053e-05, |
| "objective/entropy": 16.19580841064453, |
| "objective/kl": 73.25709533691406, |
| "objective/non_score_reward": -7.325709342956543, |
| "objective/rlhf_reward": -28.902838027477266, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 66.1017837524414, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7183545827865601, |
| "step": 312, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9997210502624512 |
| }, |
| { |
| "episode": 5024, |
| "epoch": 0.13781752345421627, |
| "loss/policy_avg": -0.00417676568031311, |
| "lr": 2.1763157894736844e-05, |
| "objective/entropy": -36.66968536376953, |
| "objective/kl": 69.7296142578125, |
| "objective/non_score_reward": -6.972960948944092, |
| "objective/rlhf_reward": -24.968125258327696, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 0.3599999248981476, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8020345568656921, |
| "step": 313, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0006017684936523 |
| }, |
| { |
| "episode": 5040, |
| "epoch": 0.13825643276458002, |
| "loss/policy_avg": 1.2215163707733154, |
| "lr": 2.173684210526316e-05, |
| "objective/entropy": 31.91278648376465, |
| "objective/kl": 60.51493453979492, |
| "objective/non_score_reward": -6.0514936447143555, |
| "objective/rlhf_reward": -22.690202319415743, |
| "objective/scores": 0.37894294565112985, |
| "policy/approxkl_avg": 3.8936374187469482, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9649239778518677, |
| "step": 314, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9966028928756714 |
| }, |
| { |
| "episode": 5056, |
| "epoch": 0.13869534207494377, |
| "loss/policy_avg": 0.34088611602783203, |
| "lr": 2.1710526315789474e-05, |
| "objective/entropy": -262.01800537109375, |
| "objective/kl": 25.127941131591797, |
| "objective/non_score_reward": -2.512794256210327, |
| "objective/rlhf_reward": -9.65117702484131, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.9674937725067139, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 1.1907947063446045, |
| "step": 315, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 1.9991812705993652 |
| }, |
| { |
| "episode": 5072, |
| "epoch": 0.13913425138530752, |
| "loss/policy_avg": 0.8795095086097717, |
| "lr": 2.168421052631579e-05, |
| "objective/entropy": -25.965755462646484, |
| "objective/kl": 67.08163452148438, |
| "objective/non_score_reward": -6.708163738250732, |
| "objective/rlhf_reward": -26.43265542984009, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.6068958044052124, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9191763401031494, |
| "step": 316, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9982707500457764 |
| }, |
| { |
| "episode": 5088, |
| "epoch": 0.13957316069567124, |
| "loss/policy_avg": 4.1575822830200195, |
| "lr": 2.1657894736842108e-05, |
| "objective/entropy": 75.68894958496094, |
| "objective/kl": 100.57203674316406, |
| "objective/non_score_reward": -10.057204246520996, |
| "objective/rlhf_reward": -42.228816986083984, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 114.35267639160156, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9920985698699951, |
| "step": 317, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9952189922332764 |
| }, |
| { |
| "episode": 5104, |
| "epoch": 0.140012070006035, |
| "loss/policy_avg": 5.1927056312561035, |
| "lr": 2.1631578947368423e-05, |
| "objective/entropy": -85.5977783203125, |
| "objective/kl": 47.872772216796875, |
| "objective/non_score_reward": -4.787276744842529, |
| "objective/rlhf_reward": -21.149106979370117, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 68.31144714355469, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7419743537902832, |
| "step": 318, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9981746673583984 |
| }, |
| { |
| "episode": 5120, |
| "epoch": 0.14045097931639874, |
| "loss/policy_avg": 0.17260417342185974, |
| "lr": 2.1605263157894738e-05, |
| "objective/entropy": 5.903343200683594, |
| "objective/kl": 58.757911682128906, |
| "objective/non_score_reward": -5.875791072845459, |
| "objective/rlhf_reward": -23.103164291381837, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.40057724714279175, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9249540567398071, |
| "step": 319, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001283645629883 |
| }, |
| { |
| "episode": 5136, |
| "epoch": 0.1408898886267625, |
| "loss/policy_avg": 0.7069985866546631, |
| "lr": 2.1578947368421053e-05, |
| "objective/entropy": -40.48529052734375, |
| "objective/kl": 66.60336303710938, |
| "objective/non_score_reward": -6.660336494445801, |
| "objective/rlhf_reward": -26.24134693145752, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6525074243545532, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6802330613136292, |
| "step": 320, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002126693725586 |
| }, |
| { |
| "episode": 5152, |
| "epoch": 0.14132879793712624, |
| "loss/policy_avg": 1.3065457344055176, |
| "lr": 2.155263157894737e-05, |
| "objective/entropy": -42.642120361328125, |
| "objective/kl": 65.40618896484375, |
| "objective/non_score_reward": -6.540618419647217, |
| "objective/rlhf_reward": -21.762474632263185, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 87.9366455078125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7430741190910339, |
| "step": 321, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9962996244430542 |
| }, |
| { |
| "episode": 5168, |
| "epoch": 0.14176770724749, |
| "loss/policy_avg": 1.3831791877746582, |
| "lr": 2.1526315789473686e-05, |
| "objective/entropy": -79.70892333984375, |
| "objective/kl": 57.3241081237793, |
| "objective/non_score_reward": -5.732410907745361, |
| "objective/rlhf_reward": -18.529643630981447, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.5671920776367188, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7802742719650269, |
| "step": 322, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002181053161621 |
| }, |
| { |
| "episode": 5184, |
| "epoch": 0.14220661655785374, |
| "loss/policy_avg": 0.1442442387342453, |
| "lr": 2.15e-05, |
| "objective/entropy": -68.2364273071289, |
| "objective/kl": 58.37102127075195, |
| "objective/non_score_reward": -5.837101936340332, |
| "objective/rlhf_reward": -18.948408222198488, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7003833055496216, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.8646501302719116, |
| "step": 323, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002686977386475 |
| }, |
| { |
| "episode": 5200, |
| "epoch": 0.1426455258682175, |
| "loss/policy_avg": 0.2340829223394394, |
| "lr": 2.1473684210526316e-05, |
| "objective/entropy": 40.878807067871094, |
| "objective/kl": 64.59869384765625, |
| "objective/non_score_reward": -6.459869384765625, |
| "objective/rlhf_reward": -21.439478492736818, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.2884216904640198, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8738260269165039, |
| "step": 324, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000551223754883 |
| }, |
| { |
| "episode": 5216, |
| "epoch": 0.14308443517858122, |
| "loss/policy_avg": 1.2863520383834839, |
| "lr": 2.144736842105263e-05, |
| "objective/entropy": -44.597232818603516, |
| "objective/kl": 65.76396179199219, |
| "objective/non_score_reward": -6.5763959884643555, |
| "objective/rlhf_reward": -25.90558443069458, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 75.11996459960938, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9290659427642822, |
| "step": 325, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9984740018844604 |
| }, |
| { |
| "episode": 5232, |
| "epoch": 0.14352334448894496, |
| "loss/policy_avg": -0.5511414408683777, |
| "lr": 2.142105263157895e-05, |
| "objective/entropy": -234.2386474609375, |
| "objective/kl": 39.21967315673828, |
| "objective/non_score_reward": -3.921967029571533, |
| "objective/rlhf_reward": -17.687868118286133, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.12211395800113678, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.897687554359436, |
| "step": 326, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0014986991882324 |
| }, |
| { |
| "episode": 5248, |
| "epoch": 0.14396225379930871, |
| "loss/policy_avg": 0.07166372239589691, |
| "lr": 2.1394736842105264e-05, |
| "objective/entropy": -281.99237060546875, |
| "objective/kl": 26.73297691345215, |
| "objective/non_score_reward": -2.673297882080078, |
| "objective/rlhf_reward": -6.293191528320313, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.3802681267261505, |
| "policy/clipfrac_avg": 0.0, |
| "policy/entropy_avg": 0.7150620818138123, |
| "step": 327, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.000077247619629 |
| }, |
| { |
| "episode": 5264, |
| "epoch": 0.14440116310967246, |
| "loss/policy_avg": 0.7263858318328857, |
| "lr": 2.136842105263158e-05, |
| "objective/entropy": -50.91541290283203, |
| "objective/kl": 56.45762634277344, |
| "objective/non_score_reward": -5.6457624435424805, |
| "objective/rlhf_reward": -18.18305025100708, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.6975631713867188, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.981597900390625, |
| "step": 328, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9971437454223633 |
| }, |
| { |
| "episode": 5280, |
| "epoch": 0.14484007242003621, |
| "loss/policy_avg": -0.8948829174041748, |
| "lr": 2.1342105263157894e-05, |
| "objective/entropy": -104.03028869628906, |
| "objective/kl": 64.45817565917969, |
| "objective/non_score_reward": -6.445817947387695, |
| "objective/rlhf_reward": -23.660566510931524, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 73.4510498046875, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8826221227645874, |
| "step": 329, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9989889860153198 |
| }, |
| { |
| "episode": 5296, |
| "epoch": 0.14527898173039996, |
| "loss/policy_avg": -1.3438373804092407, |
| "lr": 2.1315789473684212e-05, |
| "objective/entropy": -59.31224060058594, |
| "objective/kl": 93.35623168945312, |
| "objective/non_score_reward": -9.335622787475586, |
| "objective/rlhf_reward": -36.94249258041381, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 14.088260650634766, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9897010326385498, |
| "step": 330, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0010061264038086 |
| }, |
| { |
| "episode": 5312, |
| "epoch": 0.14571789104076371, |
| "loss/policy_avg": 0.26144880056381226, |
| "lr": 2.1289473684210527e-05, |
| "objective/entropy": -51.645599365234375, |
| "objective/kl": 65.5647201538086, |
| "objective/non_score_reward": -6.556471824645996, |
| "objective/rlhf_reward": -21.825886821746828, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.4604711532592773, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8496096134185791, |
| "step": 331, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0012714862823486 |
| }, |
| { |
| "episode": 5328, |
| "epoch": 0.14615680035112744, |
| "loss/policy_avg": 1.6116310358047485, |
| "lr": 2.1263157894736842e-05, |
| "objective/entropy": -2.5801925659179688, |
| "objective/kl": 90.74581909179688, |
| "objective/non_score_reward": -9.074581146240234, |
| "objective/rlhf_reward": -31.898327445983888, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 161.30035400390625, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8422311544418335, |
| "step": 332, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9967368841171265 |
| }, |
| { |
| "episode": 5344, |
| "epoch": 0.14659570966149119, |
| "loss/policy_avg": 0.12108969688415527, |
| "lr": 2.1236842105263157e-05, |
| "objective/entropy": -26.530567169189453, |
| "objective/kl": 51.3416748046875, |
| "objective/non_score_reward": -5.134167194366455, |
| "objective/rlhf_reward": -16.13666877746582, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.6582493782043457, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8019247651100159, |
| "step": 333, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9982410669326782 |
| }, |
| { |
| "episode": 5360, |
| "epoch": 0.14703461897185494, |
| "loss/policy_avg": 0.024289570748806, |
| "lr": 2.1210526315789472e-05, |
| "objective/entropy": -281.92706298828125, |
| "objective/kl": 22.00769805908203, |
| "objective/non_score_reward": -2.2007696628570557, |
| "objective/rlhf_reward": -6.680372419134651, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 0.024459868669509888, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.8491325974464417, |
| "step": 334, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.000946521759033 |
| }, |
| { |
| "episode": 5376, |
| "epoch": 0.14747352828221869, |
| "loss/policy_avg": 0.25369805097579956, |
| "lr": 2.118421052631579e-05, |
| "objective/entropy": -22.282852172851562, |
| "objective/kl": 61.02508544921875, |
| "objective/non_score_reward": -6.102509021759033, |
| "objective/rlhf_reward": -20.010036087036134, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.20567560195922852, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.861437201499939, |
| "step": 335, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000655174255371 |
| }, |
| { |
| "episode": 5392, |
| "epoch": 0.14791243759258244, |
| "loss/policy_avg": -1.4644429683685303, |
| "lr": 2.1157894736842106e-05, |
| "objective/entropy": -44.415863037109375, |
| "objective/kl": 66.73587036132812, |
| "objective/non_score_reward": -6.673586368560791, |
| "objective/rlhf_reward": -22.294345951080324, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 34.78900146484375, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 1.028685212135315, |
| "step": 336, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9993712902069092 |
| }, |
| { |
| "episode": 5408, |
| "epoch": 0.14835134690294619, |
| "loss/policy_avg": 0.9412789344787598, |
| "lr": 2.113157894736842e-05, |
| "objective/entropy": -100.75498962402344, |
| "objective/kl": 66.7685775756836, |
| "objective/non_score_reward": -6.676857948303223, |
| "objective/rlhf_reward": -22.307430839538576, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.902991533279419, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8672486543655396, |
| "step": 337, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9970085620880127 |
| }, |
| { |
| "episode": 5424, |
| "epoch": 0.14879025621330993, |
| "loss/policy_avg": -0.2066299468278885, |
| "lr": 2.1105263157894736e-05, |
| "objective/entropy": -302.25445556640625, |
| "objective/kl": 27.72308921813965, |
| "objective/non_score_reward": -2.7723090648651123, |
| "objective/rlhf_reward": -10.689236021041872, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.4395582675933838, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6688382029533386, |
| "step": 338, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 2.000229835510254 |
| }, |
| { |
| "episode": 5440, |
| "epoch": 0.14922916552367368, |
| "loss/policy_avg": 0.2728902995586395, |
| "lr": 2.107894736842105e-05, |
| "objective/entropy": -61.32304382324219, |
| "objective/kl": 47.950096130371094, |
| "objective/non_score_reward": -4.795009613037109, |
| "objective/rlhf_reward": -14.780039167404176, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8936206102371216, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9472643733024597, |
| "step": 339, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0016930103302 |
| }, |
| { |
| "episode": 5456, |
| "epoch": 0.1496680748340374, |
| "loss/policy_avg": -0.455877423286438, |
| "lr": 2.105263157894737e-05, |
| "objective/entropy": -281.2245788574219, |
| "objective/kl": 46.91923522949219, |
| "objective/non_score_reward": -4.69192361831665, |
| "objective/rlhf_reward": -20.7676944732666, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 2.1040773391723633, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.751963198184967, |
| "step": 340, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.999725103378296 |
| }, |
| { |
| "episode": 5472, |
| "epoch": 0.15010698414440116, |
| "loss/policy_avg": 0.18102796375751495, |
| "lr": 2.1026315789473684e-05, |
| "objective/entropy": -39.01849365234375, |
| "objective/kl": 57.12322998046875, |
| "objective/non_score_reward": -5.712323188781738, |
| "objective/rlhf_reward": -18.449292278289796, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9643039703369141, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9084256887435913, |
| "step": 341, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998090147972107 |
| }, |
| { |
| "episode": 5488, |
| "epoch": 0.1505458934547649, |
| "loss/policy_avg": -0.9690545201301575, |
| "lr": 2.1e-05, |
| "objective/entropy": 34.1061897277832, |
| "objective/kl": 62.09593200683594, |
| "objective/non_score_reward": -6.209592819213867, |
| "objective/rlhf_reward": -20.438370800018312, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.2767698764801025, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.8822450637817383, |
| "step": 342, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0016417503356934 |
| }, |
| { |
| "episode": 5504, |
| "epoch": 0.15098480276512866, |
| "loss/policy_avg": -1.0985370874404907, |
| "lr": 2.0973684210526314e-05, |
| "objective/entropy": -98.5644302368164, |
| "objective/kl": 60.704673767089844, |
| "objective/non_score_reward": -6.070467948913574, |
| "objective/rlhf_reward": -23.881870365142824, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 60.376220703125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9388371706008911, |
| "step": 343, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.000264883041382 |
| }, |
| { |
| "episode": 5520, |
| "epoch": 0.1514237120754924, |
| "loss/policy_avg": 0.04451874643564224, |
| "lr": 2.0947368421052632e-05, |
| "objective/entropy": -291.84930419921875, |
| "objective/kl": 31.81546401977539, |
| "objective/non_score_reward": -3.181546688079834, |
| "objective/rlhf_reward": -12.32618627548218, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.256332665681839, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.9044315814971924, |
| "step": 344, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.999337911605835 |
| }, |
| { |
| "episode": 5536, |
| "epoch": 0.15186262138585616, |
| "loss/policy_avg": 0.33280524611473083, |
| "lr": 2.0921052631578947e-05, |
| "objective/entropy": -297.44451904296875, |
| "objective/kl": 35.726036071777344, |
| "objective/non_score_reward": -3.572603702545166, |
| "objective/rlhf_reward": -16.290414810180664, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.19790010154247284, |
| "policy/clipfrac_avg": 0.0, |
| "policy/entropy_avg": 0.7775914072990417, |
| "step": 345, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0004537105560303 |
| }, |
| { |
| "episode": 5552, |
| "epoch": 0.1523015306962199, |
| "loss/policy_avg": 0.2544030249118805, |
| "lr": 2.0894736842105262e-05, |
| "objective/entropy": 17.410404205322266, |
| "objective/kl": 64.44038391113281, |
| "objective/non_score_reward": -6.444038391113281, |
| "objective/rlhf_reward": -21.37615261077881, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9380027651786804, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8260934352874756, |
| "step": 346, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999771118164062 |
| }, |
| { |
| "episode": 5568, |
| "epoch": 0.15274044000658363, |
| "loss/policy_avg": 0.10114617645740509, |
| "lr": 2.086842105263158e-05, |
| "objective/entropy": -246.6290283203125, |
| "objective/kl": 27.598003387451172, |
| "objective/non_score_reward": -2.759800434112549, |
| "objective/rlhf_reward": -8.639202213287355, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 0.4102931618690491, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7962213754653931, |
| "step": 347, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.999171257019043 |
| }, |
| { |
| "episode": 5584, |
| "epoch": 0.15317934931694738, |
| "loss/policy_avg": -0.27266669273376465, |
| "lr": 2.0842105263157895e-05, |
| "objective/entropy": -11.595855712890625, |
| "objective/kl": 87.985595703125, |
| "objective/non_score_reward": -8.798559188842773, |
| "objective/rlhf_reward": -30.794237232208253, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.9808310270309448, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7044839262962341, |
| "step": 348, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0007858276367188 |
| }, |
| { |
| "episode": 5600, |
| "epoch": 0.15361825862731113, |
| "loss/policy_avg": -0.88209068775177, |
| "lr": 2.0815789473684214e-05, |
| "objective/entropy": -190.73699951171875, |
| "objective/kl": 50.43879318237305, |
| "objective/non_score_reward": -5.04387903213501, |
| "objective/rlhf_reward": -15.77551612854004, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 156.59344482421875, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.677651047706604, |
| "step": 349, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9986798763275146 |
| }, |
| { |
| "episode": 5616, |
| "epoch": 0.15405716793767488, |
| "loss/policy_avg": 0.14822402596473694, |
| "lr": 2.078947368421053e-05, |
| "objective/entropy": -283.6138916015625, |
| "objective/kl": 20.200992584228516, |
| "objective/non_score_reward": -2.02009916305542, |
| "objective/rlhf_reward": -3.6803968906402584, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.2338528633117676, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8573083877563477, |
| "step": 350, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0005390644073486 |
| }, |
| { |
| "episode": 5632, |
| "epoch": 0.15449607724803863, |
| "loss/policy_avg": 0.0547795295715332, |
| "lr": 2.0763157894736844e-05, |
| "objective/entropy": -284.02154541015625, |
| "objective/kl": 32.1505241394043, |
| "objective/non_score_reward": -3.215052604675293, |
| "objective/rlhf_reward": -8.460210895538331, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7220900654792786, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7125214338302612, |
| "step": 351, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.999941110610962 |
| }, |
| { |
| "episode": 5648, |
| "epoch": 0.15493498655840238, |
| "loss/policy_avg": 0.5199474692344666, |
| "lr": 2.073684210526316e-05, |
| "objective/entropy": 1.3252010345458984, |
| "objective/kl": 80.30017852783203, |
| "objective/non_score_reward": -8.030017852783203, |
| "objective/rlhf_reward": -27.720070457458498, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.922848701477051, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8046101331710815, |
| "step": 352, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 2.0003271102905273 |
| }, |
| { |
| "episode": 5664, |
| "epoch": 0.15537389586876613, |
| "loss/policy_avg": 0.06510049104690552, |
| "lr": 2.0710526315789474e-05, |
| "objective/entropy": -4.9978132247924805, |
| "objective/kl": 74.41607666015625, |
| "objective/non_score_reward": -7.441607475280762, |
| "objective/rlhf_reward": -29.366430377960206, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.37788572907447815, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9078077077865601, |
| "step": 353, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0011749267578125 |
| }, |
| { |
| "episode": 5680, |
| "epoch": 0.15581280517912985, |
| "loss/policy_avg": -0.5775541663169861, |
| "lr": 2.0684210526315792e-05, |
| "objective/entropy": -262.08123779296875, |
| "objective/kl": 39.923824310302734, |
| "objective/non_score_reward": -3.992382526397705, |
| "objective/rlhf_reward": -15.569530105590822, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.2174054384231567, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7082418203353882, |
| "step": 354, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0004000663757324 |
| }, |
| { |
| "episode": 5696, |
| "epoch": 0.1562517144894936, |
| "loss/policy_avg": -0.7071057558059692, |
| "lr": 2.0657894736842107e-05, |
| "objective/entropy": -120.38491821289062, |
| "objective/kl": 57.850921630859375, |
| "objective/non_score_reward": -5.785092353820801, |
| "objective/rlhf_reward": -21.53624895579012, |
| "objective/scores": 0.40102999566398123, |
| "policy/approxkl_avg": 30.95258331298828, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7739803194999695, |
| "step": 355, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9983584880828857 |
| }, |
| { |
| "episode": 5712, |
| "epoch": 0.15669062379985735, |
| "loss/policy_avg": -0.019969038665294647, |
| "lr": 2.0631578947368422e-05, |
| "objective/entropy": -7.5037031173706055, |
| "objective/kl": 49.77817153930664, |
| "objective/non_score_reward": -4.977817058563232, |
| "objective/rlhf_reward": -15.51126847267151, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 4.679505825042725, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7911888360977173, |
| "step": 356, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9971606731414795 |
| }, |
| { |
| "episode": 5728, |
| "epoch": 0.1571295331102211, |
| "loss/policy_avg": -0.15620523691177368, |
| "lr": 2.0605263157894737e-05, |
| "objective/entropy": -52.470314025878906, |
| "objective/kl": 50.46641540527344, |
| "objective/non_score_reward": -5.0466413497924805, |
| "objective/rlhf_reward": -15.786566829681398, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.52745521068573, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7481960654258728, |
| "step": 357, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9998908042907715 |
| }, |
| { |
| "episode": 5744, |
| "epoch": 0.15756844242058485, |
| "loss/policy_avg": 0.10630068182945251, |
| "lr": 2.0578947368421055e-05, |
| "objective/entropy": -234.93450927734375, |
| "objective/kl": 25.975557327270508, |
| "objective/non_score_reward": -2.5975558757781982, |
| "objective/rlhf_reward": -5.990223503112793, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9941245317459106, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7744235396385193, |
| "step": 358, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9997868537902832 |
| }, |
| { |
| "episode": 5760, |
| "epoch": 0.1580073517309486, |
| "loss/policy_avg": 1.8786392211914062, |
| "lr": 2.055263157894737e-05, |
| "objective/entropy": -9.861099243164062, |
| "objective/kl": 97.31732940673828, |
| "objective/non_score_reward": -9.731733322143555, |
| "objective/rlhf_reward": -38.52693424224854, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.8872553110122681, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6962687969207764, |
| "step": 359, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990880489349365 |
| }, |
| { |
| "episode": 5776, |
| "epoch": 0.15844626104131235, |
| "loss/policy_avg": 0.09301671385765076, |
| "lr": 2.0526315789473685e-05, |
| "objective/entropy": 40.30408477783203, |
| "objective/kl": 54.91043472290039, |
| "objective/non_score_reward": -5.4910430908203125, |
| "objective/rlhf_reward": -17.564173793792726, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.559497356414795, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8001800179481506, |
| "step": 360, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998398780822754 |
| }, |
| { |
| "episode": 5792, |
| "epoch": 0.1588851703516761, |
| "loss/policy_avg": -0.1022261455655098, |
| "lr": 2.05e-05, |
| "objective/entropy": -116.18830108642578, |
| "objective/kl": 57.52555465698242, |
| "objective/non_score_reward": -5.752555847167969, |
| "objective/rlhf_reward": -18.61022243499756, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.282173216342926, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.720761239528656, |
| "step": 361, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.001110076904297 |
| }, |
| { |
| "episode": 5808, |
| "epoch": 0.15932407966203982, |
| "loss/policy_avg": -1.0800527334213257, |
| "lr": 2.0473684210526315e-05, |
| "objective/entropy": -125.45774841308594, |
| "objective/kl": 58.299625396728516, |
| "objective/non_score_reward": -5.829962253570557, |
| "objective/rlhf_reward": -22.919849491119386, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 22.697772979736328, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6915639638900757, |
| "step": 362, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0005617141723633 |
| }, |
| { |
| "episode": 5824, |
| "epoch": 0.15976298897240357, |
| "loss/policy_avg": 0.93153977394104, |
| "lr": 2.0447368421052634e-05, |
| "objective/entropy": -43.68049621582031, |
| "objective/kl": 73.64720153808594, |
| "objective/non_score_reward": -7.364720821380615, |
| "objective/rlhf_reward": -25.058883285522462, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5285642147064209, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8717088103294373, |
| "step": 363, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001168727874756 |
| }, |
| { |
| "episode": 5840, |
| "epoch": 0.16020189828276732, |
| "loss/policy_avg": -0.5737454295158386, |
| "lr": 2.042105263157895e-05, |
| "objective/entropy": -144.2489013671875, |
| "objective/kl": 56.47989273071289, |
| "objective/non_score_reward": -5.647989273071289, |
| "objective/rlhf_reward": -18.191957569122316, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 48.41154479980469, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6994253993034363, |
| "step": 364, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0013179779052734 |
| }, |
| { |
| "episode": 5856, |
| "epoch": 0.16064080759313107, |
| "loss/policy_avg": -0.5543741583824158, |
| "lr": 2.0394736842105264e-05, |
| "objective/entropy": -153.12057495117188, |
| "objective/kl": 63.56580352783203, |
| "objective/non_score_reward": -6.356581211090088, |
| "objective/rlhf_reward": -25.026324844360353, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 32.669097900390625, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7884219288825989, |
| "step": 365, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9991981983184814 |
| }, |
| { |
| "episode": 5872, |
| "epoch": 0.16107971690349482, |
| "loss/policy_avg": 0.06399692595005035, |
| "lr": 2.036842105263158e-05, |
| "objective/entropy": -10.777188301086426, |
| "objective/kl": 75.06257629394531, |
| "objective/non_score_reward": -7.506258487701416, |
| "objective/rlhf_reward": -29.625033473968507, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.7900074124336243, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8327258229255676, |
| "step": 366, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9993977546691895 |
| }, |
| { |
| "episode": 5888, |
| "epoch": 0.16151862621385857, |
| "loss/policy_avg": 0.1322728395462036, |
| "lr": 2.0342105263157897e-05, |
| "objective/entropy": -25.25229835510254, |
| "objective/kl": 55.4914665222168, |
| "objective/non_score_reward": -5.54914665222168, |
| "objective/rlhf_reward": -17.79658708572388, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7297406196594238, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7416528463363647, |
| "step": 367, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9996728897094727 |
| }, |
| { |
| "episode": 5904, |
| "epoch": 0.16195753552422232, |
| "loss/policy_avg": 0.13267464935779572, |
| "lr": 2.0315789473684212e-05, |
| "objective/entropy": -74.82369995117188, |
| "objective/kl": 50.54753494262695, |
| "objective/non_score_reward": -5.054753303527832, |
| "objective/rlhf_reward": -19.819014644622804, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.003377914428711, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7704557776451111, |
| "step": 368, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002319574356079 |
| }, |
| { |
| "episode": 5920, |
| "epoch": 0.16239644483458604, |
| "loss/policy_avg": 0.1616273820400238, |
| "lr": 2.0289473684210527e-05, |
| "objective/entropy": -311.86871337890625, |
| "objective/kl": 34.5989990234375, |
| "objective/non_score_reward": -3.459900140762329, |
| "objective/rlhf_reward": -9.439600563049318, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.149490624666214, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.7689305543899536, |
| "step": 369, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 8, |
| "val/ratio": 2.000070810317993 |
| }, |
| { |
| "episode": 5936, |
| "epoch": 0.1628353541449498, |
| "loss/policy_avg": -0.322374165058136, |
| "lr": 2.0263157894736842e-05, |
| "objective/entropy": -186.23793029785156, |
| "objective/kl": 45.43787384033203, |
| "objective/non_score_reward": -4.543787002563477, |
| "objective/rlhf_reward": -13.775148963928224, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 36.16376495361328, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8869510889053345, |
| "step": 370, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9983677864074707 |
| }, |
| { |
| "episode": 5952, |
| "epoch": 0.16327426345531354, |
| "loss/policy_avg": 0.10794153809547424, |
| "lr": 2.0236842105263157e-05, |
| "objective/entropy": -311.2743225097656, |
| "objective/kl": 34.24767303466797, |
| "objective/non_score_reward": -3.4247677326202393, |
| "objective/rlhf_reward": -15.699070930480957, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.39599934220314026, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.7125707268714905, |
| "step": 371, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0000691413879395 |
| }, |
| { |
| "episode": 5968, |
| "epoch": 0.1637131727656773, |
| "loss/policy_avg": -1.7316912412643433, |
| "lr": 2.0210526315789475e-05, |
| "objective/entropy": -150.6534423828125, |
| "objective/kl": 53.55550003051758, |
| "objective/non_score_reward": -5.3555498123168945, |
| "objective/rlhf_reward": -17.022200679779054, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 51.17546081542969, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7921953797340393, |
| "step": 372, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9999009370803833 |
| }, |
| { |
| "episode": 5984, |
| "epoch": 0.16415208207604104, |
| "loss/policy_avg": -0.33673471212387085, |
| "lr": 2.018421052631579e-05, |
| "objective/entropy": -80.92591094970703, |
| "objective/kl": 67.56401062011719, |
| "objective/non_score_reward": -6.756401062011719, |
| "objective/rlhf_reward": -22.625604724884035, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.118476152420044, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6927839517593384, |
| "step": 373, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9994900226593018 |
| }, |
| { |
| "episode": 6000, |
| "epoch": 0.1645909913864048, |
| "loss/policy_avg": -0.3108822703361511, |
| "lr": 2.0157894736842105e-05, |
| "objective/entropy": -145.98187255859375, |
| "objective/kl": 50.68169021606445, |
| "objective/non_score_reward": -5.068169116973877, |
| "objective/rlhf_reward": -15.872675991058351, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 41.044952392578125, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9443225860595703, |
| "step": 374, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.997762680053711 |
| }, |
| { |
| "episode": 6016, |
| "epoch": 0.16502990069676854, |
| "loss/policy_avg": 0.3729490339756012, |
| "lr": 2.013157894736842e-05, |
| "objective/entropy": -270.9046630859375, |
| "objective/kl": 54.419471740722656, |
| "objective/non_score_reward": -5.441946983337402, |
| "objective/rlhf_reward": -17.367788887023927, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.4963139295578003, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8751029968261719, |
| "step": 375, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9989949464797974 |
| }, |
| { |
| "episode": 6032, |
| "epoch": 0.1654688100071323, |
| "loss/policy_avg": 0.10866791009902954, |
| "lr": 2.0105263157894735e-05, |
| "objective/entropy": -292.86962890625, |
| "objective/kl": 46.78014373779297, |
| "objective/non_score_reward": -4.678014278411865, |
| "objective/rlhf_reward": -20.71205711364746, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 2.655266046524048, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8966959714889526, |
| "step": 376, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 8, |
| "val/ratio": 2.0000298023223877 |
| }, |
| { |
| "episode": 6048, |
| "epoch": 0.165907719317496, |
| "loss/policy_avg": -0.5902384519577026, |
| "lr": 2.0078947368421053e-05, |
| "objective/entropy": -193.88943481445312, |
| "objective/kl": 53.728702545166016, |
| "objective/non_score_reward": -5.372870445251465, |
| "objective/rlhf_reward": -17.09148178100586, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 75.42146301269531, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.986204981803894, |
| "step": 377, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0006065368652344 |
| }, |
| { |
| "episode": 6064, |
| "epoch": 0.16634662862785976, |
| "loss/policy_avg": -0.6812242865562439, |
| "lr": 2.0052631578947368e-05, |
| "objective/entropy": -107.44984436035156, |
| "objective/kl": 87.9330062866211, |
| "objective/non_score_reward": -8.79330062866211, |
| "objective/rlhf_reward": -34.773203468322755, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.6081008911132812, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7108145952224731, |
| "step": 378, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0003364086151123 |
| }, |
| { |
| "episode": 6080, |
| "epoch": 0.1667855379382235, |
| "loss/policy_avg": 0.2358405739068985, |
| "lr": 2.0026315789473683e-05, |
| "objective/entropy": -33.92523956298828, |
| "objective/kl": 57.368141174316406, |
| "objective/non_score_reward": -5.736814022064209, |
| "objective/rlhf_reward": -18.547257041931154, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.742258608341217, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7838228940963745, |
| "step": 379, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0000722408294678 |
| }, |
| { |
| "episode": 6096, |
| "epoch": 0.16722444724858726, |
| "loss/policy_avg": 0.5731798410415649, |
| "lr": 1.9999999999999998e-05, |
| "objective/entropy": -114.15296936035156, |
| "objective/kl": 51.251705169677734, |
| "objective/non_score_reward": -5.125170707702637, |
| "objective/rlhf_reward": -16.10068235397339, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.509912371635437, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.7274678945541382, |
| "step": 380, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0001626014709473 |
| }, |
| { |
| "episode": 6112, |
| "epoch": 0.167663356558951, |
| "loss/policy_avg": 0.10960416495800018, |
| "lr": 1.9973684210526317e-05, |
| "objective/entropy": -75.13426208496094, |
| "objective/kl": 71.34457397460938, |
| "objective/non_score_reward": -7.134457588195801, |
| "objective/rlhf_reward": -28.137829875946046, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.02106511592865, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8250123262405396, |
| "step": 381, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9984166622161865 |
| }, |
| { |
| "episode": 6128, |
| "epoch": 0.16810226586931476, |
| "loss/policy_avg": 1.2399119138717651, |
| "lr": 1.994736842105263e-05, |
| "objective/entropy": -34.498966217041016, |
| "objective/kl": 60.02134704589844, |
| "objective/non_score_reward": -6.002135276794434, |
| "objective/rlhf_reward": -21.08482113921759, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 1.7387114763259888, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8664690256118774, |
| "step": 382, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001873731613159 |
| }, |
| { |
| "episode": 6144, |
| "epoch": 0.1685411751796785, |
| "loss/policy_avg": 0.9964497089385986, |
| "lr": 1.9921052631578947e-05, |
| "objective/entropy": -114.55979919433594, |
| "objective/kl": 48.91990661621094, |
| "objective/non_score_reward": -4.891991138458252, |
| "objective/rlhf_reward": -19.16796455383301, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6877788305282593, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7417900562286377, |
| "step": 383, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9994585514068604 |
| }, |
| { |
| "episode": 6160, |
| "epoch": 0.16898008449004223, |
| "loss/policy_avg": -0.14047479629516602, |
| "lr": 1.989473684210526e-05, |
| "objective/entropy": -73.96463012695312, |
| "objective/kl": 70.61947631835938, |
| "objective/non_score_reward": -7.061947345733643, |
| "objective/rlhf_reward": -25.32407036864874, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 0.5930333733558655, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8347901105880737, |
| "step": 384, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0012753009796143 |
| }, |
| { |
| "episode": 6176, |
| "epoch": 0.16941899380040598, |
| "loss/policy_avg": -0.8877277374267578, |
| "lr": 1.9868421052631576e-05, |
| "objective/entropy": -26.141101837158203, |
| "objective/kl": 70.60355377197266, |
| "objective/non_score_reward": -7.0603556632995605, |
| "objective/rlhf_reward": -27.841422653198244, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.1280431747436523, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8992083668708801, |
| "step": 385, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0029306411743164 |
| }, |
| { |
| "episode": 6192, |
| "epoch": 0.16985790311076973, |
| "loss/policy_avg": -0.07291531562805176, |
| "lr": 1.9842105263157895e-05, |
| "objective/entropy": -157.21595764160156, |
| "objective/kl": 46.20996856689453, |
| "objective/non_score_reward": -4.620996475219727, |
| "objective/rlhf_reward": -15.560267840267393, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 82.55020141601562, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8402323722839355, |
| "step": 386, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0009355545043945 |
| }, |
| { |
| "episode": 6208, |
| "epoch": 0.17029681242113348, |
| "loss/policy_avg": -0.2972855269908905, |
| "lr": 1.9815789473684213e-05, |
| "objective/entropy": -103.44124603271484, |
| "objective/kl": 63.92302703857422, |
| "objective/non_score_reward": -6.392302989959717, |
| "objective/rlhf_reward": -21.16921195983887, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.8578600883483887, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6705944538116455, |
| "step": 387, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0004239082336426 |
| }, |
| { |
| "episode": 6224, |
| "epoch": 0.17073572173149723, |
| "loss/policy_avg": 0.29273730516433716, |
| "lr": 1.9789473684210528e-05, |
| "objective/entropy": -67.3672866821289, |
| "objective/kl": 58.002750396728516, |
| "objective/non_score_reward": -5.800274848937988, |
| "objective/rlhf_reward": -21.644841520991875, |
| "objective/scores": 0.38906482631788786, |
| "policy/approxkl_avg": 1.0682644844055176, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7647286057472229, |
| "step": 388, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9988304376602173 |
| }, |
| { |
| "episode": 6240, |
| "epoch": 0.17117463104186098, |
| "loss/policy_avg": -0.1294967234134674, |
| "lr": 1.9763157894736843e-05, |
| "objective/entropy": -200.57763671875, |
| "objective/kl": 68.5810317993164, |
| "objective/non_score_reward": -6.858103275299072, |
| "objective/rlhf_reward": -25.91664179542893, |
| "objective/scores": 0.37894294565112985, |
| "policy/approxkl_avg": 55.38726806640625, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7542761564254761, |
| "step": 389, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999221563339233 |
| }, |
| { |
| "episode": 6256, |
| "epoch": 0.17161354035222473, |
| "loss/policy_avg": 0.58003830909729, |
| "lr": 1.9736842105263158e-05, |
| "objective/entropy": -17.135601043701172, |
| "objective/kl": 76.6408920288086, |
| "objective/non_score_reward": -7.664089679718018, |
| "objective/rlhf_reward": -30.256357288360597, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.9818647503852844, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7245423793792725, |
| "step": 390, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002509117126465 |
| }, |
| { |
| "episode": 6272, |
| "epoch": 0.17205244966258848, |
| "loss/policy_avg": -0.8786243796348572, |
| "lr": 1.9710526315789476e-05, |
| "objective/entropy": -170.98220825195312, |
| "objective/kl": 59.838829040527344, |
| "objective/non_score_reward": -5.983882904052734, |
| "objective/rlhf_reward": -25.935531616210938, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 76.37075805664062, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8124063014984131, |
| "step": 391, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987341165542603 |
| }, |
| { |
| "episode": 6288, |
| "epoch": 0.1724913589729522, |
| "loss/policy_avg": 0.4738132357597351, |
| "lr": 1.968421052631579e-05, |
| "objective/entropy": -26.218795776367188, |
| "objective/kl": 57.189857482910156, |
| "objective/non_score_reward": -5.7189860343933105, |
| "objective/rlhf_reward": -18.475943660736085, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8383350372314453, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8988721966743469, |
| "step": 392, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999671459197998 |
| }, |
| { |
| "episode": 6304, |
| "epoch": 0.17293026828331595, |
| "loss/policy_avg": 0.6148244738578796, |
| "lr": 1.9657894736842106e-05, |
| "objective/entropy": -279.04248046875, |
| "objective/kl": 27.996761322021484, |
| "objective/non_score_reward": -2.7996764183044434, |
| "objective/rlhf_reward": -13.198705673217773, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 4.403238773345947, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.955359935760498, |
| "step": 393, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.998351812362671 |
| }, |
| { |
| "episode": 6320, |
| "epoch": 0.1733691775936797, |
| "loss/policy_avg": 0.5641376972198486, |
| "lr": 1.963157894736842e-05, |
| "objective/entropy": -278.6837463378906, |
| "objective/kl": 31.391254425048828, |
| "objective/non_score_reward": -3.139125347137451, |
| "objective/rlhf_reward": -8.156501865386964, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7701074481010437, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.9388406276702881, |
| "step": 394, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 1.9990991353988647 |
| }, |
| { |
| "episode": 6336, |
| "epoch": 0.17380808690404345, |
| "loss/policy_avg": -0.5742035508155823, |
| "lr": 1.960526315789474e-05, |
| "objective/entropy": -31.725059509277344, |
| "objective/kl": 70.22592163085938, |
| "objective/non_score_reward": -7.022592544555664, |
| "objective/rlhf_reward": -25.166651640773985, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 1.2569208145141602, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7669047117233276, |
| "step": 395, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998677134513855 |
| }, |
| { |
| "episode": 6352, |
| "epoch": 0.1742469962144072, |
| "loss/policy_avg": -0.42700430750846863, |
| "lr": 1.9578947368421055e-05, |
| "objective/entropy": -337.1484680175781, |
| "objective/kl": 34.36861801147461, |
| "objective/non_score_reward": -3.436861991882324, |
| "objective/rlhf_reward": -9.34744749069214, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8713659644126892, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6901314854621887, |
| "step": 396, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 2.0013864040374756 |
| }, |
| { |
| "episode": 6368, |
| "epoch": 0.17468590552477095, |
| "loss/policy_avg": 0.5263411402702332, |
| "lr": 1.955263157894737e-05, |
| "objective/entropy": -352.5277099609375, |
| "objective/kl": 33.26110076904297, |
| "objective/non_score_reward": -3.326110363006592, |
| "objective/rlhf_reward": -8.90444097518921, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5582944750785828, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.7719149589538574, |
| "step": 397, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 1.9988962411880493 |
| }, |
| { |
| "episode": 6384, |
| "epoch": 0.1751248148351347, |
| "loss/policy_avg": 0.5565706491470337, |
| "lr": 1.9526315789473685e-05, |
| "objective/entropy": -280.6494445800781, |
| "objective/kl": 28.447463989257812, |
| "objective/non_score_reward": -2.8447465896606445, |
| "objective/rlhf_reward": -6.9789863586425795, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1234630346298218, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 1.0970081090927124, |
| "step": 398, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 1.9993960857391357 |
| }, |
| { |
| "episode": 6400, |
| "epoch": 0.17556372414549842, |
| "loss/policy_avg": 0.9028674364089966, |
| "lr": 1.95e-05, |
| "objective/entropy": -307.14703369140625, |
| "objective/kl": 34.96384048461914, |
| "objective/non_score_reward": -3.4963841438293457, |
| "objective/rlhf_reward": -9.585536575317384, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.13779303431510925, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.849014163017273, |
| "step": 399, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.000701427459717 |
| }, |
| { |
| "episode": 6416, |
| "epoch": 0.17600263345586217, |
| "loss/policy_avg": 0.09404882788658142, |
| "lr": 1.9473684210526318e-05, |
| "objective/entropy": -319.993896484375, |
| "objective/kl": 26.75142478942871, |
| "objective/non_score_reward": -2.675142765045166, |
| "objective/rlhf_reward": -6.300570583343506, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.08198696374893188, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8347125053405762, |
| "step": 400, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 8, |
| "val/ratio": 1.999713659286499 |
| }, |
| { |
| "episode": 6432, |
| "epoch": 0.17644154276622592, |
| "loss/policy_avg": -0.5681114196777344, |
| "lr": 1.9447368421052633e-05, |
| "objective/entropy": 5.593948841094971, |
| "objective/kl": 81.45667266845703, |
| "objective/non_score_reward": -8.145668029785156, |
| "objective/rlhf_reward": -28.182670211791994, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.9207358360290527, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.9207296371459961, |
| "step": 401, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9994817972183228 |
| }, |
| { |
| "episode": 6448, |
| "epoch": 0.17688045207658967, |
| "loss/policy_avg": -0.19854994118213654, |
| "lr": 1.9421052631578948e-05, |
| "objective/entropy": -295.1614074707031, |
| "objective/kl": 35.05091857910156, |
| "objective/non_score_reward": -3.505092144012451, |
| "objective/rlhf_reward": -13.620368099212648, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.7516396045684814, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9505492448806763, |
| "step": 402, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.001508951187134 |
| }, |
| { |
| "episode": 6464, |
| "epoch": 0.17731936138695342, |
| "loss/policy_avg": -0.1034904420375824, |
| "lr": 1.9394736842105263e-05, |
| "objective/entropy": -260.1594543457031, |
| "objective/kl": 30.833967208862305, |
| "objective/non_score_reward": -3.0833964347839355, |
| "objective/rlhf_reward": -14.333585739135742, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.2162019908428192, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8382043838500977, |
| "step": 403, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 7, |
| "val/ratio": 2.000133752822876 |
| }, |
| { |
| "episode": 6480, |
| "epoch": 0.17775827069731717, |
| "loss/policy_avg": -0.9927380681037903, |
| "lr": 1.936842105263158e-05, |
| "objective/entropy": -140.57418823242188, |
| "objective/kl": 60.026126861572266, |
| "objective/non_score_reward": -6.002613067626953, |
| "objective/rlhf_reward": -23.610451316833498, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 16.303983688354492, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 1.1447197198867798, |
| "step": 404, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9993970394134521 |
| }, |
| { |
| "episode": 6496, |
| "epoch": 0.17819718000768092, |
| "loss/policy_avg": 3.3291196823120117, |
| "lr": 1.9342105263157896e-05, |
| "objective/entropy": -25.995943069458008, |
| "objective/kl": 84.37167358398438, |
| "objective/non_score_reward": -8.43716812133789, |
| "objective/rlhf_reward": -33.348669624328615, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6764692664146423, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.9281838536262512, |
| "step": 405, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0005123615264893 |
| }, |
| { |
| "episode": 6512, |
| "epoch": 0.17863608931804467, |
| "loss/policy_avg": -0.6397604942321777, |
| "lr": 1.931578947368421e-05, |
| "objective/entropy": -198.416748046875, |
| "objective/kl": 60.98023223876953, |
| "objective/non_score_reward": -6.098023414611816, |
| "objective/rlhf_reward": -19.992093658447267, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.191061019897461, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8035732507705688, |
| "step": 406, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0002763271331787 |
| }, |
| { |
| "episode": 6528, |
| "epoch": 0.1790749986284084, |
| "loss/policy_avg": -0.08131340146064758, |
| "lr": 1.9289473684210526e-05, |
| "objective/entropy": -295.3315124511719, |
| "objective/kl": 36.48168182373047, |
| "objective/non_score_reward": -3.6481685638427734, |
| "objective/rlhf_reward": -16.592674255371094, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.0901284217834473, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.833161473274231, |
| "step": 407, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.001061201095581 |
| }, |
| { |
| "episode": 6544, |
| "epoch": 0.17951390793877214, |
| "loss/policy_avg": 0.4617496728897095, |
| "lr": 1.926315789473684e-05, |
| "objective/entropy": -66.05003356933594, |
| "objective/kl": 48.92518997192383, |
| "objective/non_score_reward": -4.892518997192383, |
| "objective/rlhf_reward": -15.170076227188112, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.639676809310913, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7556887865066528, |
| "step": 408, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0015039443969727 |
| }, |
| { |
| "episode": 6560, |
| "epoch": 0.1799528172491359, |
| "loss/policy_avg": 0.08634293079376221, |
| "lr": 1.923684210526316e-05, |
| "objective/entropy": -70.57524108886719, |
| "objective/kl": 73.93417358398438, |
| "objective/non_score_reward": -7.393417835235596, |
| "objective/rlhf_reward": -25.173671340942384, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5882269144058228, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8048997521400452, |
| "step": 409, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9990966320037842 |
| }, |
| { |
| "episode": 6576, |
| "epoch": 0.18039172655949964, |
| "loss/policy_avg": 0.033458832651376724, |
| "lr": 1.9210526315789474e-05, |
| "objective/entropy": -297.69122314453125, |
| "objective/kl": 40.81647491455078, |
| "objective/non_score_reward": -4.081647872924805, |
| "objective/rlhf_reward": -11.926590538024904, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.15943774580955505, |
| "policy/clipfrac_avg": 0.0, |
| "policy/entropy_avg": 0.9514541029930115, |
| "step": 410, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9999594688415527 |
| }, |
| { |
| "episode": 6592, |
| "epoch": 0.1808306358698634, |
| "loss/policy_avg": 0.6446475982666016, |
| "lr": 1.918421052631579e-05, |
| "objective/entropy": -141.07623291015625, |
| "objective/kl": 61.068145751953125, |
| "objective/non_score_reward": -6.106815338134766, |
| "objective/rlhf_reward": -20.027261352539064, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7178910970687866, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8605374693870544, |
| "step": 411, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0003395080566406 |
| }, |
| { |
| "episode": 6608, |
| "epoch": 0.18126954518022714, |
| "loss/policy_avg": 0.2619036138057709, |
| "lr": 1.9157894736842104e-05, |
| "objective/entropy": -278.59210205078125, |
| "objective/kl": 27.3831787109375, |
| "objective/non_score_reward": -2.7383179664611816, |
| "objective/rlhf_reward": -6.553271865844727, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.32318031787872314, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8319592475891113, |
| "step": 412, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9992783069610596 |
| }, |
| { |
| "episode": 6624, |
| "epoch": 0.1817084544905909, |
| "loss/policy_avg": 0.2453235387802124, |
| "lr": 1.913157894736842e-05, |
| "objective/entropy": -309.0776672363281, |
| "objective/kl": 25.942588806152344, |
| "objective/non_score_reward": -2.5942587852478027, |
| "objective/rlhf_reward": -5.9770351409912115, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.0987516641616821, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 1.0278055667877197, |
| "step": 413, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 8, |
| "val/ratio": 1.9998819828033447 |
| }, |
| { |
| "episode": 6640, |
| "epoch": 0.18214736380095461, |
| "loss/policy_avg": -0.6481926441192627, |
| "lr": 1.9105263157894738e-05, |
| "objective/entropy": -180.14288330078125, |
| "objective/kl": 47.951805114746094, |
| "objective/non_score_reward": -4.795180797576904, |
| "objective/rlhf_reward": -14.780723190307619, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 54.561119079589844, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 1.0198578834533691, |
| "step": 414, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9996154308319092 |
| }, |
| { |
| "episode": 6656, |
| "epoch": 0.18258627311131836, |
| "loss/policy_avg": 0.03629662096500397, |
| "lr": 1.9078947368421053e-05, |
| "objective/entropy": -9.615211486816406, |
| "objective/kl": 65.70553588867188, |
| "objective/non_score_reward": -6.570553779602051, |
| "objective/rlhf_reward": -25.882215595245363, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.8190488815307617, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9529430866241455, |
| "step": 415, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9984461069107056 |
| }, |
| { |
| "episode": 6672, |
| "epoch": 0.18302518242168211, |
| "loss/policy_avg": -0.17124596238136292, |
| "lr": 1.9052631578947368e-05, |
| "objective/entropy": -296.9742736816406, |
| "objective/kl": 31.674793243408203, |
| "objective/non_score_reward": -3.1674790382385254, |
| "objective/rlhf_reward": -8.269916391372682, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.6360380053520203, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9433181881904602, |
| "step": 416, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000105142593384 |
| }, |
| { |
| "episode": 6688, |
| "epoch": 0.18346409173204586, |
| "loss/policy_avg": -0.10602147877216339, |
| "lr": 1.9026315789473683e-05, |
| "objective/entropy": 26.670429229736328, |
| "objective/kl": 58.556495666503906, |
| "objective/non_score_reward": -5.855649948120117, |
| "objective/rlhf_reward": -23.02259979248047, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.7600481510162354, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 1.0238035917282104, |
| "step": 417, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000761032104492 |
| }, |
| { |
| "episode": 6704, |
| "epoch": 0.18390300104240961, |
| "loss/policy_avg": 0.16803205013275146, |
| "lr": 1.9e-05, |
| "objective/entropy": -16.96014976501465, |
| "objective/kl": 62.71726989746094, |
| "objective/non_score_reward": -6.271727085113525, |
| "objective/rlhf_reward": -20.686907863616945, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.8721787929534912, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9119787216186523, |
| "step": 418, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002262592315674 |
| }, |
| { |
| "episode": 6720, |
| "epoch": 0.18434191035277336, |
| "loss/policy_avg": 0.8746110200881958, |
| "lr": 1.8973684210526316e-05, |
| "objective/entropy": 6.184518814086914, |
| "objective/kl": 61.64329528808594, |
| "objective/non_score_reward": -6.164329528808594, |
| "objective/rlhf_reward": -24.25731716156006, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.2244923114776611, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.9992606043815613, |
| "step": 419, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9991247653961182 |
| }, |
| { |
| "episode": 6736, |
| "epoch": 0.1847808196631371, |
| "loss/policy_avg": 0.014954586513340473, |
| "lr": 1.894736842105263e-05, |
| "objective/entropy": -267.61376953125, |
| "objective/kl": 27.085674285888672, |
| "objective/non_score_reward": -2.7085673809051514, |
| "objective/rlhf_reward": -6.434269523620606, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.14659488201141357, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.9907639026641846, |
| "step": 420, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0013108253479004 |
| }, |
| { |
| "episode": 6752, |
| "epoch": 0.18521972897350086, |
| "loss/policy_avg": 0.8699166178703308, |
| "lr": 1.8921052631578946e-05, |
| "objective/entropy": -77.01543426513672, |
| "objective/kl": 48.32474899291992, |
| "objective/non_score_reward": -4.832475185394287, |
| "objective/rlhf_reward": -14.929901218414308, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.799189805984497, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7855837345123291, |
| "step": 421, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999213457107544 |
| }, |
| { |
| "episode": 6768, |
| "epoch": 0.18565863828386459, |
| "loss/policy_avg": -0.27239370346069336, |
| "lr": 1.889473684210526e-05, |
| "objective/entropy": -10.540843963623047, |
| "objective/kl": 58.51908874511719, |
| "objective/non_score_reward": -5.851909637451172, |
| "objective/rlhf_reward": -19.007637119293214, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.3941487073898315, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8914047479629517, |
| "step": 422, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0000998973846436 |
| }, |
| { |
| "episode": 6784, |
| "epoch": 0.18609754759422834, |
| "loss/policy_avg": 0.5779774785041809, |
| "lr": 1.886842105263158e-05, |
| "objective/entropy": -37.48634338378906, |
| "objective/kl": 60.117984771728516, |
| "objective/non_score_reward": -6.011798858642578, |
| "objective/rlhf_reward": -19.64719400405884, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.9815831184387207, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 1.0184441804885864, |
| "step": 423, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998115062713623 |
| }, |
| { |
| "episode": 6800, |
| "epoch": 0.18653645690459209, |
| "loss/policy_avg": 0.49414119124412537, |
| "lr": 1.8842105263157894e-05, |
| "objective/entropy": 50.2642707824707, |
| "objective/kl": 78.58097076416016, |
| "objective/non_score_reward": -7.858097076416016, |
| "objective/rlhf_reward": -31.032388305664064, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 141.35354614257812, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8828184604644775, |
| "step": 424, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9957654476165771 |
| }, |
| { |
| "episode": 6816, |
| "epoch": 0.18697536621495583, |
| "loss/policy_avg": 0.014625292271375656, |
| "lr": 1.881578947368421e-05, |
| "objective/entropy": -72.2008285522461, |
| "objective/kl": 67.50498962402344, |
| "objective/non_score_reward": -6.7504987716674805, |
| "objective/rlhf_reward": -22.601995086669923, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5733897686004639, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8627150654792786, |
| "step": 425, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0004444122314453 |
| }, |
| { |
| "episode": 6832, |
| "epoch": 0.18741427552531958, |
| "loss/policy_avg": 0.3940482437610626, |
| "lr": 1.8789473684210524e-05, |
| "objective/entropy": -109.4649429321289, |
| "objective/kl": 57.591156005859375, |
| "objective/non_score_reward": -5.759115219116211, |
| "objective/rlhf_reward": -18.63646183013916, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.5976098775863647, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6642797589302063, |
| "step": 426, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9996377229690552 |
| }, |
| { |
| "episode": 6848, |
| "epoch": 0.18785318483568333, |
| "loss/policy_avg": 0.6501445174217224, |
| "lr": 1.8763157894736843e-05, |
| "objective/entropy": -93.68934631347656, |
| "objective/kl": 52.76403045654297, |
| "objective/non_score_reward": -5.276403427124023, |
| "objective/rlhf_reward": -16.70561275482178, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 93.70457458496094, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8544291853904724, |
| "step": 427, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000861406326294 |
| }, |
| { |
| "episode": 6864, |
| "epoch": 0.18829209414604708, |
| "loss/policy_avg": -0.7759616374969482, |
| "lr": 1.873684210526316e-05, |
| "objective/entropy": -127.28284454345703, |
| "objective/kl": 52.26542282104492, |
| "objective/non_score_reward": -5.2265424728393555, |
| "objective/rlhf_reward": -16.50617036819458, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 44.17460250854492, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 1.0000516176223755, |
| "step": 428, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9975546598434448 |
| }, |
| { |
| "episode": 6880, |
| "epoch": 0.1887310034564108, |
| "loss/policy_avg": -0.11000016331672668, |
| "lr": 1.8710526315789476e-05, |
| "objective/entropy": -19.487102508544922, |
| "objective/kl": 55.983280181884766, |
| "objective/non_score_reward": -5.598328113555908, |
| "objective/rlhf_reward": -17.993312454223634, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.473930597305298, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.9288405179977417, |
| "step": 429, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000844717025757 |
| }, |
| { |
| "episode": 6896, |
| "epoch": 0.18916991276677456, |
| "loss/policy_avg": -0.06470927596092224, |
| "lr": 1.868421052631579e-05, |
| "objective/entropy": -254.17868041992188, |
| "objective/kl": 34.887691497802734, |
| "objective/non_score_reward": -3.4887688159942627, |
| "objective/rlhf_reward": -13.555075263977052, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.26364630460739136, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 1.0304815769195557, |
| "step": 430, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0000686645507812 |
| }, |
| { |
| "episode": 6912, |
| "epoch": 0.1896088220771383, |
| "loss/policy_avg": 0.21356704831123352, |
| "lr": 1.8657894736842106e-05, |
| "objective/entropy": -261.9208679199219, |
| "objective/kl": 31.915531158447266, |
| "objective/non_score_reward": -3.1915531158447266, |
| "objective/rlhf_reward": -8.36621198654175, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.789092540740967, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8163321018218994, |
| "step": 431, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9990161657333374 |
| }, |
| { |
| "episode": 6928, |
| "epoch": 0.19004773138750206, |
| "loss/policy_avg": -1.0231928825378418, |
| "lr": 1.8631578947368424e-05, |
| "objective/entropy": -168.65093994140625, |
| "objective/kl": 51.64683532714844, |
| "objective/non_score_reward": -5.1646833419799805, |
| "objective/rlhf_reward": -22.658733367919922, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 32.607357025146484, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8486956357955933, |
| "step": 432, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9985682964324951 |
| }, |
| { |
| "episode": 6944, |
| "epoch": 0.1904866406978658, |
| "loss/policy_avg": -0.6227490901947021, |
| "lr": 1.860526315789474e-05, |
| "objective/entropy": -65.97626495361328, |
| "objective/kl": 63.84180450439453, |
| "objective/non_score_reward": -6.384180545806885, |
| "objective/rlhf_reward": -25.1367226600647, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.5096147656440735, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6590231657028198, |
| "step": 433, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0007688999176025 |
| }, |
| { |
| "episode": 6960, |
| "epoch": 0.19092555000822956, |
| "loss/policy_avg": 0.4701375365257263, |
| "lr": 1.8578947368421054e-05, |
| "objective/entropy": -10.970794677734375, |
| "objective/kl": 62.22464370727539, |
| "objective/non_score_reward": -6.222464561462402, |
| "objective/rlhf_reward": -20.489859199523927, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 173.43112182617188, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.9498796463012695, |
| "step": 434, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9947084188461304 |
| }, |
| { |
| "episode": 6976, |
| "epoch": 0.1913644593185933, |
| "loss/policy_avg": 0.099261075258255, |
| "lr": 1.855263157894737e-05, |
| "objective/entropy": -267.3575134277344, |
| "objective/kl": 27.131763458251953, |
| "objective/non_score_reward": -2.7131762504577637, |
| "objective/rlhf_reward": -12.852705001831055, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.024509262293577194, |
| "policy/clipfrac_avg": 0.0, |
| "policy/entropy_avg": 0.6670312881469727, |
| "step": 435, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0010085105895996 |
| }, |
| { |
| "episode": 6992, |
| "epoch": 0.19180336862895705, |
| "loss/policy_avg": -0.4680057466030121, |
| "lr": 1.8526315789473684e-05, |
| "objective/entropy": -42.91291427612305, |
| "objective/kl": 66.02111053466797, |
| "objective/non_score_reward": -6.602110862731934, |
| "objective/rlhf_reward": -22.008442974090578, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.3675117492675781, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6913959980010986, |
| "step": 436, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9998745918273926 |
| }, |
| { |
| "episode": 7008, |
| "epoch": 0.19224227793932078, |
| "loss/policy_avg": 2.381070137023926, |
| "lr": 1.8500000000000002e-05, |
| "objective/entropy": -19.4685115814209, |
| "objective/kl": 47.43255615234375, |
| "objective/non_score_reward": -4.743255615234375, |
| "objective/rlhf_reward": -14.573023653030397, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7034322619438171, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.837364673614502, |
| "step": 437, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.003263473510742 |
| }, |
| { |
| "episode": 7024, |
| "epoch": 0.19268118724968453, |
| "loss/policy_avg": -0.018530648201704025, |
| "lr": 1.8473684210526317e-05, |
| "objective/entropy": -32.53235626220703, |
| "objective/kl": 59.6943244934082, |
| "objective/non_score_reward": -5.969432830810547, |
| "objective/rlhf_reward": -19.477729892730714, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5509717464447021, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8037828207015991, |
| "step": 438, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0012922286987305 |
| }, |
| { |
| "episode": 7040, |
| "epoch": 0.19312009656004828, |
| "loss/policy_avg": -0.009570784866809845, |
| "lr": 1.8447368421052632e-05, |
| "objective/entropy": -50.37032699584961, |
| "objective/kl": 56.81880187988281, |
| "objective/non_score_reward": -5.681879997253418, |
| "objective/rlhf_reward": -18.327521419525148, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.6729666590690613, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8526865839958191, |
| "step": 439, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9991214275360107 |
| }, |
| { |
| "episode": 7056, |
| "epoch": 0.19355900587041203, |
| "loss/policy_avg": -0.930114209651947, |
| "lr": 1.8421052631578947e-05, |
| "objective/entropy": -178.06288146972656, |
| "objective/kl": 45.6494026184082, |
| "objective/non_score_reward": -4.564940452575684, |
| "objective/rlhf_reward": -13.859762287139894, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 123.52664184570312, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8203810453414917, |
| "step": 440, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9983216524124146 |
| }, |
| { |
| "episode": 7072, |
| "epoch": 0.19399791518077578, |
| "loss/policy_avg": 1.49681556224823, |
| "lr": 1.8394736842105266e-05, |
| "objective/entropy": -21.60431671142578, |
| "objective/kl": 63.327247619628906, |
| "objective/non_score_reward": -6.332725524902344, |
| "objective/rlhf_reward": -20.930900669097902, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.75593900680542, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7101706266403198, |
| "step": 441, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001399040222168 |
| }, |
| { |
| "episode": 7088, |
| "epoch": 0.19443682449113953, |
| "loss/policy_avg": 0.45096832513809204, |
| "lr": 1.836842105263158e-05, |
| "objective/entropy": -5.946907043457031, |
| "objective/kl": 75.59844970703125, |
| "objective/non_score_reward": -7.559844970703125, |
| "objective/rlhf_reward": -29.839378929138185, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.7098190784454346, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.766690731048584, |
| "step": 442, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9985554218292236 |
| }, |
| { |
| "episode": 7104, |
| "epoch": 0.19487573380150328, |
| "loss/policy_avg": 0.263136088848114, |
| "lr": 1.8342105263157896e-05, |
| "objective/entropy": -17.822843551635742, |
| "objective/kl": 51.40583038330078, |
| "objective/non_score_reward": -5.140583515167236, |
| "objective/rlhf_reward": -16.16233334541321, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.2211589813232422, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5617699027061462, |
| "step": 443, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9987659454345703 |
| }, |
| { |
| "episode": 7120, |
| "epoch": 0.195314643111867, |
| "loss/policy_avg": -0.007531791925430298, |
| "lr": 1.831578947368421e-05, |
| "objective/entropy": -16.001230239868164, |
| "objective/kl": 80.43534851074219, |
| "objective/non_score_reward": -8.043535232543945, |
| "objective/rlhf_reward": -27.77414140701294, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.3839130401611328, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.8183437585830688, |
| "step": 444, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0007972717285156 |
| }, |
| { |
| "episode": 7136, |
| "epoch": 0.19575355242223075, |
| "loss/policy_avg": 1.0919990539550781, |
| "lr": 1.8289473684210526e-05, |
| "objective/entropy": -254.75714111328125, |
| "objective/kl": 31.01543426513672, |
| "objective/non_score_reward": -3.101543426513672, |
| "objective/rlhf_reward": -8.006174182891847, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.4650940895080566, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6774981021881104, |
| "step": 445, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9985923767089844 |
| }, |
| { |
| "episode": 7152, |
| "epoch": 0.1961924617325945, |
| "loss/policy_avg": -0.019719071686267853, |
| "lr": 1.8263157894736844e-05, |
| "objective/entropy": -237.97215270996094, |
| "objective/kl": 38.12819290161133, |
| "objective/non_score_reward": -3.812819480895996, |
| "objective/rlhf_reward": -10.851277446746828, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.24818766117095947, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8377676606178284, |
| "step": 446, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.999452829360962 |
| }, |
| { |
| "episode": 7168, |
| "epoch": 0.19663137104295825, |
| "loss/policy_avg": 0.27718091011047363, |
| "lr": 1.823684210526316e-05, |
| "objective/entropy": -8.873601913452148, |
| "objective/kl": 49.01349639892578, |
| "objective/non_score_reward": -4.9013495445251465, |
| "objective/rlhf_reward": -15.205398416519166, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9532676935195923, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6659794449806213, |
| "step": 447, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9996001720428467 |
| }, |
| { |
| "episode": 7184, |
| "epoch": 0.197070280353322, |
| "loss/policy_avg": -0.06615559011697769, |
| "lr": 1.8210526315789474e-05, |
| "objective/entropy": -44.73011016845703, |
| "objective/kl": 54.297813415527344, |
| "objective/non_score_reward": -5.429781913757324, |
| "objective/rlhf_reward": -21.31912717819214, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.26587027311325073, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.712435245513916, |
| "step": 448, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0008835792541504 |
| }, |
| { |
| "episode": 7200, |
| "epoch": 0.19750918966368575, |
| "loss/policy_avg": 0.9200335741043091, |
| "lr": 1.818421052631579e-05, |
| "objective/entropy": -270.44720458984375, |
| "objective/kl": 34.97774124145508, |
| "objective/non_score_reward": -3.497774362564087, |
| "objective/rlhf_reward": -13.591097450256349, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.7731072902679443, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7718012928962708, |
| "step": 449, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.001753807067871 |
| }, |
| { |
| "episode": 7216, |
| "epoch": 0.1979480989740495, |
| "loss/policy_avg": -0.2271648347377777, |
| "lr": 1.8157894736842107e-05, |
| "objective/entropy": 17.676624298095703, |
| "objective/kl": 70.4654769897461, |
| "objective/non_score_reward": -7.046547889709473, |
| "objective/rlhf_reward": -25.786191082000734, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 2.0250697135925293, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7303136587142944, |
| "step": 450, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9997398853302002 |
| }, |
| { |
| "episode": 7232, |
| "epoch": 0.19838700828441325, |
| "loss/policy_avg": 1.256649136543274, |
| "lr": 1.8131578947368422e-05, |
| "objective/entropy": -0.7466411590576172, |
| "objective/kl": 61.498382568359375, |
| "objective/non_score_reward": -6.149837970733643, |
| "objective/rlhf_reward": -20.19935188293457, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 70.4373550415039, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.712023138999939, |
| "step": 451, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.994720697402954 |
| }, |
| { |
| "episode": 7248, |
| "epoch": 0.19882591759477697, |
| "loss/policy_avg": -0.515707790851593, |
| "lr": 1.8105263157894737e-05, |
| "objective/entropy": -30.733827590942383, |
| "objective/kl": 75.01179504394531, |
| "objective/non_score_reward": -7.5011796951293945, |
| "objective/rlhf_reward": -25.60471830368042, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 8.555524826049805, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6430870294570923, |
| "step": 452, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000035047531128 |
| }, |
| { |
| "episode": 7264, |
| "epoch": 0.19926482690514072, |
| "loss/policy_avg": 0.050750162452459335, |
| "lr": 1.8078947368421052e-05, |
| "objective/entropy": -46.318687438964844, |
| "objective/kl": 74.29559326171875, |
| "objective/non_score_reward": -7.429559230804443, |
| "objective/rlhf_reward": -25.318237400054933, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9510049819946289, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6830945014953613, |
| "step": 453, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9981689453125 |
| }, |
| { |
| "episode": 7280, |
| "epoch": 0.19970373621550447, |
| "loss/policy_avg": 0.30346107482910156, |
| "lr": 1.8052631578947367e-05, |
| "objective/entropy": -264.3983459472656, |
| "objective/kl": 43.06207275390625, |
| "objective/non_score_reward": -4.306207180023193, |
| "objective/rlhf_reward": -16.824828243255617, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.0833698511123657, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7494672536849976, |
| "step": 454, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9986538887023926 |
| }, |
| { |
| "episode": 7296, |
| "epoch": 0.20014264552586822, |
| "loss/policy_avg": 1.5475409030914307, |
| "lr": 1.8026315789473685e-05, |
| "objective/entropy": 29.105066299438477, |
| "objective/kl": 46.912784576416016, |
| "objective/non_score_reward": -4.691278457641602, |
| "objective/rlhf_reward": -20.765113830566406, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 94.84734344482422, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5901237726211548, |
| "step": 455, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9971956014633179 |
| }, |
| { |
| "episode": 7312, |
| "epoch": 0.20058155483623197, |
| "loss/policy_avg": 0.40649881958961487, |
| "lr": 1.8e-05, |
| "objective/entropy": -20.784526824951172, |
| "objective/kl": 64.50088500976562, |
| "objective/non_score_reward": -6.4500885009765625, |
| "objective/rlhf_reward": -21.40035400390625, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 5.087962627410889, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.680627703666687, |
| "step": 456, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9978127479553223 |
| }, |
| { |
| "episode": 7328, |
| "epoch": 0.20102046414659572, |
| "loss/policy_avg": 0.028734341263771057, |
| "lr": 1.7973684210526315e-05, |
| "objective/entropy": -282.66912841796875, |
| "objective/kl": 29.973798751831055, |
| "objective/non_score_reward": -2.997379779815674, |
| "objective/rlhf_reward": -9.065800104976866, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 0.2122015655040741, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6922552585601807, |
| "step": 457, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0007247924804688 |
| }, |
| { |
| "episode": 7344, |
| "epoch": 0.20145937345695947, |
| "loss/policy_avg": -0.22284090518951416, |
| "lr": 1.794736842105263e-05, |
| "objective/entropy": -73.5594711303711, |
| "objective/kl": 46.40571975708008, |
| "objective/non_score_reward": -4.6405720710754395, |
| "objective/rlhf_reward": -14.162287569046022, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1293284893035889, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8148968815803528, |
| "step": 458, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002856254577637 |
| }, |
| { |
| "episode": 7360, |
| "epoch": 0.2018982827673232, |
| "loss/policy_avg": -0.16610336303710938, |
| "lr": 1.7921052631578945e-05, |
| "objective/entropy": -63.282432556152344, |
| "objective/kl": 54.72997283935547, |
| "objective/non_score_reward": -5.472997665405273, |
| "objective/rlhf_reward": -17.491990184783937, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.9718881845474243, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6093668341636658, |
| "step": 459, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9995301961898804 |
| }, |
| { |
| "episode": 7376, |
| "epoch": 0.20233719207768694, |
| "loss/policy_avg": -0.5106132626533508, |
| "lr": 1.7894736842105264e-05, |
| "objective/entropy": -32.18394088745117, |
| "objective/kl": 60.436309814453125, |
| "objective/non_score_reward": -6.043630599975586, |
| "objective/rlhf_reward": -19.774522638320924, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.607424736022949, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.6205296516418457, |
| "step": 460, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9996771812438965 |
| }, |
| { |
| "episode": 7392, |
| "epoch": 0.2027761013880507, |
| "loss/policy_avg": -0.11848235130310059, |
| "lr": 1.786842105263158e-05, |
| "objective/entropy": -250.755859375, |
| "objective/kl": 24.229875564575195, |
| "objective/non_score_reward": -2.422987461090088, |
| "objective/rlhf_reward": -11.691949844360352, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.8377648591995239, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8346691131591797, |
| "step": 461, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.99947988986969 |
| }, |
| { |
| "episode": 7408, |
| "epoch": 0.20321501069841444, |
| "loss/policy_avg": 0.08530762791633606, |
| "lr": 1.7842105263157894e-05, |
| "objective/entropy": 6.682670593261719, |
| "objective/kl": 51.59654998779297, |
| "objective/non_score_reward": -5.159655570983887, |
| "objective/rlhf_reward": -16.238621330261232, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.4281816482543945, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.535011351108551, |
| "step": 462, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998945951461792 |
| }, |
| { |
| "episode": 7424, |
| "epoch": 0.2036539200087782, |
| "loss/policy_avg": 1.1254315376281738, |
| "lr": 1.781578947368421e-05, |
| "objective/entropy": -52.357872009277344, |
| "objective/kl": 50.13798522949219, |
| "objective/non_score_reward": -5.013798713684082, |
| "objective/rlhf_reward": -15.655193901062013, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.8155397772789001, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6176036596298218, |
| "step": 463, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0029804706573486 |
| }, |
| { |
| "episode": 7440, |
| "epoch": 0.20409282931914194, |
| "loss/policy_avg": 1.055740237236023, |
| "lr": 1.7789473684210527e-05, |
| "objective/entropy": -6.067316055297852, |
| "objective/kl": 59.782745361328125, |
| "objective/non_score_reward": -5.978274345397949, |
| "objective/rlhf_reward": -23.513098812103273, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 5.5082688331604, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6876584887504578, |
| "step": 464, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000401258468628 |
| }, |
| { |
| "episode": 7456, |
| "epoch": 0.2045317386295057, |
| "loss/policy_avg": 0.6007944345474243, |
| "lr": 1.7763157894736842e-05, |
| "objective/entropy": -19.558338165283203, |
| "objective/kl": 60.92905044555664, |
| "objective/non_score_reward": -6.092905044555664, |
| "objective/rlhf_reward": -22.248913469091924, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 1.0873606204986572, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5196924209594727, |
| "step": 465, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9983136653900146 |
| }, |
| { |
| "episode": 7472, |
| "epoch": 0.20497064793986944, |
| "loss/policy_avg": -0.22906312346458435, |
| "lr": 1.7736842105263157e-05, |
| "objective/entropy": -40.17732238769531, |
| "objective/kl": 65.0643310546875, |
| "objective/non_score_reward": -6.50643253326416, |
| "objective/rlhf_reward": -21.62573108673096, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7022728323936462, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.6174463033676147, |
| "step": 466, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0009613037109375 |
| }, |
| { |
| "episode": 7488, |
| "epoch": 0.20540955725023316, |
| "loss/policy_avg": -0.7405965328216553, |
| "lr": 1.7710526315789472e-05, |
| "objective/entropy": -157.75514221191406, |
| "objective/kl": 49.42311096191406, |
| "objective/non_score_reward": -4.9423112869262695, |
| "objective/rlhf_reward": -19.369244194030763, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 65.77047729492188, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5402215719223022, |
| "step": 467, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999876022338867 |
| }, |
| { |
| "episode": 7504, |
| "epoch": 0.2058484665605969, |
| "loss/policy_avg": 0.04996461793780327, |
| "lr": 1.7684210526315787e-05, |
| "objective/entropy": -275.37030029296875, |
| "objective/kl": 33.15436553955078, |
| "objective/non_score_reward": -3.315436840057373, |
| "objective/rlhf_reward": -11.139041127935918, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 0.5296927094459534, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6478952169418335, |
| "step": 468, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.999742865562439 |
| }, |
| { |
| "episode": 7520, |
| "epoch": 0.20628737587096066, |
| "loss/policy_avg": 0.7616822123527527, |
| "lr": 1.765789473684211e-05, |
| "objective/entropy": -32.41627502441406, |
| "objective/kl": 87.37818145751953, |
| "objective/non_score_reward": -8.737818717956543, |
| "objective/rlhf_reward": -34.55127391815186, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 106.83147430419922, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6894011497497559, |
| "step": 469, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9993350505828857 |
| }, |
| { |
| "episode": 7536, |
| "epoch": 0.2067262851813244, |
| "loss/policy_avg": -0.7901378273963928, |
| "lr": 1.7631578947368424e-05, |
| "objective/entropy": -130.1688232421875, |
| "objective/kl": 46.754058837890625, |
| "objective/non_score_reward": -4.675406455993652, |
| "objective/rlhf_reward": -15.777905856014463, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 8.254480361938477, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5577253103256226, |
| "step": 470, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9982564449310303 |
| }, |
| { |
| "episode": 7552, |
| "epoch": 0.20716519449168816, |
| "loss/policy_avg": 0.019479047507047653, |
| "lr": 1.760526315789474e-05, |
| "objective/entropy": -301.51361083984375, |
| "objective/kl": 30.93421173095703, |
| "objective/non_score_reward": -3.093421220779419, |
| "objective/rlhf_reward": -14.373684883117676, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.615177869796753, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.5213165283203125, |
| "step": 471, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9974719285964966 |
| }, |
| { |
| "episode": 7568, |
| "epoch": 0.2076041038020519, |
| "loss/policy_avg": 0.01017309445887804, |
| "lr": 1.7578947368421054e-05, |
| "objective/entropy": -243.85569763183594, |
| "objective/kl": 34.48377227783203, |
| "objective/non_score_reward": -3.4483771324157715, |
| "objective/rlhf_reward": -9.393509006500246, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7559428215026855, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.8406962156295776, |
| "step": 472, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999021291732788 |
| }, |
| { |
| "episode": 7584, |
| "epoch": 0.20804301311241566, |
| "loss/policy_avg": 1.2839194536209106, |
| "lr": 1.755263157894737e-05, |
| "objective/entropy": -78.8662109375, |
| "objective/kl": 60.683692932128906, |
| "objective/non_score_reward": -6.0683698654174805, |
| "objective/rlhf_reward": -23.873478507995607, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.5784714221954346, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.543493926525116, |
| "step": 473, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998354434967041 |
| }, |
| { |
| "episode": 7600, |
| "epoch": 0.20848192242277938, |
| "loss/policy_avg": 0.1725112497806549, |
| "lr": 1.7526315789473687e-05, |
| "objective/entropy": 12.784589767456055, |
| "objective/kl": 58.08510971069336, |
| "objective/non_score_reward": -5.808510780334473, |
| "objective/rlhf_reward": -18.83404407501221, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7866100072860718, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5890865921974182, |
| "step": 474, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9998912811279297 |
| }, |
| { |
| "episode": 7616, |
| "epoch": 0.20892083173314313, |
| "loss/policy_avg": -1.373124361038208, |
| "lr": 1.7500000000000002e-05, |
| "objective/entropy": -33.494956970214844, |
| "objective/kl": 69.98075103759766, |
| "objective/non_score_reward": -6.998075485229492, |
| "objective/rlhf_reward": -29.99230194091797, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 54.548370361328125, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.64041668176651, |
| "step": 475, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9978668689727783 |
| }, |
| { |
| "episode": 7632, |
| "epoch": 0.20935974104350688, |
| "loss/policy_avg": 0.07895299792289734, |
| "lr": 1.7473684210526317e-05, |
| "objective/entropy": -11.28443431854248, |
| "objective/kl": 53.7340087890625, |
| "objective/non_score_reward": -5.373400688171387, |
| "objective/rlhf_reward": -17.09360227584839, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.7228981256484985, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6447443962097168, |
| "step": 476, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.998573660850525 |
| }, |
| { |
| "episode": 7648, |
| "epoch": 0.20979865035387063, |
| "loss/policy_avg": -1.7310700416564941, |
| "lr": 1.7447368421052632e-05, |
| "objective/entropy": -142.7119903564453, |
| "objective/kl": 53.508262634277344, |
| "objective/non_score_reward": -5.350826740264893, |
| "objective/rlhf_reward": -23.40330696105957, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 84.33183288574219, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5232048034667969, |
| "step": 477, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999740719795227 |
| }, |
| { |
| "episode": 7664, |
| "epoch": 0.21023755966423438, |
| "loss/policy_avg": 0.08662360906600952, |
| "lr": 1.742105263157895e-05, |
| "objective/entropy": -298.08477783203125, |
| "objective/kl": 29.51595687866211, |
| "objective/non_score_reward": -2.9515957832336426, |
| "objective/rlhf_reward": -11.406383132934572, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.8490124940872192, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.620236873626709, |
| "step": 478, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0023481845855713 |
| }, |
| { |
| "episode": 7680, |
| "epoch": 0.21067646897459813, |
| "loss/policy_avg": 4.753633975982666, |
| "lr": 1.7394736842105265e-05, |
| "objective/entropy": 4.008540153503418, |
| "objective/kl": 50.286949157714844, |
| "objective/non_score_reward": -5.028695106506348, |
| "objective/rlhf_reward": -17.714780187606813, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 2.038093090057373, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6810034513473511, |
| "step": 479, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9995425939559937 |
| }, |
| { |
| "episode": 7696, |
| "epoch": 0.21111537828496188, |
| "loss/policy_avg": -0.013529837131500244, |
| "lr": 1.736842105263158e-05, |
| "objective/entropy": -62.08034133911133, |
| "objective/kl": 98.79060363769531, |
| "objective/non_score_reward": -9.879060745239258, |
| "objective/rlhf_reward": -35.116244888305665, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 8.784490585327148, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.6048997640609741, |
| "step": 480, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0008130073547363 |
| }, |
| { |
| "episode": 7712, |
| "epoch": 0.21155428759532563, |
| "loss/policy_avg": -0.44553932547569275, |
| "lr": 1.7342105263157895e-05, |
| "objective/entropy": -287.85150146484375, |
| "objective/kl": 18.73577117919922, |
| "objective/non_score_reward": -1.8735771179199219, |
| "objective/rlhf_reward": -3.094308471679687, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.242295041680336, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5571756362915039, |
| "step": 481, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.0010554790496826 |
| }, |
| { |
| "episode": 7728, |
| "epoch": 0.21199319690568935, |
| "loss/policy_avg": -0.17571601271629333, |
| "lr": 1.731578947368421e-05, |
| "objective/entropy": -26.461956024169922, |
| "objective/kl": 72.46046447753906, |
| "objective/non_score_reward": -7.24604606628418, |
| "objective/rlhf_reward": -24.584185218811037, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.8817163705825806, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6041799187660217, |
| "step": 482, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0004825592041016 |
| }, |
| { |
| "episode": 7744, |
| "epoch": 0.2124321062160531, |
| "loss/policy_avg": 0.004539132118225098, |
| "lr": 1.728947368421053e-05, |
| "objective/entropy": -290.8310546875, |
| "objective/kl": 24.83023452758789, |
| "objective/non_score_reward": -2.4830234050750732, |
| "objective/rlhf_reward": -9.532093620300294, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.04867856949567795, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.5443418622016907, |
| "step": 483, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0004634857177734 |
| }, |
| { |
| "episode": 7760, |
| "epoch": 0.21287101552641685, |
| "loss/policy_avg": 0.8424244523048401, |
| "lr": 1.7263157894736843e-05, |
| "objective/entropy": -43.61247253417969, |
| "objective/kl": 67.6912841796875, |
| "objective/non_score_reward": -6.769128799438477, |
| "objective/rlhf_reward": -26.67651472091675, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6011160016059875, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5028409957885742, |
| "step": 484, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0024876594543457 |
| }, |
| { |
| "episode": 7776, |
| "epoch": 0.2133099248367806, |
| "loss/policy_avg": 0.05304345488548279, |
| "lr": 1.723684210526316e-05, |
| "objective/entropy": -58.04655456542969, |
| "objective/kl": 60.33159637451172, |
| "objective/non_score_reward": -6.033159255981445, |
| "objective/rlhf_reward": -19.7326379776001, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.44679179787635803, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5934137105941772, |
| "step": 485, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990103244781494 |
| }, |
| { |
| "episode": 7792, |
| "epoch": 0.21374883414714435, |
| "loss/policy_avg": -0.1699434518814087, |
| "lr": 1.7210526315789473e-05, |
| "objective/entropy": -118.589599609375, |
| "objective/kl": 57.44417953491211, |
| "objective/non_score_reward": -5.744418144226074, |
| "objective/rlhf_reward": -18.577671623229982, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.6742945313453674, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5328694581985474, |
| "step": 486, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.00083065032959 |
| }, |
| { |
| "episode": 7808, |
| "epoch": 0.2141877434575081, |
| "loss/policy_avg": 0.04833655059337616, |
| "lr": 1.718421052631579e-05, |
| "objective/entropy": -38.469444274902344, |
| "objective/kl": 45.26972198486328, |
| "objective/non_score_reward": -4.52697229385376, |
| "objective/rlhf_reward": -17.707888698577882, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6370303630828857, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6407510638237, |
| "step": 487, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9995996952056885 |
| }, |
| { |
| "episode": 7824, |
| "epoch": 0.21462665276787185, |
| "loss/policy_avg": 0.1121654063463211, |
| "lr": 1.7157894736842107e-05, |
| "objective/entropy": -55.31531524658203, |
| "objective/kl": 48.76139831542969, |
| "objective/non_score_reward": -4.876140117645264, |
| "objective/rlhf_reward": -15.104560470581056, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7145019769668579, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5792022943496704, |
| "step": 488, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987767934799194 |
| }, |
| { |
| "episode": 7840, |
| "epoch": 0.21506556207823557, |
| "loss/policy_avg": 0.07800394296646118, |
| "lr": 1.713157894736842e-05, |
| "objective/entropy": -19.052385330200195, |
| "objective/kl": 85.97078704833984, |
| "objective/non_score_reward": -8.597079277038574, |
| "objective/rlhf_reward": -29.988315677642824, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 3.3856687545776367, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7141093015670776, |
| "step": 489, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.00225567817688 |
| }, |
| { |
| "episode": 7856, |
| "epoch": 0.21550447138859932, |
| "loss/policy_avg": 2.34645414352417, |
| "lr": 1.7105263157894737e-05, |
| "objective/entropy": 11.276354789733887, |
| "objective/kl": 74.60820007324219, |
| "objective/non_score_reward": -7.460819721221924, |
| "objective/rlhf_reward": -27.720573606268438, |
| "objective/scores": 0.5306765580733931, |
| "policy/approxkl_avg": 177.46253967285156, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6245430707931519, |
| "step": 490, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9974071979522705 |
| }, |
| { |
| "episode": 7872, |
| "epoch": 0.21594338069896307, |
| "loss/policy_avg": 1.0574601888656616, |
| "lr": 1.707894736842105e-05, |
| "objective/entropy": -76.87249755859375, |
| "objective/kl": 66.18588256835938, |
| "objective/non_score_reward": -6.618587970733643, |
| "objective/rlhf_reward": -22.07435188293457, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.6070568561553955, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5290701389312744, |
| "step": 491, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9992444515228271 |
| }, |
| { |
| "episode": 7888, |
| "epoch": 0.21638229000932682, |
| "loss/policy_avg": -0.13727782666683197, |
| "lr": 1.705263157894737e-05, |
| "objective/entropy": -46.14100646972656, |
| "objective/kl": 64.03702545166016, |
| "objective/non_score_reward": -6.403702735900879, |
| "objective/rlhf_reward": -25.214810943603517, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 160.15899658203125, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5764521360397339, |
| "step": 492, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.996391773223877 |
| }, |
| { |
| "episode": 7904, |
| "epoch": 0.21682119931969057, |
| "loss/policy_avg": -1.2387105226516724, |
| "lr": 1.7026315789473685e-05, |
| "objective/entropy": -138.5601043701172, |
| "objective/kl": 58.91579818725586, |
| "objective/non_score_reward": -5.891580104827881, |
| "objective/rlhf_reward": -25.566320419311523, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 82.98390197753906, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7035410404205322, |
| "step": 493, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9981887340545654 |
| }, |
| { |
| "episode": 7920, |
| "epoch": 0.21726010863005432, |
| "loss/policy_avg": -0.45713070034980774, |
| "lr": 1.7e-05, |
| "objective/entropy": -44.855960845947266, |
| "objective/kl": 51.5128173828125, |
| "objective/non_score_reward": -5.151281833648682, |
| "objective/rlhf_reward": -20.205127334594728, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.6437796354293823, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6304531097412109, |
| "step": 494, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.00028657913208 |
| }, |
| { |
| "episode": 7936, |
| "epoch": 0.21769901794041807, |
| "loss/policy_avg": 0.4890076220035553, |
| "lr": 1.6973684210526315e-05, |
| "objective/entropy": -85.00923156738281, |
| "objective/kl": 56.18220520019531, |
| "objective/non_score_reward": -5.618220329284668, |
| "objective/rlhf_reward": -22.07288179397583, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.5769182443618774, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6442561149597168, |
| "step": 495, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0013489723205566 |
| }, |
| { |
| "episode": 7952, |
| "epoch": 0.2181379272507818, |
| "loss/policy_avg": -0.9529117345809937, |
| "lr": 1.694736842105263e-05, |
| "objective/entropy": -68.37138366699219, |
| "objective/kl": 56.5475959777832, |
| "objective/non_score_reward": -5.654759407043457, |
| "objective/rlhf_reward": -18.219038820266725, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1973304748535156, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5495026707649231, |
| "step": 496, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000663995742798 |
| }, |
| { |
| "episode": 7968, |
| "epoch": 0.21857683656114554, |
| "loss/policy_avg": 0.12810270488262177, |
| "lr": 1.6921052631578948e-05, |
| "objective/entropy": -281.0614929199219, |
| "objective/kl": 24.029754638671875, |
| "objective/non_score_reward": -2.402975559234619, |
| "objective/rlhf_reward": -5.211901998519897, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.1585916429758072, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.5986806154251099, |
| "step": 497, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.99949049949646 |
| }, |
| { |
| "episode": 7984, |
| "epoch": 0.2190157458715093, |
| "loss/policy_avg": -1.2483512163162231, |
| "lr": 1.6894736842105263e-05, |
| "objective/entropy": -125.2818374633789, |
| "objective/kl": 48.96977996826172, |
| "objective/non_score_reward": -4.89697790145874, |
| "objective/rlhf_reward": -15.1879123210907, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 38.351585388183594, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5701537132263184, |
| "step": 498, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9982032775878906 |
| }, |
| { |
| "episode": 8000, |
| "epoch": 0.21945465518187304, |
| "loss/policy_avg": 0.2582131624221802, |
| "lr": 1.6868421052631578e-05, |
| "objective/entropy": -43.63482666015625, |
| "objective/kl": 83.76309967041016, |
| "objective/non_score_reward": -8.376310348510742, |
| "objective/rlhf_reward": -33.10524425506592, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.616728663444519, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6804277896881104, |
| "step": 499, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0020856857299805 |
| }, |
| { |
| "episode": 8016, |
| "epoch": 0.2198935644922368, |
| "loss/policy_avg": -0.2930386960506439, |
| "lr": 1.6842105263157893e-05, |
| "objective/entropy": -47.91590118408203, |
| "objective/kl": 48.19269561767578, |
| "objective/non_score_reward": -4.819270133972168, |
| "objective/rlhf_reward": -14.877080774307252, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.4650521278381348, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7425109148025513, |
| "step": 500, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0012712478637695 |
| }, |
| { |
| "episode": 8032, |
| "epoch": 0.22033247380260054, |
| "loss/policy_avg": -1.611896276473999, |
| "lr": 1.681578947368421e-05, |
| "objective/entropy": -103.2376480102539, |
| "objective/kl": 59.12322998046875, |
| "objective/non_score_reward": -5.912322998046875, |
| "objective/rlhf_reward": -19.24929246902466, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 150.0416259765625, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6628369092941284, |
| "step": 501, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990419149398804 |
| }, |
| { |
| "episode": 8048, |
| "epoch": 0.2207713831129643, |
| "loss/policy_avg": 0.293116956949234, |
| "lr": 1.6789473684210526e-05, |
| "objective/entropy": -84.19717407226562, |
| "objective/kl": 43.60019302368164, |
| "objective/non_score_reward": -4.360019207000732, |
| "objective/rlhf_reward": -13.040077304840088, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.25065314769744873, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.5742840766906738, |
| "step": 502, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0006816387176514 |
| }, |
| { |
| "episode": 8064, |
| "epoch": 0.22121029242332804, |
| "loss/policy_avg": -0.2314561903476715, |
| "lr": 1.676315789473684e-05, |
| "objective/entropy": -20.76449966430664, |
| "objective/kl": 50.151161193847656, |
| "objective/non_score_reward": -5.015115737915039, |
| "objective/rlhf_reward": -15.660464620590211, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.5777597427368164, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6621568202972412, |
| "step": 503, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001661777496338 |
| }, |
| { |
| "episode": 8080, |
| "epoch": 0.22164920173369176, |
| "loss/policy_avg": 0.26323428750038147, |
| "lr": 1.6736842105263156e-05, |
| "objective/entropy": -250.92178344726562, |
| "objective/kl": 26.257488250732422, |
| "objective/non_score_reward": -2.625748872756958, |
| "objective/rlhf_reward": -6.102995491027832, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.509668231010437, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6861035823822021, |
| "step": 504, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9993526935577393 |
| }, |
| { |
| "episode": 8096, |
| "epoch": 0.22208811104405551, |
| "loss/policy_avg": 0.7368256449699402, |
| "lr": 1.671052631578947e-05, |
| "objective/entropy": -49.587276458740234, |
| "objective/kl": 54.560203552246094, |
| "objective/non_score_reward": -5.456020355224609, |
| "objective/rlhf_reward": -21.424079990386964, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 40.43186950683594, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5872527956962585, |
| "step": 505, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002885103225708 |
| }, |
| { |
| "episode": 8112, |
| "epoch": 0.22252702035441926, |
| "loss/policy_avg": -0.13276471197605133, |
| "lr": 1.668421052631579e-05, |
| "objective/entropy": -29.59518814086914, |
| "objective/kl": 59.4058837890625, |
| "objective/non_score_reward": -5.940588474273682, |
| "objective/rlhf_reward": -19.362353897094728, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.369570016860962, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6469683647155762, |
| "step": 506, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0005695819854736 |
| }, |
| { |
| "episode": 8128, |
| "epoch": 0.222965929664783, |
| "loss/policy_avg": 0.15404559671878815, |
| "lr": 1.6657894736842105e-05, |
| "objective/entropy": -288.75341796875, |
| "objective/kl": 16.826412200927734, |
| "objective/non_score_reward": -1.6826412677764893, |
| "objective/rlhf_reward": -2.3305650711059567, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.13429397344589233, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6506304740905762, |
| "step": 507, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 2.001171827316284 |
| }, |
| { |
| "episode": 8144, |
| "epoch": 0.22340483897514676, |
| "loss/policy_avg": 0.06800419837236404, |
| "lr": 1.663157894736842e-05, |
| "objective/entropy": -30.991519927978516, |
| "objective/kl": 53.74003601074219, |
| "objective/non_score_reward": -5.3740034103393555, |
| "objective/rlhf_reward": -17.09601459503174, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.6434616446495056, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6338897347450256, |
| "step": 508, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999275207519531 |
| }, |
| { |
| "episode": 8160, |
| "epoch": 0.2238437482855105, |
| "loss/policy_avg": 0.010863058269023895, |
| "lr": 1.6605263157894738e-05, |
| "objective/entropy": -300.00469970703125, |
| "objective/kl": 22.95432472229004, |
| "objective/non_score_reward": -2.2954325675964355, |
| "objective/rlhf_reward": -8.781729793548585, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.30962803959846497, |
| "policy/clipfrac_avg": 0.0, |
| "policy/entropy_avg": 0.7447618246078491, |
| "step": 509, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.999950647354126 |
| }, |
| { |
| "episode": 8176, |
| "epoch": 0.22428265759587426, |
| "loss/policy_avg": -0.005725979804992676, |
| "lr": 1.6578947368421053e-05, |
| "objective/entropy": -72.19743347167969, |
| "objective/kl": 67.5067138671875, |
| "objective/non_score_reward": -6.75067138671875, |
| "objective/rlhf_reward": -22.602685070037843, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.3745484352111816, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7517286539077759, |
| "step": 510, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990617036819458 |
| }, |
| { |
| "episode": 8192, |
| "epoch": 0.22472156690623799, |
| "loss/policy_avg": -0.49819329380989075, |
| "lr": 1.655263157894737e-05, |
| "objective/entropy": -100.77090454101562, |
| "objective/kl": 52.09931945800781, |
| "objective/non_score_reward": -5.20993185043335, |
| "objective/rlhf_reward": -16.439727878570558, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.40050771832466125, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6259418725967407, |
| "step": 511, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0012173652648926 |
| }, |
| { |
| "episode": 8208, |
| "epoch": 0.22516047621660173, |
| "loss/policy_avg": -0.2459656298160553, |
| "lr": 1.6526315789473686e-05, |
| "objective/entropy": -64.88304138183594, |
| "objective/kl": 54.07443618774414, |
| "objective/non_score_reward": -5.407444000244141, |
| "objective/rlhf_reward": -21.22977457046509, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.1960783004760742, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.71941077709198, |
| "step": 512, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999362230300903 |
| }, |
| { |
| "episode": 8224, |
| "epoch": 0.22559938552696548, |
| "loss/policy_avg": 0.015581846237182617, |
| "lr": 1.65e-05, |
| "objective/entropy": -86.62329864501953, |
| "objective/kl": 63.60594177246094, |
| "objective/non_score_reward": -6.360594749450684, |
| "objective/rlhf_reward": -21.042378520965578, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.669529914855957, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5121915936470032, |
| "step": 513, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9988899230957031 |
| }, |
| { |
| "episode": 8240, |
| "epoch": 0.22603829483732923, |
| "loss/policy_avg": -1.0670742988586426, |
| "lr": 1.6473684210526316e-05, |
| "objective/entropy": -126.68997955322266, |
| "objective/kl": 48.829551696777344, |
| "objective/non_score_reward": -4.882955074310303, |
| "objective/rlhf_reward": -15.131820535659791, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 43.48120880126953, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5280458927154541, |
| "step": 514, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9984208345413208 |
| }, |
| { |
| "episode": 8256, |
| "epoch": 0.22647720414769298, |
| "loss/policy_avg": -1.7157282829284668, |
| "lr": 1.6447368421052635e-05, |
| "objective/entropy": -78.12884521484375, |
| "objective/kl": 58.25613784790039, |
| "objective/non_score_reward": -5.8256144523620605, |
| "objective/rlhf_reward": -18.90245876312256, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 66.62440490722656, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6942804455757141, |
| "step": 515, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0003674030303955 |
| }, |
| { |
| "episode": 8272, |
| "epoch": 0.22691611345805673, |
| "loss/policy_avg": 1.5511988401412964, |
| "lr": 1.642105263157895e-05, |
| "objective/entropy": -47.62477111816406, |
| "objective/kl": 81.21244812011719, |
| "objective/non_score_reward": -8.121244430541992, |
| "objective/rlhf_reward": -28.08497772216797, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 58.765281677246094, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.632847785949707, |
| "step": 516, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9979219436645508 |
| }, |
| { |
| "episode": 8288, |
| "epoch": 0.22735502276842048, |
| "loss/policy_avg": -0.03130514919757843, |
| "lr": 1.6394736842105265e-05, |
| "objective/entropy": -84.12074279785156, |
| "objective/kl": 70.67733764648438, |
| "objective/non_score_reward": -7.067734241485596, |
| "objective/rlhf_reward": -27.870936012268068, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.3221986293792725, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7155718803405762, |
| "step": 517, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999363899230957 |
| }, |
| { |
| "episode": 8304, |
| "epoch": 0.22779393207878423, |
| "loss/policy_avg": 0.8689873218536377, |
| "lr": 1.636842105263158e-05, |
| "objective/entropy": -128.02854919433594, |
| "objective/kl": 57.08744812011719, |
| "objective/non_score_reward": -5.708744525909424, |
| "objective/rlhf_reward": -19.911259089351866, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 1.4211647510528564, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6031744480133057, |
| "step": 518, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9977145195007324 |
| }, |
| { |
| "episode": 8320, |
| "epoch": 0.22823284138914796, |
| "loss/policy_avg": -0.5332244634628296, |
| "lr": 1.6342105263157894e-05, |
| "objective/entropy": -185.8155059814453, |
| "objective/kl": 34.196075439453125, |
| "objective/non_score_reward": -3.419607639312744, |
| "objective/rlhf_reward": -15.678430557250977, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 91.76868438720703, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.666742205619812, |
| "step": 519, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9985761642456055 |
| }, |
| { |
| "episode": 8336, |
| "epoch": 0.2286717506995117, |
| "loss/policy_avg": -0.2599008083343506, |
| "lr": 1.6315789473684213e-05, |
| "objective/entropy": -39.81787109375, |
| "objective/kl": 55.39878845214844, |
| "objective/non_score_reward": -5.539878845214844, |
| "objective/rlhf_reward": -21.759516334533693, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.2191312313079834, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7273707389831543, |
| "step": 520, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000648021697998 |
| }, |
| { |
| "episode": 8352, |
| "epoch": 0.22911066000987546, |
| "loss/policy_avg": 1.1705384254455566, |
| "lr": 1.6289473684210528e-05, |
| "objective/entropy": 14.593412399291992, |
| "objective/kl": 67.65731811523438, |
| "objective/non_score_reward": -6.765731334686279, |
| "objective/rlhf_reward": -29.062925338745117, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 108.43498229980469, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.689389705657959, |
| "step": 521, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 3, |
| "val/ratio": 1.9988114833831787 |
| }, |
| { |
| "episode": 8368, |
| "epoch": 0.2295495693202392, |
| "loss/policy_avg": -0.0511559396982193, |
| "lr": 1.6263157894736843e-05, |
| "objective/entropy": -110.55036926269531, |
| "objective/kl": 51.608421325683594, |
| "objective/non_score_reward": -5.160842418670654, |
| "objective/rlhf_reward": -16.243369913101198, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.193710446357727, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6105633974075317, |
| "step": 522, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000009536743164 |
| }, |
| { |
| "episode": 8384, |
| "epoch": 0.22998847863060295, |
| "loss/policy_avg": -1.15142822265625, |
| "lr": 1.6236842105263158e-05, |
| "objective/entropy": -194.12693786621094, |
| "objective/kl": 60.674110412597656, |
| "objective/non_score_reward": -6.067410469055176, |
| "objective/rlhf_reward": -23.86964330673218, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 37.457462310791016, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7633137106895447, |
| "step": 523, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9969725608825684 |
| }, |
| { |
| "episode": 8400, |
| "epoch": 0.2304273879409667, |
| "loss/policy_avg": -0.3951420783996582, |
| "lr": 1.6210526315789476e-05, |
| "objective/entropy": -70.58984375, |
| "objective/kl": 50.954105377197266, |
| "objective/non_score_reward": -5.0954108238220215, |
| "objective/rlhf_reward": -15.98164281845093, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5035173892974854, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6034559011459351, |
| "step": 524, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0016956329345703 |
| }, |
| { |
| "episode": 8416, |
| "epoch": 0.23086629725133045, |
| "loss/policy_avg": -0.012867942452430725, |
| "lr": 1.618421052631579e-05, |
| "objective/entropy": -262.4700012207031, |
| "objective/kl": 26.607215881347656, |
| "objective/non_score_reward": -2.660721778869629, |
| "objective/rlhf_reward": -6.242887115478516, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.36631155014038086, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6922171115875244, |
| "step": 525, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0013232231140137 |
| }, |
| { |
| "episode": 8432, |
| "epoch": 0.23130520656169418, |
| "loss/policy_avg": 0.1977730691432953, |
| "lr": 1.6157894736842106e-05, |
| "objective/entropy": -9.29283332824707, |
| "objective/kl": 75.08651733398438, |
| "objective/non_score_reward": -7.5086517333984375, |
| "objective/rlhf_reward": -29.634605979919435, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 2.5576517581939697, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.8048279285430908, |
| "step": 526, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9986631870269775 |
| }, |
| { |
| "episode": 8448, |
| "epoch": 0.23174411587205793, |
| "loss/policy_avg": 0.09318825602531433, |
| "lr": 1.613157894736842e-05, |
| "objective/entropy": -24.686296463012695, |
| "objective/kl": 62.26502990722656, |
| "objective/non_score_reward": -6.226503372192383, |
| "objective/rlhf_reward": -20.506013011932374, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 107.91770935058594, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.58833909034729, |
| "step": 527, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0000178813934326 |
| }, |
| { |
| "episode": 8464, |
| "epoch": 0.23218302518242168, |
| "loss/policy_avg": 0.030599527060985565, |
| "lr": 1.6105263157894736e-05, |
| "objective/entropy": -306.718017578125, |
| "objective/kl": 33.813175201416016, |
| "objective/non_score_reward": -3.381317615509033, |
| "objective/rlhf_reward": -15.525270462036133, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.0647120475769043, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.7297744154930115, |
| "step": 528, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.998337745666504 |
| }, |
| { |
| "episode": 8480, |
| "epoch": 0.23262193449278543, |
| "loss/policy_avg": 0.49856454133987427, |
| "lr": 1.6078947368421054e-05, |
| "objective/entropy": -59.88057327270508, |
| "objective/kl": 62.05354309082031, |
| "objective/non_score_reward": -6.205354690551758, |
| "objective/rlhf_reward": -20.421417808532716, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.5137290358543396, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7658047676086426, |
| "step": 529, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9996016025543213 |
| }, |
| { |
| "episode": 8496, |
| "epoch": 0.23306084380314918, |
| "loss/policy_avg": 1.4718656539916992, |
| "lr": 1.605263157894737e-05, |
| "objective/entropy": -23.841388702392578, |
| "objective/kl": 50.9389762878418, |
| "objective/non_score_reward": -5.093897819519043, |
| "objective/rlhf_reward": -15.975590801239015, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.899733304977417, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6414656639099121, |
| "step": 530, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9971559047698975 |
| }, |
| { |
| "episode": 8512, |
| "epoch": 0.23349975311351293, |
| "loss/policy_avg": -0.14778290688991547, |
| "lr": 1.6026315789473684e-05, |
| "objective/entropy": -280.56488037109375, |
| "objective/kl": 39.72999572753906, |
| "objective/non_score_reward": -3.9729995727539062, |
| "objective/rlhf_reward": -11.491998291015626, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9470717310905457, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6742441654205322, |
| "step": 531, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9999909400939941 |
| }, |
| { |
| "episode": 8528, |
| "epoch": 0.23393866242387668, |
| "loss/policy_avg": -0.8402799367904663, |
| "lr": 1.6e-05, |
| "objective/entropy": -146.46067810058594, |
| "objective/kl": 55.393707275390625, |
| "objective/non_score_reward": -5.539371013641357, |
| "objective/rlhf_reward": -17.757483100891115, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 65.72979736328125, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6336046457290649, |
| "step": 532, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0008506774902344 |
| }, |
| { |
| "episode": 8544, |
| "epoch": 0.23437757173424043, |
| "loss/policy_avg": 0.10066896677017212, |
| "lr": 1.5973684210526314e-05, |
| "objective/entropy": -54.2482795715332, |
| "objective/kl": 59.967933654785156, |
| "objective/non_score_reward": -5.996793270111084, |
| "objective/rlhf_reward": -19.587173080444337, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9665546417236328, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6454309225082397, |
| "step": 533, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.002523899078369 |
| }, |
| { |
| "episode": 8560, |
| "epoch": 0.23481648104460415, |
| "loss/policy_avg": 0.96723473072052, |
| "lr": 1.5947368421052633e-05, |
| "objective/entropy": -75.4139633178711, |
| "objective/kl": 53.41941452026367, |
| "objective/non_score_reward": -5.3419413566589355, |
| "objective/rlhf_reward": -16.967765903472902, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.5414352416992188, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6739579439163208, |
| "step": 534, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9997044801712036 |
| }, |
| { |
| "episode": 8576, |
| "epoch": 0.2352553903549679, |
| "loss/policy_avg": 0.21909461915493011, |
| "lr": 1.5921052631578948e-05, |
| "objective/entropy": -55.3682861328125, |
| "objective/kl": 55.079891204833984, |
| "objective/non_score_reward": -5.507988929748535, |
| "objective/rlhf_reward": -17.631956911087038, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1223952770233154, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6993370056152344, |
| "step": 535, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9986274242401123 |
| }, |
| { |
| "episode": 8592, |
| "epoch": 0.23569429966533165, |
| "loss/policy_avg": 0.16766348481178284, |
| "lr": 1.5894736842105263e-05, |
| "objective/entropy": -38.26525115966797, |
| "objective/kl": 53.806514739990234, |
| "objective/non_score_reward": -5.380651473999023, |
| "objective/rlhf_reward": -17.122606372833253, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.6757107973098755, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6718076467514038, |
| "step": 536, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.001012086868286 |
| }, |
| { |
| "episode": 8608, |
| "epoch": 0.2361332089756954, |
| "loss/policy_avg": -0.13367152214050293, |
| "lr": 1.5868421052631578e-05, |
| "objective/entropy": -64.89817810058594, |
| "objective/kl": 70.96744537353516, |
| "objective/non_score_reward": -7.096744537353516, |
| "objective/rlhf_reward": -23.98697957992554, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.0274198055267334, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6000114679336548, |
| "step": 537, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9985002279281616 |
| }, |
| { |
| "episode": 8624, |
| "epoch": 0.23657211828605915, |
| "loss/policy_avg": 0.6538009643554688, |
| "lr": 1.5842105263157896e-05, |
| "objective/entropy": -54.44984436035156, |
| "objective/kl": 61.42521667480469, |
| "objective/non_score_reward": -6.142521858215332, |
| "objective/rlhf_reward": -20.170088386535646, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.1359775066375732, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6809964179992676, |
| "step": 538, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.000568389892578 |
| }, |
| { |
| "episode": 8640, |
| "epoch": 0.2370110275964229, |
| "loss/policy_avg": -0.14812606573104858, |
| "lr": 1.581578947368421e-05, |
| "objective/entropy": -267.172607421875, |
| "objective/kl": 30.55646324157715, |
| "objective/non_score_reward": -3.0556464195251465, |
| "objective/rlhf_reward": -14.222585678100586, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 1.3976173400878906, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7651552557945251, |
| "step": 539, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000613212585449 |
| }, |
| { |
| "episode": 8656, |
| "epoch": 0.23744993690678665, |
| "loss/policy_avg": -0.5527013540267944, |
| "lr": 1.5789473684210526e-05, |
| "objective/entropy": -38.92097091674805, |
| "objective/kl": 90.69183349609375, |
| "objective/non_score_reward": -9.069183349609375, |
| "objective/rlhf_reward": -35.87673292160034, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 3.2288694381713867, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.7611846923828125, |
| "step": 540, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0006866455078125 |
| }, |
| { |
| "episode": 8672, |
| "epoch": 0.23788884621715037, |
| "loss/policy_avg": 0.3605126142501831, |
| "lr": 1.576315789473684e-05, |
| "objective/entropy": 8.301453590393066, |
| "objective/kl": 55.14949417114258, |
| "objective/non_score_reward": -5.514949798583984, |
| "objective/rlhf_reward": -21.65979919433594, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.4897167682647705, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6536012887954712, |
| "step": 541, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9994428157806396 |
| }, |
| { |
| "episode": 8688, |
| "epoch": 0.23832775552751412, |
| "loss/policy_avg": -0.628774106502533, |
| "lr": 1.5736842105263156e-05, |
| "objective/entropy": -87.08699035644531, |
| "objective/kl": 42.880611419677734, |
| "objective/non_score_reward": -4.288061141967773, |
| "objective/rlhf_reward": -15.54812458521517, |
| "objective/scores": 0.40102999566398123, |
| "policy/approxkl_avg": 122.45773315429688, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.741459846496582, |
| "step": 542, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9991590976715088 |
| }, |
| { |
| "episode": 8704, |
| "epoch": 0.23876666483787787, |
| "loss/policy_avg": 0.4420792758464813, |
| "lr": 1.5710526315789474e-05, |
| "objective/entropy": -31.224285125732422, |
| "objective/kl": 41.453887939453125, |
| "objective/non_score_reward": -4.145388603210449, |
| "objective/rlhf_reward": -12.181554651260377, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.1506004333496094, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7803224325180054, |
| "step": 543, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9997355937957764 |
| }, |
| { |
| "episode": 8720, |
| "epoch": 0.23920557414824162, |
| "loss/policy_avg": 0.09505406767129898, |
| "lr": 1.568421052631579e-05, |
| "objective/entropy": -271.14013671875, |
| "objective/kl": 28.504985809326172, |
| "objective/non_score_reward": -2.850498676300049, |
| "objective/rlhf_reward": -7.001994705200196, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.29007285833358765, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.856664776802063, |
| "step": 544, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9991320371627808 |
| }, |
| { |
| "episode": 8736, |
| "epoch": 0.23964448345860537, |
| "loss/policy_avg": 0.429214209318161, |
| "lr": 1.5657894736842104e-05, |
| "objective/entropy": -288.30877685546875, |
| "objective/kl": 22.891695022583008, |
| "objective/non_score_reward": -2.2891695499420166, |
| "objective/rlhf_reward": -7.331849451335977, |
| "objective/scores": 0.4562071871080222, |
| "policy/approxkl_avg": 0.47718560695648193, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6876716613769531, |
| "step": 545, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9994748830795288 |
| }, |
| { |
| "episode": 8752, |
| "epoch": 0.24008339276896912, |
| "loss/policy_avg": -0.6105255484580994, |
| "lr": 1.563157894736842e-05, |
| "objective/entropy": -192.4208984375, |
| "objective/kl": 39.915897369384766, |
| "objective/non_score_reward": -3.9915900230407715, |
| "objective/rlhf_reward": -11.566360092163087, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 62.829139709472656, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5915755033493042, |
| "step": 546, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9990711212158203 |
| }, |
| { |
| "episode": 8768, |
| "epoch": 0.24052230207933287, |
| "loss/policy_avg": -0.9540656208992004, |
| "lr": 1.5605263157894737e-05, |
| "objective/entropy": -123.78657531738281, |
| "objective/kl": 41.60905075073242, |
| "objective/non_score_reward": -4.160904884338379, |
| "objective/rlhf_reward": -16.243620014190675, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 84.0649185180664, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.5818493366241455, |
| "step": 547, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0009255409240723 |
| }, |
| { |
| "episode": 8784, |
| "epoch": 0.24096121138969662, |
| "loss/policy_avg": -0.5903968811035156, |
| "lr": 1.5578947368421052e-05, |
| "objective/entropy": -53.922210693359375, |
| "objective/kl": 70.95716857910156, |
| "objective/non_score_reward": -7.095716953277588, |
| "objective/rlhf_reward": -27.982867813110353, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.5617876052856445, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6649601459503174, |
| "step": 548, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999706745147705 |
| }, |
| { |
| "episode": 8800, |
| "epoch": 0.24140012070006034, |
| "loss/policy_avg": 0.12424759566783905, |
| "lr": 1.5552631578947367e-05, |
| "objective/entropy": -291.2635498046875, |
| "objective/kl": 25.438182830810547, |
| "objective/non_score_reward": -2.543818235397339, |
| "objective/rlhf_reward": -5.775272941589355, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.44543614983558655, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.8033074736595154, |
| "step": 549, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.9994637966156006 |
| }, |
| { |
| "episode": 8816, |
| "epoch": 0.2418390300104241, |
| "loss/policy_avg": -1.257480263710022, |
| "lr": 1.5526315789473686e-05, |
| "objective/entropy": -137.553466796875, |
| "objective/kl": 48.276798248291016, |
| "objective/non_score_reward": -4.827679634094238, |
| "objective/rlhf_reward": -16.910718774795534, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 121.47811126708984, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5976001024246216, |
| "step": 550, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9993959665298462 |
| }, |
| { |
| "episode": 8832, |
| "epoch": 0.24227793932078784, |
| "loss/policy_avg": -0.5651368498802185, |
| "lr": 1.55e-05, |
| "objective/entropy": -161.20584106445312, |
| "objective/kl": 46.88820266723633, |
| "objective/non_score_reward": -4.688819885253906, |
| "objective/rlhf_reward": -16.930451269420693, |
| "objective/scores": 0.4562071871080222, |
| "policy/approxkl_avg": 53.305084228515625, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7755599021911621, |
| "step": 551, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.000209331512451 |
| }, |
| { |
| "episode": 8848, |
| "epoch": 0.2427168486311516, |
| "loss/policy_avg": -0.052873387932777405, |
| "lr": 1.547368421052632e-05, |
| "objective/entropy": -93.70918273925781, |
| "objective/kl": 56.23017883300781, |
| "objective/non_score_reward": -5.62301778793335, |
| "objective/rlhf_reward": -18.092072105407716, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.4745566844940186, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.7499211430549622, |
| "step": 552, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999940037727356 |
| }, |
| { |
| "episode": 8864, |
| "epoch": 0.24315575794151534, |
| "loss/policy_avg": -0.07624203711748123, |
| "lr": 1.5447368421052634e-05, |
| "objective/entropy": -70.65289306640625, |
| "objective/kl": 51.36430358886719, |
| "objective/non_score_reward": -5.136430740356445, |
| "objective/rlhf_reward": -16.145722961425783, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9020864963531494, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5754582285881042, |
| "step": 553, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999817132949829 |
| }, |
| { |
| "episode": 8880, |
| "epoch": 0.2435946672518791, |
| "loss/policy_avg": -0.33316770195961, |
| "lr": 1.542105263157895e-05, |
| "objective/entropy": -100.49559020996094, |
| "objective/kl": 47.4459228515625, |
| "objective/non_score_reward": -4.744592666625977, |
| "objective/rlhf_reward": -14.578370666503908, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.17755243182182312, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.5925395488739014, |
| "step": 554, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002267360687256 |
| }, |
| { |
| "episode": 8896, |
| "epoch": 0.24403357656224284, |
| "loss/policy_avg": -0.24473661184310913, |
| "lr": 1.5394736842105264e-05, |
| "objective/entropy": -85.58175659179688, |
| "objective/kl": 60.863525390625, |
| "objective/non_score_reward": -6.086352348327637, |
| "objective/rlhf_reward": -23.94540939331055, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 1.0000783205032349, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.6594964861869812, |
| "step": 555, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.999789834022522 |
| }, |
| { |
| "episode": 8912, |
| "epoch": 0.24447248587260656, |
| "loss/policy_avg": 0.9693965911865234, |
| "lr": 1.536842105263158e-05, |
| "objective/entropy": -82.48194885253906, |
| "objective/kl": 73.32645416259766, |
| "objective/non_score_reward": -7.332645416259766, |
| "objective/rlhf_reward": -24.930581188201906, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 134.82119750976562, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6412348747253418, |
| "step": 556, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9974418878555298 |
| }, |
| { |
| "episode": 8928, |
| "epoch": 0.2449113951829703, |
| "loss/policy_avg": -0.02250886708498001, |
| "lr": 1.5342105263157897e-05, |
| "objective/entropy": -291.52117919921875, |
| "objective/kl": 33.25413513183594, |
| "objective/non_score_reward": -3.325413703918457, |
| "objective/rlhf_reward": -15.301654815673828, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.8158763647079468, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5648782253265381, |
| "step": 557, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 6, |
| "val/ratio": 1.999756932258606 |
| }, |
| { |
| "episode": 8944, |
| "epoch": 0.24535030449333406, |
| "loss/policy_avg": -1.834641695022583, |
| "lr": 1.5315789473684212e-05, |
| "objective/entropy": -98.22699737548828, |
| "objective/kl": 65.66693115234375, |
| "objective/non_score_reward": -6.566692352294922, |
| "objective/rlhf_reward": -21.866769886016847, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 60.248626708984375, |
| "policy/clipfrac_avg": 1.25, |
| "policy/entropy_avg": 0.6188458204269409, |
| "step": 558, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9987174272537231 |
| }, |
| { |
| "episode": 8960, |
| "epoch": 0.2457892138036978, |
| "loss/policy_avg": 0.5259836316108704, |
| "lr": 1.5289473684210527e-05, |
| "objective/entropy": -80.89381408691406, |
| "objective/kl": 49.18425750732422, |
| "objective/non_score_reward": -4.918426513671875, |
| "objective/rlhf_reward": -15.273705101013185, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.9190274477005005, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6806643009185791, |
| "step": 559, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9992485046386719 |
| }, |
| { |
| "episode": 8976, |
| "epoch": 0.24622812311406156, |
| "loss/policy_avg": 0.2278147041797638, |
| "lr": 1.5263157894736842e-05, |
| "objective/entropy": -60.12748718261719, |
| "objective/kl": 55.052490234375, |
| "objective/non_score_reward": -5.5052490234375, |
| "objective/rlhf_reward": -21.62099657058716, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 0.8754554986953735, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5408790707588196, |
| "step": 560, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9984455108642578 |
| }, |
| { |
| "episode": 8992, |
| "epoch": 0.2466670324244253, |
| "loss/policy_avg": 0.31462305784225464, |
| "lr": 1.5236842105263159e-05, |
| "objective/entropy": -285.16705322265625, |
| "objective/kl": 36.19426727294922, |
| "objective/non_score_reward": -3.6194264888763428, |
| "objective/rlhf_reward": -12.652877207073281, |
| "objective/scores": 0.4562071871080222, |
| "policy/approxkl_avg": 1.1670053005218506, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6850937008857727, |
| "step": 561, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999622106552124 |
| }, |
| { |
| "episode": 9008, |
| "epoch": 0.24710594173478906, |
| "loss/policy_avg": 0.40112099051475525, |
| "lr": 1.5210526315789476e-05, |
| "objective/entropy": -69.30557250976562, |
| "objective/kl": 55.801090240478516, |
| "objective/non_score_reward": -5.580108642578125, |
| "objective/rlhf_reward": -17.920436000823976, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.1320338249206543, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.8245863318443298, |
| "step": 562, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0004453659057617 |
| }, |
| { |
| "episode": 9024, |
| "epoch": 0.2475448510451528, |
| "loss/policy_avg": 1.3591008186340332, |
| "lr": 1.518421052631579e-05, |
| "objective/entropy": -142.46173095703125, |
| "objective/kl": 52.2000617980957, |
| "objective/non_score_reward": -5.220005989074707, |
| "objective/rlhf_reward": -16.480024433135988, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 61.359336853027344, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.7486914396286011, |
| "step": 563, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9949495792388916 |
| }, |
| { |
| "episode": 9040, |
| "epoch": 0.24798376035551653, |
| "loss/policy_avg": -0.2928787171840668, |
| "lr": 1.5157894736842105e-05, |
| "objective/entropy": -53.85539245605469, |
| "objective/kl": 69.98612213134766, |
| "objective/non_score_reward": -6.998612403869629, |
| "objective/rlhf_reward": -23.5944486618042, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.6392881870269775, |
| "policy/clipfrac_avg": 1.5, |
| "policy/entropy_avg": 0.6351510286331177, |
| "step": 564, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9990606307983398 |
| }, |
| { |
| "episode": 9056, |
| "epoch": 0.24842266966588028, |
| "loss/policy_avg": 0.3248935341835022, |
| "lr": 1.5131578947368422e-05, |
| "objective/entropy": -45.36878204345703, |
| "objective/kl": 59.32015609741211, |
| "objective/non_score_reward": -5.932015419006348, |
| "objective/rlhf_reward": -23.32806262969971, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 181.79592895507812, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.675167441368103, |
| "step": 565, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9971544742584229 |
| }, |
| { |
| "episode": 9072, |
| "epoch": 0.24886157897624403, |
| "loss/policy_avg": 0.10387852787971497, |
| "lr": 1.5105263157894737e-05, |
| "objective/entropy": -40.72087860107422, |
| "objective/kl": 57.553802490234375, |
| "objective/non_score_reward": -5.755380630493164, |
| "objective/rlhf_reward": -18.62152156829834, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 2.47540283203125, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.588182806968689, |
| "step": 566, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9985510110855103 |
| }, |
| { |
| "episode": 9088, |
| "epoch": 0.24930048828660778, |
| "loss/policy_avg": 0.3820185959339142, |
| "lr": 1.5078947368421054e-05, |
| "objective/entropy": -82.16580200195312, |
| "objective/kl": 60.49824523925781, |
| "objective/non_score_reward": -6.049825191497803, |
| "objective/rlhf_reward": -19.799300289154054, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.949872612953186, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.6572426557540894, |
| "step": 567, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9995747804641724 |
| }, |
| { |
| "episode": 9104, |
| "epoch": 0.24973939759697153, |
| "loss/policy_avg": 1.8019391298294067, |
| "lr": 1.5052631578947369e-05, |
| "objective/entropy": -60.920955657958984, |
| "objective/kl": 69.83343505859375, |
| "objective/non_score_reward": -6.983344078063965, |
| "objective/rlhf_reward": -23.533374881744386, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.9492524862289429, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6470024585723877, |
| "step": 568, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9977750778198242 |
| }, |
| { |
| "episode": 9120, |
| "epoch": 0.2501783069073353, |
| "loss/policy_avg": 0.1746143400669098, |
| "lr": 1.5026315789473685e-05, |
| "objective/entropy": -273.22198486328125, |
| "objective/kl": 20.09288215637207, |
| "objective/non_score_reward": -2.0092883110046387, |
| "objective/rlhf_reward": -10.037153244018555, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.23779484629631042, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.6256122589111328, |
| "step": 569, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 1.9998234510421753 |
| }, |
| { |
| "episode": 9136, |
| "epoch": 0.250617216217699, |
| "loss/policy_avg": -1.2724919319152832, |
| "lr": 1.5e-05, |
| "objective/entropy": -167.3118896484375, |
| "objective/kl": 67.96356201171875, |
| "objective/non_score_reward": -6.796355724334717, |
| "objective/rlhf_reward": -26.78542289733887, |
| "objective/scores": 0.1, |
| "policy/approxkl_avg": 129.8609619140625, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.5831791162490845, |
| "step": 570, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0005290508270264 |
| }, |
| { |
| "episode": 9152, |
| "epoch": 0.2510561255280628, |
| "loss/policy_avg": -0.5978893041610718, |
| "lr": 1.4973684210526315e-05, |
| "objective/entropy": -162.8898468017578, |
| "objective/kl": 44.88909912109375, |
| "objective/non_score_reward": -4.48891019821167, |
| "objective/rlhf_reward": -13.555640316009523, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 58.46027374267578, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6906276941299438, |
| "step": 571, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 5, |
| "val/ratio": 2.0012764930725098 |
| }, |
| { |
| "episode": 9168, |
| "epoch": 0.2514950348384265, |
| "loss/policy_avg": 0.5245893597602844, |
| "lr": 1.4947368421052632e-05, |
| "objective/entropy": -91.91587829589844, |
| "objective/kl": 55.85387420654297, |
| "objective/non_score_reward": -5.585387229919434, |
| "objective/rlhf_reward": -17.941549396514894, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.2534908056259155, |
| "policy/clipfrac_avg": 0.75, |
| "policy/entropy_avg": 0.5883944630622864, |
| "step": 572, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999537467956543 |
| }, |
| { |
| "episode": 9184, |
| "epoch": 0.2519339441487903, |
| "loss/policy_avg": -0.6780993342399597, |
| "lr": 1.4921052631578947e-05, |
| "objective/entropy": -72.03091430664062, |
| "objective/kl": 63.12275695800781, |
| "objective/non_score_reward": -6.3122758865356445, |
| "objective/rlhf_reward": -22.84910306930542, |
| "objective/scores": 0.6, |
| "policy/approxkl_avg": 0.9321353435516357, |
| "policy/clipfrac_avg": 1.75, |
| "policy/entropy_avg": 0.7479265332221985, |
| "step": 573, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0003271102905273 |
| }, |
| { |
| "episode": 9200, |
| "epoch": 0.252372853459154, |
| "loss/policy_avg": 0.025867890566587448, |
| "lr": 1.4894736842105264e-05, |
| "objective/entropy": -305.865966796875, |
| "objective/kl": 30.30899429321289, |
| "objective/non_score_reward": -3.0308995246887207, |
| "objective/rlhf_reward": -14.123598098754883, |
| "objective/scores": -0.5, |
| "policy/approxkl_avg": 0.22707101702690125, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.6340222358703613, |
| "step": 574, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9998149871826172 |
| }, |
| { |
| "episode": 9216, |
| "epoch": 0.2528117627695177, |
| "loss/policy_avg": 0.2892727851867676, |
| "lr": 1.4868421052631579e-05, |
| "objective/entropy": -4.849845886230469, |
| "objective/kl": 57.78361511230469, |
| "objective/non_score_reward": -5.7783613204956055, |
| "objective/rlhf_reward": -18.713445281982423, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7950266003608704, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.7515757083892822, |
| "step": 575, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0013017654418945 |
| }, |
| { |
| "episode": 9232, |
| "epoch": 0.2532506720798815, |
| "loss/policy_avg": 3.0340328216552734, |
| "lr": 1.4842105263157895e-05, |
| "objective/entropy": -41.5596923828125, |
| "objective/kl": 67.7565689086914, |
| "objective/non_score_reward": -6.775656700134277, |
| "objective/rlhf_reward": -22.70262727737427, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 1.2664908170700073, |
| "policy/clipfrac_avg": 0.5, |
| "policy/entropy_avg": 0.609063982963562, |
| "step": 576, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9999446868896484 |
| }, |
| { |
| "episode": 9248, |
| "epoch": 0.2536895813902452, |
| "loss/policy_avg": 0.24365383386611938, |
| "lr": 1.481578947368421e-05, |
| "objective/entropy": -62.568878173828125, |
| "objective/kl": 73.23529052734375, |
| "objective/non_score_reward": -7.3235297203063965, |
| "objective/rlhf_reward": -24.894118881225587, |
| "objective/scores": 1.1, |
| "policy/approxkl_avg": 0.7512961626052856, |
| "policy/clipfrac_avg": 1.0, |
| "policy/entropy_avg": 0.6471009850502014, |
| "step": 577, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 2.0002264976501465 |
| }, |
| { |
| "episode": 9264, |
| "epoch": 0.254128490700609, |
| "loss/policy_avg": 1.6541283130645752, |
| "lr": 1.4789473684210525e-05, |
| "objective/entropy": -75.43449401855469, |
| "objective/kl": 60.49390411376953, |
| "objective/non_score_reward": -6.0493903160095215, |
| "objective/rlhf_reward": -21.273842249752256, |
| "objective/scores": 0.7309297535714575, |
| "policy/approxkl_avg": 0.5560513734817505, |
| "policy/clipfrac_avg": 0.25, |
| "policy/entropy_avg": 0.5372088551521301, |
| "step": 578, |
| "val/clipfrac_avg": 0.0, |
| "val/num_eos_tokens": 4, |
| "val/ratio": 1.9995195865631104 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 570, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1.0, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0, |
| "train_batch_size": null, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|