{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.769172932330827, "eval_steps": 500, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "advantages": 2.118448499288661e-07, "advantages_std": 1.497294020652771, "clip_ratio": 0.0, "completion_length": 86.44166946411133, "epoch": 0.007518796992481203, "grad_norm": 6.75, "kl": 0.02395845539867878, "learning_rate": 4.996240601503759e-06, "loss": 0.0064, "num_tokens": 303965.0, "reward": -2.5252880096435546, "reward_std": 7.708663702011108, "rewards/get_chromagram_reward": 0.6135738074779511, "rewards/get_chromagram_reward_std": 0.12009836137294769, "rewards/get_intelligibility_reward": -8.167745923995971, "rewards/get_intelligibility_reward_std": 11.17201633453369, "rewards/get_target_len_reward": -0.021691455319523812, "rewards/get_target_len_reward_std": 0.06159961894154549, "step": 10 }, { "advantages": -7.053215966834614e-08, "advantages_std": 1.5645875334739685, "clip_ratio": 0.0, "completion_length": 88.32143020629883, "epoch": 0.015037593984962405, "grad_norm": 7.09375, "kl": 0.08431399557739497, "learning_rate": 4.992481203007519e-06, "loss": 0.0118, "num_tokens": 614856.0, "reward": -1.9682563066482544, "reward_std": 7.121526718139648, "rewards/get_chromagram_reward": 0.6209080219268799, "rewards/get_chromagram_reward_std": 0.11176137179136277, "rewards/get_intelligibility_reward": -6.507082128524781, "rewards/get_intelligibility_reward_std": 10.901963329315185, "rewards/get_target_len_reward": -0.018594418559223412, "rewards/get_target_len_reward_std": 0.05279657319188118, "step": 20 }, { "advantages": -1.517434867537304e-07, "advantages_std": 1.6699465274810792, "clip_ratio": 0.0, "completion_length": 87.67381134033204, "epoch": 0.022556390977443608, "grad_norm": 11.1875, "kl": 0.05091430209577084, "learning_rate": 4.9887218045112785e-06, "loss": 0.0063, "num_tokens": 924413.0, "reward": -1.7607180513441563, "reward_std": 6.793271017074585, "rewards/get_chromagram_reward": 0.6260063648223877, "rewards/get_chromagram_reward_std": 0.1159092791378498, "rewards/get_intelligibility_reward": -5.883437991142273, "rewards/get_intelligibility_reward_std": 10.413993167877198, "rewards/get_target_len_reward": -0.024722003471106292, "rewards/get_target_len_reward_std": 0.06130978129804134, "step": 30 }, { "advantages": 4.1971603508272893e-07, "advantages_std": 1.6840260982513429, "clip_ratio": 0.0, "completion_length": 86.18273849487305, "epoch": 0.03007518796992481, "grad_norm": 9.0625, "kl": 0.08079373240470886, "learning_rate": 4.984962406015038e-06, "loss": 0.0128, "num_tokens": 1229476.0, "reward": -1.8878801107406615, "reward_std": 7.23775429725647, "rewards/get_chromagram_reward": 0.6211916923522949, "rewards/get_chromagram_reward_std": 0.11408754736185074, "rewards/get_intelligibility_reward": -6.262418246269226, "rewards/get_intelligibility_reward_std": 11.161653137207031, "rewards/get_target_len_reward": -0.022413293924182655, "rewards/get_target_len_reward_std": 0.06784052737057208, "step": 40 }, { "advantages": -1.251697586468481e-07, "advantages_std": 1.4488242149353028, "clip_ratio": 0.0, "completion_length": 90.10297927856445, "epoch": 0.03759398496240601, "grad_norm": 6.875, "kl": 0.09215598180890083, "learning_rate": 4.981203007518797e-06, "loss": 0.0095, "num_tokens": 1544823.0, "reward": -1.7895142793655396, "reward_std": 6.443612480163575, "rewards/get_chromagram_reward": 0.6228613257408142, "rewards/get_chromagram_reward_std": 0.10370689854025841, "rewards/get_intelligibility_reward": -5.974599933624267, "rewards/get_intelligibility_reward_std": 9.874517250061036, "rewards/get_target_len_reward": -0.016803824808448553, "rewards/get_target_len_reward_std": 0.045621754601597786, "step": 50 }, { "advantages": 5.180637305812752e-07, "advantages_std": 1.553832471370697, "clip_ratio": 0.0, "completion_length": 87.78928756713867, "epoch": 0.045112781954887216, "grad_norm": 6.5, "kl": 0.12433034032583237, "learning_rate": 4.977443609022557e-06, "loss": 0.0151, "num_tokens": 1853259.0, "reward": -1.7618976533412933, "reward_std": 7.031143283843994, "rewards/get_chromagram_reward": 0.6234481394290924, "rewards/get_chromagram_reward_std": 0.12234157994389534, "rewards/get_intelligibility_reward": -5.885248851776123, "rewards/get_intelligibility_reward_std": 11.020007610321045, "rewards/get_target_len_reward": -0.023891913425177335, "rewards/get_target_len_reward_std": 0.06750189382582902, "step": 60 }, { "advantages": -1.1424223771427932e-07, "advantages_std": 1.6184780716896057, "clip_ratio": 0.0, "completion_length": 87.79166793823242, "epoch": 0.05263157894736842, "grad_norm": 7.96875, "kl": 0.18404756337404252, "learning_rate": 4.973684210526316e-06, "loss": 0.0199, "num_tokens": 2161781.0, "reward": -1.8432800233364106, "reward_std": 7.268847322463989, "rewards/get_chromagram_reward": 0.6163238942623138, "rewards/get_chromagram_reward_std": 0.11489329114556313, "rewards/get_intelligibility_reward": -6.128581380844116, "rewards/get_intelligibility_reward_std": 11.405856323242187, "rewards/get_target_len_reward": -0.017582453973591327, "rewards/get_target_len_reward_std": 0.046123570390045644, "step": 70 }, { "advantages": 4.713734142569592e-07, "advantages_std": 1.726417601108551, "clip_ratio": 0.0, "completion_length": 87.27023849487304, "epoch": 0.06015037593984962, "grad_norm": 16.25, "kl": 0.41561629995703697, "learning_rate": 4.969924812030076e-06, "loss": 0.0458, "num_tokens": 2469378.0, "reward": -2.222189700603485, "reward_std": 8.024344348907471, "rewards/get_chromagram_reward": 0.6164293169975281, "rewards/get_chromagram_reward_std": 0.11355845630168915, "rewards/get_intelligibility_reward": -7.259270143508911, "rewards/get_intelligibility_reward_std": 12.41039228439331, "rewards/get_target_len_reward": -0.02372810449451208, "rewards/get_target_len_reward_std": 0.06544858254492283, "step": 80 }, { "advantages": -3.9115550976021043e-08, "advantages_std": 1.5857127904891968, "clip_ratio": 0.0, "completion_length": 83.47380981445312, "epoch": 0.06766917293233082, "grad_norm": 5.9375, "kl": 0.38582088649272916, "learning_rate": 4.966165413533835e-06, "loss": 0.0418, "num_tokens": 2767074.0, "reward": -2.1240372538566588, "reward_std": 7.091709327697754, "rewards/get_chromagram_reward": 0.6215401530265808, "rewards/get_chromagram_reward_std": 0.1213473655283451, "rewards/get_intelligibility_reward": -6.968455457687378, "rewards/get_intelligibility_reward_std": 10.730904293060302, "rewards/get_target_len_reward": -0.025196005031466483, "rewards/get_target_len_reward_std": 0.06093333307653666, "step": 90 }, { "advantages": -1.0828177181565479e-07, "advantages_std": 1.5523891210556031, "clip_ratio": 0.0, "completion_length": 86.3976203918457, "epoch": 0.07518796992481203, "grad_norm": 7.84375, "kl": 0.1602418266236782, "learning_rate": 4.962406015037594e-06, "loss": 0.0229, "num_tokens": 3072540.0, "reward": -2.1356523513793944, "reward_std": 7.14356107711792, "rewards/get_chromagram_reward": 0.6146622836589813, "rewards/get_chromagram_reward_std": 0.11833304241299629, "rewards/get_intelligibility_reward": -6.999609637260437, "rewards/get_intelligibility_reward_std": 10.735732078552246, "rewards/get_target_len_reward": -0.02200925387442112, "rewards/get_target_len_reward_std": 0.06754342392086983, "step": 100 }, { "advantages": -1.3721486098461354e-07, "advantages_std": 1.5168035984039308, "clip_ratio": 0.0, "completion_length": 87.88928680419922, "epoch": 0.08270676691729323, "grad_norm": 8.0625, "kl": 0.21410228833556175, "learning_rate": 4.958646616541354e-06, "loss": 0.0238, "num_tokens": 3381491.0, "reward": -1.6413340508937835, "reward_std": 6.78236951828003, "rewards/get_chromagram_reward": 0.618117380142212, "rewards/get_chromagram_reward_std": 0.11681264266371727, "rewards/get_intelligibility_reward": -5.526298701763153, "rewards/get_intelligibility_reward_std": 10.602624130249023, "rewards/get_target_len_reward": -0.015820635203272104, "rewards/get_target_len_reward_std": 0.045049076154828074, "step": 110 }, { "advantages": -2.128382519117622e-07, "advantages_std": 1.590258038043976, "clip_ratio": 0.0, "completion_length": 88.00714492797852, "epoch": 0.09022556390977443, "grad_norm": 5.34375, "kl": 0.1765545964241028, "learning_rate": 4.954887218045113e-06, "loss": 0.0232, "num_tokens": 3691706.0, "reward": -1.7672514021396637, "reward_std": 6.887362623214722, "rewards/get_chromagram_reward": 0.6296150028705597, "rewards/get_chromagram_reward_std": 0.10867820680141449, "rewards/get_intelligibility_reward": -5.908388066291809, "rewards/get_intelligibility_reward_std": 10.689667415618896, "rewards/get_target_len_reward": -0.022980824299156665, "rewards/get_target_len_reward_std": 0.06642410941421986, "step": 120 }, { "advantages": -1.5621384186204068e-07, "advantages_std": 1.4537077307701112, "clip_ratio": 0.0, "completion_length": 87.18214492797851, "epoch": 0.09774436090225563, "grad_norm": 7.40625, "kl": 0.22367219924926757, "learning_rate": 4.951127819548872e-06, "loss": 0.0251, "num_tokens": 3998700.0, "reward": -2.014804148674011, "reward_std": 6.904703187942505, "rewards/get_chromagram_reward": 0.616754686832428, "rewards/get_chromagram_reward_std": 0.11644136309623718, "rewards/get_intelligibility_reward": -6.640150189399719, "rewards/get_intelligibility_reward_std": 10.418735027313232, "rewards/get_target_len_reward": -0.021016582287847994, "rewards/get_target_len_reward_std": 0.061346288211643694, "step": 130 }, { "advantages": -8.443991836415421e-07, "advantages_std": 1.5730493187904357, "clip_ratio": 0.0, "completion_length": 84.52380981445313, "epoch": 0.10526315789473684, "grad_norm": 8.125, "kl": 0.3281571701169014, "learning_rate": 4.947368421052632e-06, "loss": 0.0385, "num_tokens": 4298262.0, "reward": -1.8539611220359802, "reward_std": 6.75795578956604, "rewards/get_chromagram_reward": 0.6141342937946319, "rewards/get_chromagram_reward_std": 0.11726542636752128, "rewards/get_intelligibility_reward": -6.155655121803283, "rewards/get_intelligibility_reward_std": 10.387967538833617, "rewards/get_target_len_reward": -0.02036202410236001, "rewards/get_target_len_reward_std": 0.05991331338882446, "step": 140 }, { "advantages": 8.443992953743873e-08, "advantages_std": 1.4925345063209534, "clip_ratio": 0.0, "completion_length": 89.01964416503907, "epoch": 0.11278195488721804, "grad_norm": 16.5, "kl": 0.2456020161509514, "learning_rate": 4.943609022556392e-06, "loss": 0.0272, "num_tokens": 4611748.0, "reward": -1.597898268699646, "reward_std": 6.498913764953613, "rewards/get_chromagram_reward": 0.6427377462387085, "rewards/get_chromagram_reward_std": 0.11222934648394585, "rewards/get_intelligibility_reward": -5.41141984462738, "rewards/get_intelligibility_reward_std": 10.233173274993897, "rewards/get_target_len_reward": -0.025012334156781436, "rewards/get_target_len_reward_std": 0.05679422654211521, "step": 150 }, { "advantages": -1.0927518871994834e-07, "advantages_std": 1.5491583943367004, "clip_ratio": 0.0, "completion_length": 86.72500152587891, "epoch": 0.12030075187969924, "grad_norm": 7.3125, "kl": 0.19152849316596984, "learning_rate": 4.9398496240601505e-06, "loss": 0.0232, "num_tokens": 4917996.0, "reward": -2.06285679936409, "reward_std": 7.055027866363526, "rewards/get_chromagram_reward": 0.6176262974739075, "rewards/get_chromagram_reward_std": 0.11738575920462609, "rewards/get_intelligibility_reward": -6.784622621536255, "rewards/get_intelligibility_reward_std": 10.655733203887939, "rewards/get_target_len_reward": -0.021573868487030266, "rewards/get_target_len_reward_std": 0.061045969277620314, "step": 160 }, { "advantages": 1.5075007411269326e-07, "advantages_std": 1.6391386032104491, "clip_ratio": 0.0, "completion_length": 86.58154907226563, "epoch": 0.12781954887218044, "grad_norm": 8.1875, "kl": 0.216806098818779, "learning_rate": 4.93609022556391e-06, "loss": 0.0204, "num_tokens": 5223687.0, "reward": -1.6740633368492126, "reward_std": 6.215735626220703, "rewards/get_chromagram_reward": 0.6173975050449372, "rewards/get_chromagram_reward_std": 0.11676538810133934, "rewards/get_intelligibility_reward": -5.620945620536804, "rewards/get_intelligibility_reward_std": 9.571168136596679, "rewards/get_target_len_reward": -0.018641630932688714, "rewards/get_target_len_reward_std": 0.04243884533643723, "step": 170 }, { "advantages": 3.563860104804917e-08, "advantages_std": 1.605065941810608, "clip_ratio": 0.0, "completion_length": 85.777978515625, "epoch": 0.13533834586466165, "grad_norm": 7.1875, "kl": 0.22296204417943954, "learning_rate": 4.93233082706767e-06, "loss": 0.0279, "num_tokens": 5526129.0, "reward": -1.9188735783100128, "reward_std": 7.194452619552612, "rewards/get_chromagram_reward": 0.6230684220790863, "rewards/get_chromagram_reward_std": 0.10260485261678695, "rewards/get_intelligibility_reward": -6.356639158725739, "rewards/get_intelligibility_reward_std": 11.165844345092774, "rewards/get_target_len_reward": -0.0230495841242373, "rewards/get_target_len_reward_std": 0.06901184841990471, "step": 180 }, { "advantages": 5.2899112823467934e-08, "advantages_std": 1.6138009309768677, "clip_ratio": 0.0, "completion_length": 84.4803596496582, "epoch": 0.14285714285714285, "grad_norm": 5.40625, "kl": 0.4960071489214897, "learning_rate": 4.928571428571429e-06, "loss": 0.0515, "num_tokens": 5826103.0, "reward": -1.8541862666606903, "reward_std": 6.528654289245606, "rewards/get_chromagram_reward": 0.6299790501594543, "rewards/get_chromagram_reward_std": 0.12313227728009224, "rewards/get_intelligibility_reward": -6.169015526771545, "rewards/get_intelligibility_reward_std": 9.88207130432129, "rewards/get_target_len_reward": -0.02352191610261798, "rewards/get_target_len_reward_std": 0.05509545002132654, "step": 190 }, { "advantages": -3.269563261909525e-07, "advantages_std": 1.5400643467903137, "clip_ratio": 0.0, "completion_length": 87.34881057739258, "epoch": 0.15037593984962405, "grad_norm": 6.53125, "kl": 0.23356708884239197, "learning_rate": 4.924812030075188e-06, "loss": 0.0275, "num_tokens": 6134473.0, "reward": -1.7080044865608215, "reward_std": 6.700563287734985, "rewards/get_chromagram_reward": 0.6138969004154206, "rewards/get_chromagram_reward_std": 0.1239325612783432, "rewards/get_intelligibility_reward": -5.716689348220825, "rewards/get_intelligibility_reward_std": 10.459880065917968, "rewards/get_target_len_reward": -0.021220722515136004, "rewards/get_target_len_reward_std": 0.05877160653471947, "step": 200 }, { "advantages": -4.68889902549563e-07, "advantages_std": 1.5320081472396851, "clip_ratio": 0.0, "completion_length": 87.94166793823243, "epoch": 0.15789473684210525, "grad_norm": 6.375, "kl": 0.20200251489877702, "learning_rate": 4.921052631578948e-06, "loss": 0.0273, "num_tokens": 6444424.0, "reward": -1.5870545089244843, "reward_std": 6.537412786483765, "rewards/get_chromagram_reward": 0.6228235900402069, "rewards/get_chromagram_reward_std": 0.11699960082769394, "rewards/get_intelligibility_reward": -5.36222653388977, "rewards/get_intelligibility_reward_std": 10.262313938140869, "rewards/get_target_len_reward": -0.02176028909161687, "rewards/get_target_len_reward_std": 0.06819032784551382, "step": 210 }, { "advantages": 1.400709237486808e-07, "advantages_std": 1.5696740984916686, "clip_ratio": 0.0, "completion_length": 86.96012115478516, "epoch": 0.16541353383458646, "grad_norm": 7.84375, "kl": 0.20887088924646377, "learning_rate": 4.9172932330827075e-06, "loss": 0.0224, "num_tokens": 6751704.0, "reward": -2.110741305351257, "reward_std": 7.709843921661377, "rewards/get_chromagram_reward": 0.6346747756004334, "rewards/get_chromagram_reward_std": 0.1108613669872284, "rewards/get_intelligibility_reward": -6.943783760070801, "rewards/get_intelligibility_reward_std": 11.854671812057495, "rewards/get_target_len_reward": -0.023114563897252083, "rewards/get_target_len_reward_std": 0.06313695535063743, "step": 220 }, { "advantages": -1.0306637108215e-07, "advantages_std": 1.4813214898109437, "clip_ratio": 0.0, "completion_length": 85.79404830932617, "epoch": 0.17293233082706766, "grad_norm": 6.03125, "kl": 0.2533964037895203, "learning_rate": 4.913533834586466e-06, "loss": 0.0297, "num_tokens": 7055744.0, "reward": -1.7548074908554554, "reward_std": 7.328680038452148, "rewards/get_chromagram_reward": 0.6251808404922485, "rewards/get_chromagram_reward_std": 0.11388902738690376, "rewards/get_intelligibility_reward": -5.8636813282966616, "rewards/get_intelligibility_reward_std": 11.45258846282959, "rewards/get_target_len_reward": -0.02592161502689123, "rewards/get_target_len_reward_std": 0.08212394453585148, "step": 230 }, { "advantages": -1.2392800385896408e-07, "advantages_std": 1.5091304302215576, "clip_ratio": 0.0, "completion_length": 88.00178833007813, "epoch": 0.18045112781954886, "grad_norm": 5.375, "kl": 0.2697433799505234, "learning_rate": 4.909774436090226e-06, "loss": 0.0281, "num_tokens": 7365315.0, "reward": -1.947976952791214, "reward_std": 7.370342445373535, "rewards/get_chromagram_reward": 0.6169031858444214, "rewards/get_chromagram_reward_std": 0.10551710426807404, "rewards/get_intelligibility_reward": -6.44406920671463, "rewards/get_intelligibility_reward_std": 11.373730850219726, "rewards/get_target_len_reward": -0.016764528863132, "rewards/get_target_len_reward_std": 0.05088087841868401, "step": 240 }, { "advantages": 1.3088186108234367e-07, "advantages_std": 1.4498517632484436, "clip_ratio": 0.0, "completion_length": 87.54881134033204, "epoch": 0.18796992481203006, "grad_norm": 15.0625, "kl": 0.22136173099279405, "learning_rate": 4.906015037593986e-06, "loss": 0.033, "num_tokens": 7672896.0, "reward": -2.0891721487045287, "reward_std": 7.358000087738037, "rewards/get_chromagram_reward": 0.6265693724155426, "rewards/get_chromagram_reward_std": 0.11023145914077759, "rewards/get_intelligibility_reward": -6.870501017570495, "rewards/get_intelligibility_reward_std": 11.197034549713134, "rewards/get_target_len_reward": -0.02358451336622238, "rewards/get_target_len_reward_std": 0.07190894670784473, "step": 250 }, { "advantages": 1.5969078219768563e-07, "advantages_std": 1.628383994102478, "clip_ratio": 0.0, "completion_length": 86.22857284545898, "epoch": 0.19548872180451127, "grad_norm": 6.5625, "kl": 0.24279214888811113, "learning_rate": 4.902255639097745e-06, "loss": 0.0225, "num_tokens": 7978116.0, "reward": -1.7696778357028962, "reward_std": 7.20601077079773, "rewards/get_chromagram_reward": 0.6273474216461181, "rewards/get_chromagram_reward_std": 0.11232817322015762, "rewards/get_intelligibility_reward": -5.913970136642456, "rewards/get_intelligibility_reward_std": 11.329379558563232, "rewards/get_target_len_reward": -0.02241056999191642, "rewards/get_target_len_reward_std": 0.060182999819517136, "step": 260 }, { "advantages": 2.610186704998796e-07, "advantages_std": 1.6137927174568176, "clip_ratio": 0.0, "completion_length": 86.38928756713867, "epoch": 0.20300751879699247, "grad_norm": 6.5, "kl": 0.36207431107759475, "learning_rate": 4.898496240601504e-06, "loss": 0.038, "num_tokens": 8283066.0, "reward": -1.8501620173454285, "reward_std": 6.904186582565307, "rewards/get_chromagram_reward": 0.6266819715499878, "rewards/get_chromagram_reward_std": 0.11233701780438424, "rewards/get_intelligibility_reward": -6.15733824968338, "rewards/get_intelligibility_reward_std": 10.550333213806152, "rewards/get_target_len_reward": -0.019829432107508184, "rewards/get_target_len_reward_std": 0.05274516306817532, "step": 270 }, { "advantages": 3.568828223166065e-07, "advantages_std": 1.4528794765472413, "clip_ratio": 0.0, "completion_length": 84.30774002075195, "epoch": 0.21052631578947367, "grad_norm": 6.0625, "kl": 0.23077704459428788, "learning_rate": 4.894736842105264e-06, "loss": 0.027, "num_tokens": 8581555.0, "reward": -2.233861434459686, "reward_std": 7.116700315475464, "rewards/get_chromagram_reward": 0.6151746988296509, "rewards/get_chromagram_reward_std": 0.1124894380569458, "rewards/get_intelligibility_reward": -7.2921109914779665, "rewards/get_intelligibility_reward_std": 10.465060329437256, "rewards/get_target_len_reward": -0.024647843185812236, "rewards/get_target_len_reward_std": 0.06898632310330868, "step": 280 }, { "advantages": -2.6598572944180887e-07, "advantages_std": 1.6519518733024596, "clip_ratio": 0.0, "completion_length": 85.73809585571288, "epoch": 0.21804511278195488, "grad_norm": 5.25, "kl": 0.2224901869893074, "learning_rate": 4.890977443609023e-06, "loss": 0.0233, "num_tokens": 8885105.0, "reward": -1.7631924510002137, "reward_std": 7.07942156791687, "rewards/get_chromagram_reward": 0.6191496014595032, "rewards/get_chromagram_reward_std": 0.10151097774505616, "rewards/get_intelligibility_reward": -5.888655805587769, "rewards/get_intelligibility_reward_std": 11.147787237167359, "rewards/get_target_len_reward": -0.02007094845175743, "rewards/get_target_len_reward_std": 0.054488342627882956, "step": 290 }, { "advantages": 4.023313522338867e-07, "advantages_std": 1.5905011296272278, "clip_ratio": 0.0, "completion_length": 88.19524002075195, "epoch": 0.22556390977443608, "grad_norm": 13.75, "kl": 0.25747594237327576, "learning_rate": 4.887218045112782e-06, "loss": 0.0286, "num_tokens": 9194808.0, "reward": -1.5700021982192993, "reward_std": 6.991564178466797, "rewards/get_chromagram_reward": 0.6341972947120667, "rewards/get_chromagram_reward_std": 0.11429327800869941, "rewards/get_intelligibility_reward": -5.320208358764648, "rewards/get_intelligibility_reward_std": 11.084502124786377, "rewards/get_target_len_reward": -0.023995132092386483, "rewards/get_target_len_reward_std": 0.06535121817141772, "step": 300 }, { "advantages": -1.0132790144723458e-07, "advantages_std": 1.4907500624656678, "clip_ratio": 0.0, "completion_length": 82.57321624755859, "epoch": 0.23308270676691728, "grad_norm": 6.84375, "kl": 0.2589078933000565, "learning_rate": 4.883458646616542e-06, "loss": 0.0285, "num_tokens": 9489624.0, "reward": -2.0285483241081237, "reward_std": 7.1084287643432615, "rewards/get_chromagram_reward": 0.6235000729560852, "rewards/get_chromagram_reward_std": 0.11943832337856293, "rewards/get_intelligibility_reward": -6.6851557970046995, "rewards/get_intelligibility_reward_std": 10.78179931640625, "rewards/get_target_len_reward": -0.023988985922187567, "rewards/get_target_len_reward_std": 0.06262606605887414, "step": 310 }, { "advantages": 2.3469328738201512e-07, "advantages_std": 1.6342792153358459, "clip_ratio": 0.0, "completion_length": 88.48750228881836, "epoch": 0.24060150375939848, "grad_norm": 9.0, "kl": 0.2543763667345047, "learning_rate": 4.8796992481203006e-06, "loss": 0.0294, "num_tokens": 9800418.0, "reward": -1.7057337164878845, "reward_std": 6.9817795753479, "rewards/get_chromagram_reward": 0.6268013715744019, "rewards/get_chromagram_reward_std": 0.11317485049366952, "rewards/get_intelligibility_reward": -5.723447632789612, "rewards/get_intelligibility_reward_std": 11.025568771362305, "rewards/get_target_len_reward": -0.02055465867742896, "rewards/get_target_len_reward_std": 0.05412652175873518, "step": 320 }, { "advantages": 1.4081598394000138e-07, "advantages_std": 1.5237973570823669, "clip_ratio": 0.0, "completion_length": 87.94702529907227, "epoch": 0.24812030075187969, "grad_norm": 6.28125, "kl": 0.20852650851011276, "learning_rate": 4.875939849624061e-06, "loss": 0.0254, "num_tokens": 10109643.0, "reward": -1.5429876923561097, "reward_std": 6.826085138320923, "rewards/get_chromagram_reward": 0.6277043044567108, "rewards/get_chromagram_reward_std": 0.12131512090563774, "rewards/get_intelligibility_reward": -5.235681021213532, "rewards/get_intelligibility_reward_std": 10.872391033172608, "rewards/get_target_len_reward": -0.020986052136868237, "rewards/get_target_len_reward_std": 0.06118348352611065, "step": 330 }, { "advantages": -4.728635323303365e-07, "advantages_std": 1.60645192861557, "clip_ratio": 0.0, "completion_length": 81.48571548461913, "epoch": 0.2556390977443609, "grad_norm": 25.5, "kl": 0.2564759775996208, "learning_rate": 4.87218045112782e-06, "loss": 0.029, "num_tokens": 10401241.0, "reward": -1.7328977763652802, "reward_std": 6.528843545913697, "rewards/get_chromagram_reward": 0.616835993528366, "rewards/get_chromagram_reward_std": 0.10688713267445564, "rewards/get_intelligibility_reward": -5.793001580238342, "rewards/get_intelligibility_reward_std": 10.110911083221435, "rewards/get_target_len_reward": -0.022527353093028068, "rewards/get_target_len_reward_std": 0.06940292119979859, "step": 340 }, { "advantages": 1.7856558258699807e-07, "advantages_std": 1.4808288097381592, "clip_ratio": 0.0, "completion_length": 87.45893096923828, "epoch": 0.2631578947368421, "grad_norm": 8.375, "kl": 0.29431896060705187, "learning_rate": 4.8684210526315795e-06, "loss": 0.0392, "num_tokens": 10708869.0, "reward": -1.878147792816162, "reward_std": 6.971077013015747, "rewards/get_chromagram_reward": 0.6234214305877686, "rewards/get_chromagram_reward_std": 0.10851850062608719, "rewards/get_intelligibility_reward": -6.233998012542725, "rewards/get_intelligibility_reward_std": 10.780960750579833, "rewards/get_target_len_reward": -0.023866467643529177, "rewards/get_target_len_reward_std": 0.06917856726795435, "step": 350 }, { "advantages": 2.4487575629450474e-07, "advantages_std": 1.381740403175354, "clip_ratio": 0.0, "completion_length": 89.93631134033203, "epoch": 0.2706766917293233, "grad_norm": 7.375, "kl": 0.32826483249664307, "learning_rate": 4.864661654135338e-06, "loss": 0.0364, "num_tokens": 11023929.0, "reward": -1.3972072571516037, "reward_std": 7.187353134155273, "rewards/get_chromagram_reward": 0.6259436666965484, "rewards/get_chromagram_reward_std": 0.10643556043505668, "rewards/get_intelligibility_reward": -4.795684731006622, "rewards/get_intelligibility_reward_std": 11.673350143432618, "rewards/get_target_len_reward": -0.021880417317152023, "rewards/get_target_len_reward_std": 0.06496078819036484, "step": 360 }, { "advantages": 9.31322603037188e-08, "advantages_std": 1.4747216343879699, "clip_ratio": 0.0, "completion_length": 86.92321548461913, "epoch": 0.2781954887218045, "grad_norm": 6.71875, "kl": 0.26946457624435427, "learning_rate": 4.860902255639098e-06, "loss": 0.0315, "num_tokens": 11330383.0, "reward": -1.709691733121872, "reward_std": 7.236200475692749, "rewards/get_chromagram_reward": 0.6254827082157135, "rewards/get_chromagram_reward_std": 0.11119709685444831, "rewards/get_intelligibility_reward": -5.735943913459778, "rewards/get_intelligibility_reward_std": 11.42770071029663, "rewards/get_target_len_reward": -0.018613758590072395, "rewards/get_target_len_reward_std": 0.05823171120136976, "step": 370 }, { "advantages": 2.741813730722242e-07, "advantages_std": 1.5728921175003052, "clip_ratio": 0.0, "completion_length": 87.16845245361328, "epoch": 0.2857142857142857, "grad_norm": 8.6875, "kl": 0.32719208896160124, "learning_rate": 4.857142857142858e-06, "loss": 0.0363, "num_tokens": 11637826.0, "reward": -1.6900139684788882, "reward_std": 7.238736009597778, "rewards/get_chromagram_reward": 0.6199962019920349, "rewards/get_chromagram_reward_std": 0.12772160023450851, "rewards/get_intelligibility_reward": -5.663939923048019, "rewards/get_intelligibility_reward_std": 11.40099401473999, "rewards/get_target_len_reward": -0.026098042167723177, "rewards/get_target_len_reward_std": 0.0803416196256876, "step": 380 }, { "advantages": -4.66903049556322e-08, "advantages_std": 1.6298266768455505, "clip_ratio": 0.0, "completion_length": 88.75774002075195, "epoch": 0.2932330827067669, "grad_norm": 4.78125, "kl": 0.22528714388608934, "learning_rate": 4.853383458646617e-06, "loss": 0.0239, "num_tokens": 11949639.0, "reward": -1.7683505415916443, "reward_std": 6.958176136016846, "rewards/get_chromagram_reward": 0.627706092596054, "rewards/get_chromagram_reward_std": 0.11281427592039109, "rewards/get_intelligibility_reward": -5.915233945846557, "rewards/get_intelligibility_reward_std": 10.804437732696533, "rewards/get_target_len_reward": -0.017523423489183187, "rewards/get_target_len_reward_std": 0.046101400069892405, "step": 390 }, { "advantages": 3.0547367302347084e-08, "advantages_std": 1.5157369017601012, "clip_ratio": 0.0, "completion_length": 86.27678833007812, "epoch": 0.3007518796992481, "grad_norm": 6.125, "kl": 0.2560649961233139, "learning_rate": 4.849624060150376e-06, "loss": 0.0237, "num_tokens": 12255149.0, "reward": -1.8036470532417297, "reward_std": 7.095457553863525, "rewards/get_chromagram_reward": 0.6217517971992492, "rewards/get_chromagram_reward_std": 0.12013033628463746, "rewards/get_intelligibility_reward": -6.012101840972901, "rewards/get_intelligibility_reward_std": 10.966209888458252, "rewards/get_target_len_reward": -0.02059096023440361, "rewards/get_target_len_reward_std": 0.0436623141169548, "step": 400 }, { "advantages": 1.4603139817381816e-07, "advantages_std": 1.5756688952445983, "clip_ratio": 0.0, "completion_length": 87.86666946411133, "epoch": 0.3082706766917293, "grad_norm": 7.71875, "kl": 0.2444481447339058, "learning_rate": 4.845864661654136e-06, "loss": 0.0282, "num_tokens": 12564269.0, "reward": -1.677663379907608, "reward_std": 6.837050580978394, "rewards/get_chromagram_reward": 0.6197855114936829, "rewards/get_chromagram_reward_std": 0.11356526985764503, "rewards/get_intelligibility_reward": -5.63051826953888, "rewards/get_intelligibility_reward_std": 10.768561553955077, "rewards/get_target_len_reward": -0.022257220838218926, "rewards/get_target_len_reward_std": 0.06741791926324367, "step": 410 }, { "advantages": 3.0659140577427023e-07, "advantages_std": 1.4965197563171386, "clip_ratio": 0.0, "completion_length": 90.12321472167969, "epoch": 0.3157894736842105, "grad_norm": 17.75, "kl": 0.23634643405675887, "learning_rate": 4.842105263157895e-06, "loss": 0.0247, "num_tokens": 12880120.0, "reward": -1.583085983991623, "reward_std": 6.680713558197022, "rewards/get_chromagram_reward": 0.6274874389171601, "rewards/get_chromagram_reward_std": 0.11383199393749237, "rewards/get_intelligibility_reward": -5.357515811920166, "rewards/get_intelligibility_reward_std": 10.511133098602295, "rewards/get_target_len_reward": -0.019229174684733154, "rewards/get_target_len_reward_std": 0.05312262093648314, "step": 420 }, { "advantages": 1.2268624587363776e-07, "advantages_std": 1.6010493755340576, "clip_ratio": 0.0, "completion_length": 88.22738265991211, "epoch": 0.3233082706766917, "grad_norm": 37.75, "kl": 0.3049021452665329, "learning_rate": 4.838345864661654e-06, "loss": 0.0352, "num_tokens": 13190566.0, "reward": -1.7616869747638702, "reward_std": 6.855478191375733, "rewards/get_chromagram_reward": 0.6218553423881531, "rewards/get_chromagram_reward_std": 0.11711084693670273, "rewards/get_intelligibility_reward": -5.8811728954315186, "rewards/get_intelligibility_reward_std": 10.717539930343628, "rewards/get_target_len_reward": -0.02574317567050457, "rewards/get_target_len_reward_std": 0.06924263034015894, "step": 430 }, { "advantages": -2.2165477844282578e-07, "advantages_std": 1.6048481464385986, "clip_ratio": 0.0, "completion_length": 85.9375015258789, "epoch": 0.3308270676691729, "grad_norm": 6.90625, "kl": 0.2963700398802757, "learning_rate": 4.834586466165414e-06, "loss": 0.0339, "num_tokens": 13494215.0, "reward": -1.6599295616149903, "reward_std": 6.307693576812744, "rewards/get_chromagram_reward": 0.6308047652244568, "rewards/get_chromagram_reward_std": 0.11237408369779586, "rewards/get_intelligibility_reward": -5.5879511594772335, "rewards/get_intelligibility_reward_std": 9.72415108680725, "rewards/get_target_len_reward": -0.022642039228230715, "rewards/get_target_len_reward_std": 0.06010422967374325, "step": 440 }, { "advantages": -1.1374552357779067e-07, "advantages_std": 1.632838749885559, "clip_ratio": 0.0, "completion_length": 84.73333435058593, "epoch": 0.3383458646616541, "grad_norm": 8.9375, "kl": 0.350592827796936, "learning_rate": 4.830827067669173e-06, "loss": 0.0397, "num_tokens": 13795055.0, "reward": -1.6300417900085449, "reward_std": 6.483615875244141, "rewards/get_chromagram_reward": 0.6207803785800934, "rewards/get_chromagram_reward_std": 0.11223144382238388, "rewards/get_intelligibility_reward": -5.485245895385742, "rewards/get_intelligibility_reward_std": 10.055084419250488, "rewards/get_target_len_reward": -0.02565964898094535, "rewards/get_target_len_reward_std": 0.07205168101936579, "step": 450 }, { "advantages": 3.3900142284437604e-07, "advantages_std": 1.5068569421768188, "clip_ratio": 0.0, "completion_length": 91.00416717529296, "epoch": 0.3458646616541353, "grad_norm": 8.25, "kl": 23.371902348101138, "learning_rate": 4.827067669172933e-06, "loss": 2.337, "num_tokens": 14113145.0, "reward": -1.2409809799864888, "reward_std": 6.696157693862915, "rewards/get_chromagram_reward": 0.6164524137973786, "rewards/get_chromagram_reward_std": 0.10411357581615448, "rewards/get_intelligibility_reward": -4.322750660777092, "rewards/get_intelligibility_reward_std": 10.822736454010009, "rewards/get_target_len_reward": -0.016644550208002328, "rewards/get_target_len_reward_std": 0.04289772268384695, "step": 460 }, { "advantages": 2.8361877362215183e-07, "advantages_std": 1.6239615321159362, "clip_ratio": 0.0, "completion_length": 86.97738342285156, "epoch": 0.3533834586466165, "grad_norm": 14.4375, "kl": 0.31207115948200226, "learning_rate": 4.823308270676692e-06, "loss": 0.0404, "num_tokens": 14420277.0, "reward": -1.7174145102500915, "reward_std": 7.22825779914856, "rewards/get_chromagram_reward": 0.6044569492340088, "rewards/get_chromagram_reward_std": 0.12300211787223816, "rewards/get_intelligibility_reward": -5.731238055229187, "rewards/get_intelligibility_reward_std": 11.418381404876708, "rewards/get_target_len_reward": -0.025462107546627522, "rewards/get_target_len_reward_std": 0.0857331132516265, "step": 470 }, { "advantages": 2.9876830467401303e-07, "advantages_std": 1.5418254494667054, "clip_ratio": 0.0, "completion_length": 87.61726379394531, "epoch": 0.3609022556390977, "grad_norm": 7.78125, "kl": 0.2414279818534851, "learning_rate": 4.8195488721804515e-06, "loss": 0.0237, "num_tokens": 14729195.0, "reward": -1.6702099859714508, "reward_std": 6.8579872131347654, "rewards/get_chromagram_reward": 0.632961118221283, "rewards/get_chromagram_reward_std": 0.10743627920746804, "rewards/get_intelligibility_reward": -5.624613666534424, "rewards/get_intelligibility_reward_std": 10.713689804077148, "rewards/get_target_len_reward": -0.0189772330224514, "rewards/get_target_len_reward_std": 0.051045392826199534, "step": 480 }, { "advantages": -1.8800299841359447e-07, "advantages_std": 1.564692234992981, "clip_ratio": 0.0, "completion_length": 88.64166870117188, "epoch": 0.3684210526315789, "grad_norm": 9.8125, "kl": 0.26591150760650634, "learning_rate": 4.815789473684211e-06, "loss": 0.0295, "num_tokens": 15041323.0, "reward": -1.4515798807144165, "reward_std": 6.639446020126343, "rewards/get_chromagram_reward": 0.6145521402359009, "rewards/get_chromagram_reward_std": 0.11701491698622704, "rewards/get_intelligibility_reward": -4.948449277877808, "rewards/get_intelligibility_reward_std": 10.588137817382812, "rewards/get_target_len_reward": -0.020842281449586154, "rewards/get_target_len_reward_std": 0.060225320421159266, "step": 490 }, { "advantages": -9.822350222066234e-08, "advantages_std": 1.6208269357681275, "clip_ratio": 0.0, "completion_length": 87.24940719604493, "epoch": 0.37593984962406013, "grad_norm": 9.875, "kl": 0.24478698670864105, "learning_rate": 4.81203007518797e-06, "loss": 0.0304, "num_tokens": 15348400.0, "reward": -1.7204331919550895, "reward_std": 6.657948637008667, "rewards/get_chromagram_reward": 0.6120970249176025, "rewards/get_chromagram_reward_std": 0.10694977194070816, "rewards/get_intelligibility_reward": -5.753721928596496, "rewards/get_intelligibility_reward_std": 10.243351125717163, "rewards/get_target_len_reward": -0.019674433302134274, "rewards/get_target_len_reward_std": 0.07041770461946725, "step": 500 }, { "advantages": 2.849847236419123e-07, "advantages_std": 1.589078712463379, "clip_ratio": 0.0, "completion_length": 83.6428596496582, "epoch": 0.38345864661654133, "grad_norm": 6.03125, "kl": 0.26985139548778536, "learning_rate": 4.80827067669173e-06, "loss": 0.0268, "num_tokens": 15646198.0, "reward": -1.8068750977516175, "reward_std": 6.750207424163818, "rewards/get_chromagram_reward": 0.6317034482955932, "rewards/get_chromagram_reward_std": 0.11371424272656441, "rewards/get_intelligibility_reward": -6.03351776599884, "rewards/get_intelligibility_reward_std": 10.352978706359863, "rewards/get_target_len_reward": -0.018810535687953232, "rewards/get_target_len_reward_std": 0.05019157826900482, "step": 510 }, { "advantages": -4.1474899603599624e-07, "advantages_std": 1.5117250084877014, "clip_ratio": 0.0, "completion_length": 87.92976303100586, "epoch": 0.39097744360902253, "grad_norm": 7.15625, "kl": 0.2585775926709175, "learning_rate": 4.804511278195489e-06, "loss": 0.029, "num_tokens": 15956086.0, "reward": -1.6853113710880279, "reward_std": 6.823435592651367, "rewards/get_chromagram_reward": 0.6293639838695526, "rewards/get_chromagram_reward_std": 0.11193648576736451, "rewards/get_intelligibility_reward": -5.663621878623962, "rewards/get_intelligibility_reward_std": 10.723005723953246, "rewards/get_target_len_reward": -0.021675997786223887, "rewards/get_target_len_reward_std": 0.06440430246293545, "step": 520 }, { "advantages": -9.18905129765335e-09, "advantages_std": 1.5427281141281128, "clip_ratio": 0.0, "completion_length": 87.26071624755859, "epoch": 0.39849624060150374, "grad_norm": 14.5, "kl": 0.2736205294728279, "learning_rate": 4.800751879699249e-06, "loss": 0.0322, "num_tokens": 16263270.0, "reward": -1.678658276796341, "reward_std": 6.964376831054688, "rewards/get_chromagram_reward": 0.6212248384952546, "rewards/get_chromagram_reward_std": 0.1208167664706707, "rewards/get_intelligibility_reward": -5.635741448402404, "rewards/get_intelligibility_reward_std": 10.981070423126221, "rewards/get_target_len_reward": -0.02145816870033741, "rewards/get_target_len_reward_std": 0.0654794754460454, "step": 530 }, { "advantages": 9.809930503479337e-08, "advantages_std": 1.6744032025337219, "clip_ratio": 0.0, "completion_length": 87.52143096923828, "epoch": 0.40601503759398494, "grad_norm": 5.59375, "kl": 0.29339379668235777, "learning_rate": 4.796992481203008e-06, "loss": 0.0315, "num_tokens": 16571800.0, "reward": -1.3652776062488556, "reward_std": 6.553081464767456, "rewards/get_chromagram_reward": 0.6332614958286286, "rewards/get_chromagram_reward_std": 0.11830071583390236, "rewards/get_intelligibility_reward": -4.706533789634705, "rewards/get_intelligibility_reward_std": 10.460928821563721, "rewards/get_target_len_reward": -0.02256029974669218, "rewards/get_target_len_reward_std": 0.060732940770685674, "step": 540 }, { "advantages": 2.3345152975196015e-07, "advantages_std": 1.5902326703071594, "clip_ratio": 0.0, "completion_length": 83.64523849487304, "epoch": 0.41353383458646614, "grad_norm": 39.5, "kl": 0.252433679997921, "learning_rate": 4.793233082706767e-06, "loss": 0.0288, "num_tokens": 16868510.0, "reward": -2.3146336674690247, "reward_std": 7.112114381790161, "rewards/get_chromagram_reward": 0.6218239188194274, "rewards/get_chromagram_reward_std": 0.12277880832552909, "rewards/get_intelligibility_reward": -7.544064474105835, "rewards/get_intelligibility_reward_std": 10.471257495880128, "rewards/get_target_len_reward": -0.021660258620977403, "rewards/get_target_len_reward_std": 0.06123478710651398, "step": 550 }, { "advantages": 2.9032429722519736e-07, "advantages_std": 1.516219162940979, "clip_ratio": 0.0, "completion_length": 85.42381134033204, "epoch": 0.42105263157894735, "grad_norm": 13.4375, "kl": 0.25826217532157897, "learning_rate": 4.789473684210527e-06, "loss": 0.0308, "num_tokens": 17171120.0, "reward": -1.5234275877475738, "reward_std": 6.649781656265259, "rewards/get_chromagram_reward": 0.6296819686889649, "rewards/get_chromagram_reward_std": 0.11613814607262611, "rewards/get_intelligibility_reward": -5.17573721408844, "rewards/get_intelligibility_reward_std": 10.52333984375, "rewards/get_target_len_reward": -0.024227123986929656, "rewards/get_target_len_reward_std": 0.06795755084604024, "step": 560 }, { "advantages": -3.3006072328589655e-07, "advantages_std": 1.5026580929756164, "clip_ratio": 0.0, "completion_length": 88.41131134033203, "epoch": 0.42857142857142855, "grad_norm": 7.15625, "kl": 0.5135219663381576, "learning_rate": 4.785714285714287e-06, "loss": 0.056, "num_tokens": 17481979.0, "reward": -1.4849580019712447, "reward_std": 6.6246805667877195, "rewards/get_chromagram_reward": 0.6225826203823089, "rewards/get_chromagram_reward_std": 0.11443077027797699, "rewards/get_intelligibility_reward": -5.055980670452118, "rewards/get_intelligibility_reward_std": 10.499218845367432, "rewards/get_target_len_reward": -0.021475626993924378, "rewards/get_target_len_reward_std": 0.06761925015598536, "step": 570 }, { "advantages": -3.099441585163731e-07, "advantages_std": 1.5539302945137023, "clip_ratio": 0.0, "completion_length": 90.10595474243163, "epoch": 0.43609022556390975, "grad_norm": 8.6875, "kl": 0.41011454313993456, "learning_rate": 4.781954887218045e-06, "loss": 0.0467, "num_tokens": 17797588.0, "reward": -1.4988545447587966, "reward_std": 7.042573499679565, "rewards/get_chromagram_reward": 0.6192649960517883, "rewards/get_chromagram_reward_std": 0.12368768453598022, "rewards/get_intelligibility_reward": -5.092814598977566, "rewards/get_intelligibility_reward_std": 11.2423526763916, "rewards/get_target_len_reward": -0.023013710416853426, "rewards/get_target_len_reward_std": 0.06633382495492697, "step": 580 }, { "advantages": 1.0530154903598543e-07, "advantages_std": 1.600118923187256, "clip_ratio": 0.0, "completion_length": 83.48333511352538, "epoch": 0.44360902255639095, "grad_norm": 7.875, "kl": 0.26907303333282473, "learning_rate": 4.778195488721805e-06, "loss": 0.0326, "num_tokens": 18093997.0, "reward": -2.03454070687294, "reward_std": 7.202506160736084, "rewards/get_chromagram_reward": 0.6092650592327118, "rewards/get_chromagram_reward_std": 0.10631671249866485, "rewards/get_intelligibility_reward": -6.693427014350891, "rewards/get_intelligibility_reward_std": 10.90904884338379, "rewards/get_target_len_reward": -0.01945957327261567, "rewards/get_target_len_reward_std": 0.06454089805483817, "step": 590 }, { "advantages": -4.1872264446851657e-07, "advantages_std": 1.4982729434967041, "clip_ratio": 0.0, "completion_length": 84.88095397949219, "epoch": 0.45112781954887216, "grad_norm": 5.96875, "kl": 0.2921657621860504, "learning_rate": 4.774436090225565e-06, "loss": 0.034, "num_tokens": 18394582.0, "reward": -1.8129005968570708, "reward_std": 7.103170919418335, "rewards/get_chromagram_reward": 0.6202451944351196, "rewards/get_chromagram_reward_std": 0.11972960755228997, "rewards/get_intelligibility_reward": -6.034282064437866, "rewards/get_intelligibility_reward_std": 11.095508289337157, "rewards/get_target_len_reward": -0.024664431624114514, "rewards/get_target_len_reward_std": 0.07610471770167351, "step": 600 }, { "advantages": 5.799035989184631e-08, "advantages_std": 1.5440994024276733, "clip_ratio": 0.0, "completion_length": 87.07857360839844, "epoch": 0.45864661654135336, "grad_norm": 6.21875, "kl": 0.4257489159703255, "learning_rate": 4.7706766917293235e-06, "loss": 0.0471, "num_tokens": 18701907.0, "reward": -1.700702142715454, "reward_std": 6.709496259689331, "rewards/get_chromagram_reward": 0.6193202555179596, "rewards/get_chromagram_reward_std": 0.12053735405206681, "rewards/get_intelligibility_reward": -5.697566413879395, "rewards/get_intelligibility_reward_std": 10.440969467163086, "rewards/get_target_len_reward": -0.023859874997287988, "rewards/get_target_len_reward_std": 0.06426951251924037, "step": 610 }, { "advantages": 5.923211645608717e-07, "advantages_std": 1.5804563403129577, "clip_ratio": 0.0, "completion_length": 85.25654907226563, "epoch": 0.46616541353383456, "grad_norm": 8.0625, "kl": 0.3186333954334259, "learning_rate": 4.766917293233083e-06, "loss": 0.0386, "num_tokens": 19004660.0, "reward": -1.5786213517189025, "reward_std": 6.751915645599365, "rewards/get_chromagram_reward": 0.6187632083892822, "rewards/get_chromagram_reward_std": 0.10962516814470291, "rewards/get_intelligibility_reward": -5.333805966377258, "rewards/get_intelligibility_reward_std": 10.596128702163696, "rewards/get_target_len_reward": -0.020821068761870266, "rewards/get_target_len_reward_std": 0.07383420001715421, "step": 620 }, { "advantages": 2.384185933124172e-07, "advantages_std": 1.6757961630821228, "clip_ratio": 0.0, "completion_length": 85.70238189697265, "epoch": 0.47368421052631576, "grad_norm": 1440.0, "kl": 0.4250665083527565, "learning_rate": 4.763157894736842e-06, "loss": 0.0464, "num_tokens": 19308680.0, "reward": -1.7613612473011018, "reward_std": 6.973189926147461, "rewards/get_chromagram_reward": 0.6115583896636962, "rewards/get_chromagram_reward_std": 0.11262777373194695, "rewards/get_intelligibility_reward": -5.871619367599488, "rewards/get_intelligibility_reward_std": 10.821762990951537, "rewards/get_target_len_reward": -0.02402249900624156, "rewards/get_target_len_reward_std": 0.061053736694157125, "step": 630 }, { "advantages": -4.579623663403254e-07, "advantages_std": 1.5364292025566102, "clip_ratio": 0.0, "completion_length": 87.05357131958007, "epoch": 0.48120300751879697, "grad_norm": 7.75, "kl": 0.30670359134674074, "learning_rate": 4.759398496240602e-06, "loss": 0.0329, "num_tokens": 19616135.0, "reward": -1.9259871065616607, "reward_std": 7.262286853790283, "rewards/get_chromagram_reward": 0.6155335962772369, "rewards/get_chromagram_reward_std": 0.10379507169127464, "rewards/get_intelligibility_reward": -6.371912264823914, "rewards/get_intelligibility_reward_std": 11.193008613586425, "rewards/get_target_len_reward": -0.021582304313778878, "rewards/get_target_len_reward_std": 0.06231225673109293, "step": 640 }, { "advantages": -1.8179416159114225e-07, "advantages_std": 1.6486274361610413, "clip_ratio": 0.0, "completion_length": 88.81666870117188, "epoch": 0.48872180451127817, "grad_norm": 30336.0, "kl": 2.4305228680372237, "learning_rate": 4.755639097744361e-06, "loss": 0.2454, "num_tokens": 19928080.0, "reward": -1.6895100951194764, "reward_std": 7.358239984512329, "rewards/get_chromagram_reward": 0.6077431797981262, "rewards/get_chromagram_reward_std": 0.11492864713072777, "rewards/get_intelligibility_reward": -5.655220425128936, "rewards/get_intelligibility_reward_std": 11.619172477722168, "rewards/get_target_len_reward": -0.021052911598235368, "rewards/get_target_len_reward_std": 0.06442425940185785, "step": 650 }, { "advantages": -1.544753786220099e-07, "advantages_std": 1.6463606476783752, "clip_ratio": 0.0, "completion_length": 85.95714416503907, "epoch": 0.49624060150375937, "grad_norm": 5.46875, "kl": 0.3190068453550339, "learning_rate": 4.751879699248121e-06, "loss": 0.0348, "num_tokens": 20231918.0, "reward": -2.009367752075195, "reward_std": 7.01142954826355, "rewards/get_chromagram_reward": 0.6336705982685089, "rewards/get_chromagram_reward_std": 0.1101664699614048, "rewards/get_intelligibility_reward": -6.642132258415222, "rewards/get_intelligibility_reward_std": 10.66052188873291, "rewards/get_target_len_reward": -0.01964133554138243, "rewards/get_target_len_reward_std": 0.04977958481758833, "step": 660 }, { "advantages": -1.4205774192532772e-07, "advantages_std": 1.5998609900474547, "clip_ratio": 0.0, "completion_length": 84.0226203918457, "epoch": 0.5037593984962406, "grad_norm": 5.5625, "kl": 0.2667311102151871, "learning_rate": 4.74812030075188e-06, "loss": 0.0347, "num_tokens": 20531077.0, "reward": -2.143736845254898, "reward_std": 7.50629448890686, "rewards/get_chromagram_reward": 0.6176834642887116, "rewards/get_chromagram_reward_std": 0.12896764725446702, "rewards/get_intelligibility_reward": -7.02304618358612, "rewards/get_intelligibility_reward_std": 11.318552112579345, "rewards/get_target_len_reward": -0.025847438164055346, "rewards/get_target_len_reward_std": 0.08172615952789783, "step": 670 }, { "advantages": -2.93925414673879e-07, "advantages_std": 1.6468318104743958, "clip_ratio": 0.0, "completion_length": 84.94345397949219, "epoch": 0.5112781954887218, "grad_norm": 7.0625, "kl": 0.3305581882596016, "learning_rate": 4.744360902255639e-06, "loss": 0.041, "num_tokens": 20832587.0, "reward": -1.2728881180286407, "reward_std": 6.217760515213013, "rewards/get_chromagram_reward": 0.6311124086380004, "rewards/get_chromagram_reward_std": 0.11825463101267815, "rewards/get_intelligibility_reward": -4.425248765945435, "rewards/get_intelligibility_reward_std": 9.891574048995972, "rewards/get_target_len_reward": -0.02452768972143531, "rewards/get_target_len_reward_std": 0.07514422051608563, "step": 680 }, { "advantages": -2.4388234116656803e-07, "advantages_std": 1.4703909873962402, "clip_ratio": 0.0, "completion_length": 85.74226379394531, "epoch": 0.518796992481203, "grad_norm": 9.625, "kl": 0.2962498337030411, "learning_rate": 4.740601503759399e-06, "loss": 0.0323, "num_tokens": 21135905.0, "reward": -1.868764042854309, "reward_std": 7.241525459289551, "rewards/get_chromagram_reward": 0.6176757931709289, "rewards/get_chromagram_reward_std": 0.12186905890703201, "rewards/get_intelligibility_reward": -6.201520228385926, "rewards/get_intelligibility_reward_std": 11.264957427978516, "rewards/get_target_len_reward": -0.022447278164327143, "rewards/get_target_len_reward_std": 0.055239592865109446, "step": 690 }, { "advantages": 5.366901660863732e-07, "advantages_std": 1.5638061165809631, "clip_ratio": 0.0, "completion_length": 86.67500152587891, "epoch": 0.5263157894736842, "grad_norm": 5.125, "kl": 0.28444311767816544, "learning_rate": 4.736842105263158e-06, "loss": 0.0319, "num_tokens": 21442155.0, "reward": -1.47476726770401, "reward_std": 6.729415082931519, "rewards/get_chromagram_reward": 0.6268014788627625, "rewards/get_chromagram_reward_std": 0.11580024063587188, "rewards/get_intelligibility_reward": -5.027878886461258, "rewards/get_intelligibility_reward_std": 10.612294578552246, "rewards/get_target_len_reward": -0.023224306292831898, "rewards/get_target_len_reward_std": 0.07012978214770556, "step": 700 }, { "advantages": 6.116927021793117e-07, "advantages_std": 1.6555665493011475, "clip_ratio": 0.0, "completion_length": 88.07381134033203, "epoch": 0.5338345864661654, "grad_norm": 25.875, "kl": 0.31802781522274015, "learning_rate": 4.733082706766917e-06, "loss": 0.0379, "num_tokens": 21751478.0, "reward": -2.0176144003868104, "reward_std": 7.255285930633545, "rewards/get_chromagram_reward": 0.6271386921405793, "rewards/get_chromagram_reward_std": 0.11421655938029289, "rewards/get_intelligibility_reward": -6.658082771301269, "rewards/get_intelligibility_reward_std": 11.103228569030762, "rewards/get_target_len_reward": -0.021898697735741733, "rewards/get_target_len_reward_std": 0.061879089660942556, "step": 710 }, { "advantages": -2.7765831447368326e-07, "advantages_std": 1.6042242527008057, "clip_ratio": 0.0, "completion_length": 85.33214416503907, "epoch": 0.5413533834586466, "grad_norm": 5.78125, "kl": 0.31839183866977694, "learning_rate": 4.729323308270677e-06, "loss": 0.0369, "num_tokens": 22054086.0, "reward": -1.5850762702524661, "reward_std": 6.881160306930542, "rewards/get_chromagram_reward": 0.6308808922767639, "rewards/get_chromagram_reward_std": 0.11955713406205178, "rewards/get_intelligibility_reward": -5.366273105144501, "rewards/get_intelligibility_reward_std": 10.855913162231445, "rewards/get_target_len_reward": -0.019836284592747687, "rewards/get_target_len_reward_std": 0.05966003518551588, "step": 720 }, { "advantages": 1.5373030635146278e-07, "advantages_std": 1.6363926649093627, "clip_ratio": 0.0, "completion_length": 87.30714492797851, "epoch": 0.5488721804511278, "grad_norm": 9.0, "kl": 0.2782985955476761, "learning_rate": 4.725563909774437e-06, "loss": 0.031, "num_tokens": 22361267.0, "reward": -1.9439775586128234, "reward_std": 7.178706884384155, "rewards/get_chromagram_reward": 0.6102202415466309, "rewards/get_chromagram_reward_std": 0.11201724261045456, "rewards/get_intelligibility_reward": -6.42216944694519, "rewards/get_intelligibility_reward_std": 11.05174961090088, "rewards/get_target_len_reward": -0.019982862658798693, "rewards/get_target_len_reward_std": 0.05483436398208141, "step": 730 }, { "advantages": -2.774099560731713e-07, "advantages_std": 1.6907092094421388, "clip_ratio": 0.0, "completion_length": 85.42440567016601, "epoch": 0.556390977443609, "grad_norm": 83.5, "kl": 0.30403348952531817, "learning_rate": 4.7218045112781955e-06, "loss": 0.0391, "num_tokens": 22662925.0, "reward": -1.8942086100578308, "reward_std": 6.965086030960083, "rewards/get_chromagram_reward": 0.6224843442440033, "rewards/get_chromagram_reward_std": 0.1091718964278698, "rewards/get_intelligibility_reward": -6.277362990379333, "rewards/get_intelligibility_reward_std": 10.695509052276611, "rewards/get_target_len_reward": -0.027746990974992513, "rewards/get_target_len_reward_std": 0.09219296015799046, "step": 740 }, { "advantages": -1.3162691061552323e-07, "advantages_std": 1.654877233505249, "clip_ratio": 0.0, "completion_length": 85.3678581237793, "epoch": 0.5639097744360902, "grad_norm": 12.375, "kl": 0.8061857357621193, "learning_rate": 4.718045112781955e-06, "loss": 0.0843, "num_tokens": 22965571.0, "reward": -1.688933926820755, "reward_std": 6.675320863723755, "rewards/get_chromagram_reward": 0.6081489741802215, "rewards/get_chromagram_reward_std": 0.12180505245923996, "rewards/get_intelligibility_reward": -5.65586256980896, "rewards/get_intelligibility_reward_std": 10.319089794158936, "rewards/get_target_len_reward": -0.019087663665413857, "rewards/get_target_len_reward_std": 0.058424011990427974, "step": 750 }, { "advantages": -2.2302070163959796e-07, "advantages_std": 1.612697470188141, "clip_ratio": 0.0, "completion_length": 86.45774002075196, "epoch": 0.5714285714285714, "grad_norm": 17.375, "kl": 0.3379309445619583, "learning_rate": 4.714285714285715e-06, "loss": 0.0397, "num_tokens": 23271039.0, "reward": -1.8412569880485534, "reward_std": 6.6673956394195555, "rewards/get_chromagram_reward": 0.6226194262504577, "rewards/get_chromagram_reward_std": 0.12411452755331993, "rewards/get_intelligibility_reward": -6.122761702537536, "rewards/get_intelligibility_reward_std": 10.182913446426392, "rewards/get_target_len_reward": -0.02362839113920927, "rewards/get_target_len_reward_std": 0.06820572800934314, "step": 760 }, { "advantages": -7.972120670274308e-08, "advantages_std": 1.614003050327301, "clip_ratio": 0.0, "completion_length": 84.36309661865235, "epoch": 0.5789473684210527, "grad_norm": 7.5, "kl": 0.32674605399370193, "learning_rate": 4.710526315789474e-06, "loss": 0.0362, "num_tokens": 23570408.0, "reward": -1.632363921403885, "reward_std": 6.429037570953369, "rewards/get_chromagram_reward": 0.6164280056953431, "rewards/get_chromagram_reward_std": 0.11995449736714363, "rewards/get_intelligibility_reward": -5.492781853675842, "rewards/get_intelligibility_reward_std": 9.978094005584717, "rewards/get_target_len_reward": -0.020737541373819113, "rewards/get_target_len_reward_std": 0.05414057523012161, "step": 770 }, { "advantages": -1.0542570532123818e-07, "advantages_std": 1.5682517766952515, "clip_ratio": 0.0, "completion_length": 87.86547775268555, "epoch": 0.5864661654135338, "grad_norm": 47.0, "kl": 0.4949570521712303, "learning_rate": 4.706766917293233e-06, "loss": 0.0534, "num_tokens": 23879465.0, "reward": -1.6829198122024536, "reward_std": 6.87082347869873, "rewards/get_chromagram_reward": 0.6280323505401612, "rewards/get_chromagram_reward_std": 0.1086762361228466, "rewards/get_intelligibility_reward": -5.656386470794677, "rewards/get_intelligibility_reward_std": 10.67564115524292, "rewards/get_target_len_reward": -0.020404842961579562, "rewards/get_target_len_reward_std": 0.05386058986186981, "step": 780 }, { "advantages": -1.0952354614346405e-07, "advantages_std": 1.6235635638237, "clip_ratio": 0.0, "completion_length": 89.78155059814453, "epoch": 0.5939849624060151, "grad_norm": 6.125, "kl": 0.40520851165056226, "learning_rate": 4.703007518796993e-06, "loss": 0.0419, "num_tokens": 24193456.0, "reward": -1.6209597945213319, "reward_std": 6.467411375045776, "rewards/get_chromagram_reward": 0.6285930275917053, "rewards/get_chromagram_reward_std": 0.121580471098423, "rewards/get_intelligibility_reward": -5.469432234764099, "rewards/get_intelligibility_reward_std": 10.131909942626953, "rewards/get_target_len_reward": -0.022039972990751267, "rewards/get_target_len_reward_std": 0.05911780633032322, "step": 790 }, { "advantages": -1.2144447119055713e-07, "advantages_std": 1.7115575075149536, "clip_ratio": 0.0, "completion_length": 84.20952529907227, "epoch": 0.6015037593984962, "grad_norm": 32.5, "kl": 0.4055857822299004, "learning_rate": 4.6992481203007525e-06, "loss": 0.0433, "num_tokens": 24492762.0, "reward": -1.5891988754272461, "reward_std": 6.875995683670044, "rewards/get_chromagram_reward": 0.6362646162509918, "rewards/get_chromagram_reward_std": 0.11419221684336663, "rewards/get_intelligibility_reward": -5.380186462402344, "rewards/get_intelligibility_reward_std": 10.897884845733643, "rewards/get_target_len_reward": -0.023674625623971223, "rewards/get_target_len_reward_std": 0.06239906083792448, "step": 800 }, { "advantages": -2.900759483281945e-07, "advantages_std": 1.5877565264701843, "clip_ratio": 0.0, "completion_length": 87.95714492797852, "epoch": 0.6090225563909775, "grad_norm": 5.78125, "kl": 0.27903091013431547, "learning_rate": 4.695488721804511e-06, "loss": 0.0378, "num_tokens": 24801534.0, "reward": -1.5388197422027587, "reward_std": 6.82519645690918, "rewards/get_chromagram_reward": 0.6175784945487977, "rewards/get_chromagram_reward_std": 0.11102894842624664, "rewards/get_intelligibility_reward": -5.2068812370300295, "rewards/get_intelligibility_reward_std": 10.757489204406738, "rewards/get_target_len_reward": -0.027156245335936545, "rewards/get_target_len_reward_std": 0.0964772343635559, "step": 810 }, { "advantages": 1.3982256774625057e-07, "advantages_std": 1.4997711062431336, "clip_ratio": 0.0, "completion_length": 88.94762115478515, "epoch": 0.6165413533834586, "grad_norm": 7.28125, "kl": 0.29116481095552443, "learning_rate": 4.691729323308271e-06, "loss": 0.0281, "num_tokens": 25113845.0, "reward": -1.3392221808433533, "reward_std": 6.566954040527344, "rewards/get_chromagram_reward": 0.6304997444152832, "rewards/get_chromagram_reward_std": 0.1095633253455162, "rewards/get_intelligibility_reward": -4.630185222625732, "rewards/get_intelligibility_reward_std": 10.61223382949829, "rewards/get_target_len_reward": -0.017980954982340334, "rewards/get_target_len_reward_std": 0.04479764401912689, "step": 820 }, { "advantages": -6.432333492512043e-08, "advantages_std": 1.621793019771576, "clip_ratio": 0.0, "completion_length": 86.63928680419922, "epoch": 0.6240601503759399, "grad_norm": 6.21875, "kl": 0.2632207229733467, "learning_rate": 4.687969924812031e-06, "loss": 0.0303, "num_tokens": 25419723.0, "reward": -1.6574245631694793, "reward_std": 6.641559648513794, "rewards/get_chromagram_reward": 0.6183900475502014, "rewards/get_chromagram_reward_std": 0.12058342695236206, "rewards/get_intelligibility_reward": -5.569248151779175, "rewards/get_intelligibility_reward_std": 10.371752309799195, "rewards/get_target_len_reward": -0.021415283996611835, "rewards/get_target_len_reward_std": 0.062272250093519686, "step": 830 }, { "advantages": 4.901861231587645e-07, "advantages_std": 1.491612982749939, "clip_ratio": 0.0, "completion_length": 86.89464416503907, "epoch": 0.631578947368421, "grad_norm": 88.5, "kl": 0.3433301538228989, "learning_rate": 4.68421052631579e-06, "loss": 0.0352, "num_tokens": 25726539.0, "reward": -1.5290105819702149, "reward_std": 6.745681285858154, "rewards/get_chromagram_reward": 0.614410787820816, "rewards/get_chromagram_reward_std": 0.10712209716439247, "rewards/get_intelligibility_reward": -5.180805230140686, "rewards/get_intelligibility_reward_std": 10.757587146759032, "rewards/get_target_len_reward": -0.020636959094554187, "rewards/get_target_len_reward_std": 0.06199995744973421, "step": 840 }, { "advantages": 2.0439424215368262e-07, "advantages_std": 1.6053584575653077, "clip_ratio": 0.0, "completion_length": 83.75238189697265, "epoch": 0.6390977443609023, "grad_norm": 7.75, "kl": 0.32621364295482635, "learning_rate": 4.680451127819549e-06, "loss": 0.0354, "num_tokens": 26025367.0, "reward": -1.6455067068338394, "reward_std": 6.85167384147644, "rewards/get_chromagram_reward": 0.6253707230091095, "rewards/get_chromagram_reward_std": 0.12833054959774018, "rewards/get_intelligibility_reward": -5.537108218669891, "rewards/get_intelligibility_reward_std": 10.775818157196046, "rewards/get_target_len_reward": -0.02478252612054348, "rewards/get_target_len_reward_std": 0.05785290952771902, "step": 850 }, { "advantages": -2.25504242123975e-07, "advantages_std": 1.7222684264183044, "clip_ratio": 0.0, "completion_length": 87.38392944335938, "epoch": 0.6466165413533834, "grad_norm": 6.46875, "kl": 0.5005932718515396, "learning_rate": 4.676691729323309e-06, "loss": 0.0556, "num_tokens": 26333639.0, "reward": -1.5956645965576173, "reward_std": 7.104577875137329, "rewards/get_chromagram_reward": 0.6158910393714905, "rewards/get_chromagram_reward_std": 0.11812372878193855, "rewards/get_intelligibility_reward": -5.373835563659668, "rewards/get_intelligibility_reward_std": 11.39167242050171, "rewards/get_target_len_reward": -0.0290489312261343, "rewards/get_target_len_reward_std": 0.09160682074725628, "step": 860 }, { "advantages": 2.6449561438823823e-07, "advantages_std": 1.6398038148880005, "clip_ratio": 0.0, "completion_length": 86.44583435058594, "epoch": 0.6541353383458647, "grad_norm": 7.1875, "kl": 0.3201334476470947, "learning_rate": 4.672932330827068e-06, "loss": 0.039, "num_tokens": 26639675.0, "reward": -1.377868014574051, "reward_std": 6.831578731536865, "rewards/get_chromagram_reward": 0.6151859581470489, "rewards/get_chromagram_reward_std": 0.12284478545188904, "rewards/get_intelligibility_reward": -4.722595846652984, "rewards/get_intelligibility_reward_std": 10.993205451965332, "rewards/get_target_len_reward": -0.026193857286125423, "rewards/get_target_len_reward_std": 0.0736829001456499, "step": 870 }, { "advantages": 2.769132663615892e-07, "advantages_std": 1.663590395450592, "clip_ratio": 0.0, "completion_length": 84.027978515625, "epoch": 0.6616541353383458, "grad_norm": 14.25, "kl": 0.29166722744703294, "learning_rate": 4.669172932330828e-06, "loss": 0.0297, "num_tokens": 26939198.0, "reward": -1.411170706152916, "reward_std": 6.868584156036377, "rewards/get_chromagram_reward": 0.6172568500041962, "rewards/get_chromagram_reward_std": 0.11014089062809944, "rewards/get_intelligibility_reward": -4.832118815183639, "rewards/get_intelligibility_reward_std": 11.0176420211792, "rewards/get_target_len_reward": -0.01864988887682557, "rewards/get_target_len_reward_std": 0.0528477106243372, "step": 880 }, { "advantages": -1.1473893692937054e-07, "advantages_std": 1.6940292239189148, "clip_ratio": 0.0, "completion_length": 87.0202392578125, "epoch": 0.6691729323308271, "grad_norm": 5.6875, "kl": 0.39647049456834793, "learning_rate": 4.665413533834587e-06, "loss": 0.0423, "num_tokens": 27246872.0, "reward": -1.597488921880722, "reward_std": 7.147005319595337, "rewards/get_chromagram_reward": 0.6287810504436493, "rewards/get_chromagram_reward_std": 0.12597450688481332, "rewards/get_intelligibility_reward": -5.393320921063423, "rewards/get_intelligibility_reward_std": 11.286065196990966, "rewards/get_target_len_reward": -0.02792652351781726, "rewards/get_target_len_reward_std": 0.06876220367848873, "step": 890 }, { "advantages": 7.887681448437433e-07, "advantages_std": 1.6071461677551269, "clip_ratio": 0.0, "completion_length": 86.16785888671875, "epoch": 0.6766917293233082, "grad_norm": 5.9375, "kl": 0.6452051237225532, "learning_rate": 4.661654135338346e-06, "loss": 0.0648, "num_tokens": 27551525.0, "reward": -1.593279379606247, "reward_std": 6.675328016281128, "rewards/get_chromagram_reward": 0.6114086985588074, "rewards/get_chromagram_reward_std": 0.11248691827058792, "rewards/get_intelligibility_reward": -5.372988653182984, "rewards/get_intelligibility_reward_std": 10.505959796905518, "rewards/get_target_len_reward": -0.018257823958992957, "rewards/get_target_len_reward_std": 0.04569785110652447, "step": 900 }, { "advantages": -4.731118766088116e-07, "advantages_std": 1.5220891833305359, "clip_ratio": 0.0, "completion_length": 87.25535888671875, "epoch": 0.6842105263157895, "grad_norm": 49.75, "kl": 0.38621631264686584, "learning_rate": 4.657894736842106e-06, "loss": 0.0444, "num_tokens": 27859147.0, "reward": -1.7658088684082032, "reward_std": 7.283221912384033, "rewards/get_chromagram_reward": 0.6201018691062927, "rewards/get_chromagram_reward_std": 0.1176083043217659, "rewards/get_intelligibility_reward": -5.895946288108826, "rewards/get_intelligibility_reward_std": 11.443974113464355, "rewards/get_target_len_reward": -0.02158181704580784, "rewards/get_target_len_reward_std": 0.05893752183765173, "step": 910 }, { "advantages": 2.9044847664749797e-07, "advantages_std": 1.5768916845321654, "clip_ratio": 0.0, "completion_length": 86.76369247436523, "epoch": 0.6917293233082706, "grad_norm": 5.5625, "kl": 0.2984780207276344, "learning_rate": 4.654135338345865e-06, "loss": 0.0331, "num_tokens": 28165265.0, "reward": -1.2902542769908905, "reward_std": 6.306678295135498, "rewards/get_chromagram_reward": 0.6366979598999023, "rewards/get_chromagram_reward_std": 0.10054028406739235, "rewards/get_intelligibility_reward": -4.488920116424561, "rewards/get_intelligibility_reward_std": 10.173047637939453, "rewards/get_target_len_reward": -0.018540383130311967, "rewards/get_target_len_reward_std": 0.046944990381598474, "step": 920 }, { "advantages": 3.6557516409629897e-07, "advantages_std": 1.625953483581543, "clip_ratio": 0.0, "completion_length": 84.18392944335938, "epoch": 0.6992481203007519, "grad_norm": 5.84375, "kl": 0.2764819011092186, "learning_rate": 4.6503759398496245e-06, "loss": 0.0343, "num_tokens": 28464361.0, "reward": -1.5938740998506546, "reward_std": 6.704269456863403, "rewards/get_chromagram_reward": 0.6101159989833832, "rewards/get_chromagram_reward_std": 0.11639057248830795, "rewards/get_intelligibility_reward": -5.371188521385193, "rewards/get_intelligibility_reward_std": 10.497893142700196, "rewards/get_target_len_reward": -0.020549843832850457, "rewards/get_target_len_reward_std": 0.06374723017215729, "step": 930 }, { "advantages": 3.159046173095703e-07, "advantages_std": 1.573643147945404, "clip_ratio": 0.0, "completion_length": 90.86190643310547, "epoch": 0.706766917293233, "grad_norm": 5.53125, "kl": 0.3013840883970261, "learning_rate": 4.646616541353383e-06, "loss": 0.0398, "num_tokens": 28781202.0, "reward": -1.6651936948299408, "reward_std": 6.829349184036255, "rewards/get_chromagram_reward": 0.646953010559082, "rewards/get_chromagram_reward_std": 0.11124408766627311, "rewards/get_intelligibility_reward": -5.6138955950737, "rewards/get_intelligibility_reward_std": 10.676132678985596, "rewards/get_target_len_reward": -0.028638134244829416, "rewards/get_target_len_reward_std": 0.08526257313787937, "step": 940 }, { "advantages": 2.5058785411147257e-07, "advantages_std": 1.6995879650115966, "clip_ratio": 0.0, "completion_length": 88.37024002075195, "epoch": 0.7142857142857143, "grad_norm": 6.78125, "kl": 0.3869833633303642, "learning_rate": 4.642857142857144e-06, "loss": 0.0397, "num_tokens": 29092284.0, "reward": -1.2819394290447235, "reward_std": 6.067027044296265, "rewards/get_chromagram_reward": 0.6137153327465057, "rewards/get_chromagram_reward_std": 0.10772662758827209, "rewards/get_intelligibility_reward": -4.441144847869873, "rewards/get_intelligibility_reward_std": 9.736154413223266, "rewards/get_target_len_reward": -0.018388483859598636, "rewards/get_target_len_reward_std": 0.05689036846160889, "step": 950 }, { "advantages": 1.5944243330068276e-07, "advantages_std": 1.5586883306503296, "clip_ratio": 0.0, "completion_length": 85.98333511352538, "epoch": 0.7218045112781954, "grad_norm": 5.25, "kl": 0.498983108997345, "learning_rate": 4.639097744360903e-06, "loss": 0.0583, "num_tokens": 29396896.0, "reward": -1.1031381070613862, "reward_std": 6.2076152801513675, "rewards/get_chromagram_reward": 0.6318881809711456, "rewards/get_chromagram_reward_std": 0.11726146414875985, "rewards/get_intelligibility_reward": -3.916832911968231, "rewards/get_intelligibility_reward_std": 10.10825605392456, "rewards/get_target_len_reward": -0.024469429068267344, "rewards/get_target_len_reward_std": 0.07591898571699858, "step": 960 }, { "advantages": -4.048150259450267e-08, "advantages_std": 1.519583487510681, "clip_ratio": 0.0, "completion_length": 85.97916870117187, "epoch": 0.7293233082706767, "grad_norm": 6.40625, "kl": 0.3494548827409744, "learning_rate": 4.635338345864662e-06, "loss": 0.0376, "num_tokens": 29701172.0, "reward": -1.7784659802913665, "reward_std": 6.481203222274781, "rewards/get_chromagram_reward": 0.6209641814231872, "rewards/get_chromagram_reward_std": 0.10738300830125809, "rewards/get_intelligibility_reward": -5.936930441856385, "rewards/get_intelligibility_reward_std": 9.880269956588744, "rewards/get_target_len_reward": -0.019431459810584785, "rewards/get_target_len_reward_std": 0.05564035829156637, "step": 970 }, { "advantages": 5.463760022195175e-09, "advantages_std": 1.399158489704132, "clip_ratio": 0.0, "completion_length": 86.23690643310547, "epoch": 0.7368421052631579, "grad_norm": 6.90625, "kl": 0.3064037337899208, "learning_rate": 4.631578947368421e-06, "loss": 0.031, "num_tokens": 30005992.0, "reward": -1.6206971883773804, "reward_std": 6.9678229808807375, "rewards/get_chromagram_reward": 0.6120537519454956, "rewards/get_chromagram_reward_std": 0.11391936540603638, "rewards/get_intelligibility_reward": -5.458502078056336, "rewards/get_intelligibility_reward_std": 10.979090690612793, "rewards/get_target_len_reward": -0.015642876317724586, "rewards/get_target_len_reward_std": 0.03758895331993699, "step": 980 }, { "advantages": 9.114544923249923e-08, "advantages_std": 1.6145791053771972, "clip_ratio": 0.0, "completion_length": 81.36369247436524, "epoch": 0.7443609022556391, "grad_norm": 7.34375, "kl": 0.36196746826171877, "learning_rate": 4.6278195488721815e-06, "loss": 0.0395, "num_tokens": 30297509.0, "reward": -1.9562557220458985, "reward_std": 6.989383935928345, "rewards/get_chromagram_reward": 0.605682373046875, "rewards/get_chromagram_reward_std": 0.11706684604287147, "rewards/get_intelligibility_reward": -6.451294040679931, "rewards/get_intelligibility_reward_std": 10.68880500793457, "rewards/get_target_len_reward": -0.02315532071515918, "rewards/get_target_len_reward_std": 0.0718101266771555, "step": 990 }, { "advantages": 1.899898109058995e-07, "advantages_std": 1.6159561038017274, "clip_ratio": 0.0, "completion_length": 93.06369247436524, "epoch": 0.7518796992481203, "grad_norm": 4.96875, "kl": 0.49673385322093966, "learning_rate": 4.62406015037594e-06, "loss": 0.0528, "num_tokens": 30621053.0, "reward": -1.3294874399900436, "reward_std": 6.693207502365112, "rewards/get_chromagram_reward": 0.6109622955322266, "rewards/get_chromagram_reward_std": 0.10932595655322075, "rewards/get_intelligibility_reward": -4.581081557273865, "rewards/get_intelligibility_reward_std": 10.80920705795288, "rewards/get_target_len_reward": -0.018342763558030127, "rewards/get_target_len_reward_std": 0.04740550182759762, "step": 1000 }, { "advantages": 2.8510890643929087e-07, "advantages_std": 1.6630101799964905, "clip_ratio": 0.0, "completion_length": 84.70238189697265, "epoch": 0.7593984962406015, "grad_norm": 5.75, "kl": 0.31738368421792984, "learning_rate": 4.620300751879699e-06, "loss": 0.0351, "num_tokens": 30922303.0, "reward": -1.5521818846464157, "reward_std": 6.914695501327515, "rewards/get_chromagram_reward": 0.6350254416465759, "rewards/get_chromagram_reward_std": 0.11678898185491562, "rewards/get_intelligibility_reward": -5.263093185424805, "rewards/get_intelligibility_reward_std": 10.940269947052002, "rewards/get_target_len_reward": -0.028477614279836416, "rewards/get_target_len_reward_std": 0.07360137198120356, "step": 1010 }, { "advantages": 1.989926062151426e-07, "advantages_std": 1.4804226577281951, "clip_ratio": 0.0, "completion_length": 85.90238189697266, "epoch": 0.7669172932330827, "grad_norm": 18.75, "kl": 0.2990993529558182, "learning_rate": 4.616541353383459e-06, "loss": 0.0338, "num_tokens": 31225914.0, "reward": -1.722965794801712, "reward_std": 6.612655448913574, "rewards/get_chromagram_reward": 0.6062596440315247, "rewards/get_chromagram_reward_std": 0.11684568524360657, "rewards/get_intelligibility_reward": -5.754314303398132, "rewards/get_intelligibility_reward_std": 10.259960079193116, "rewards/get_target_len_reward": -0.020842405408620833, "rewards/get_target_len_reward_std": 0.05934775285422802, "step": 1020 }, { "advantages": -5.247692380194735e-07, "advantages_std": 1.5217979907989503, "clip_ratio": 0.0, "completion_length": 88.8470245361328, "epoch": 0.7744360902255639, "grad_norm": 9.125, "kl": 0.3498847380280495, "learning_rate": 4.612781954887218e-06, "loss": 0.0394, "num_tokens": 31538857.0, "reward": -1.323236495256424, "reward_std": 6.522344350814819, "rewards/get_chromagram_reward": 0.6311689078807831, "rewards/get_chromagram_reward_std": 0.11285480856895447, "rewards/get_intelligibility_reward": -4.577942156791687, "rewards/get_intelligibility_reward_std": 10.531904697418213, "rewards/get_target_len_reward": -0.022935927845537663, "rewards/get_target_len_reward_std": 0.05773061886429787, "step": 1030 }, { "advantages": -9.474654660834858e-08, "advantages_std": 1.600497829914093, "clip_ratio": 0.0, "completion_length": 84.66666717529297, "epoch": 0.7819548872180451, "grad_norm": 7.78125, "kl": 0.31537162363529203, "learning_rate": 4.609022556390978e-06, "loss": 0.0395, "num_tokens": 31839416.0, "reward": -2.097861647605896, "reward_std": 7.330296373367309, "rewards/get_chromagram_reward": 0.6081015944480896, "rewards/get_chromagram_reward_std": 0.11224598959088325, "rewards/get_intelligibility_reward": -6.872477197647095, "rewards/get_intelligibility_reward_std": 11.225436401367187, "rewards/get_target_len_reward": -0.02920899149030447, "rewards/get_target_len_reward_std": 0.09407919310033322, "step": 1040 }, { "advantages": -1.1250377056626348e-07, "advantages_std": 1.580076313018799, "clip_ratio": 0.0, "completion_length": 87.11666717529297, "epoch": 0.7894736842105263, "grad_norm": 8.875, "kl": 0.2959355965256691, "learning_rate": 4.605263157894737e-06, "loss": 0.0323, "num_tokens": 32146897.0, "reward": -1.5526989638805389, "reward_std": 6.767483377456665, "rewards/get_chromagram_reward": 0.6345309376716614, "rewards/get_chromagram_reward_std": 0.1191826693713665, "rewards/get_intelligibility_reward": -5.266578364372253, "rewards/get_intelligibility_reward_std": 10.712108993530274, "rewards/get_target_len_reward": -0.02604932654649019, "rewards/get_target_len_reward_std": 0.07846166621893644, "step": 1050 }, { "advantages": 1.502533697461672e-07, "advantages_std": 1.5794021248817445, "clip_ratio": 0.0, "completion_length": 86.48452606201172, "epoch": 0.7969924812030075, "grad_norm": 31.375, "kl": 0.2713562995195389, "learning_rate": 4.6015037593984965e-06, "loss": 0.0278, "num_tokens": 32452918.0, "reward": -1.7753393650054932, "reward_std": 6.538304424285888, "rewards/get_chromagram_reward": 0.6110261261463166, "rewards/get_chromagram_reward_std": 0.11173097193241119, "rewards/get_intelligibility_reward": -5.921868181228637, "rewards/get_intelligibility_reward_std": 10.043649768829345, "rewards/get_target_len_reward": -0.015175698138773442, "rewards/get_target_len_reward_std": 0.042067173309624194, "step": 1060 }, { "advantages": 1.1244168627300155e-07, "advantages_std": 1.675466275215149, "clip_ratio": 0.0, "completion_length": 86.32500228881835, "epoch": 0.8045112781954887, "grad_norm": 10.5, "kl": 0.4505449026823044, "learning_rate": 4.597744360902256e-06, "loss": 0.0514, "num_tokens": 32757810.0, "reward": -1.6142403960227967, "reward_std": 6.906636905670166, "rewards/get_chromagram_reward": 0.6085878312587738, "rewards/get_chromagram_reward_std": 0.11353035345673561, "rewards/get_intelligibility_reward": -5.427689337730408, "rewards/get_intelligibility_reward_std": 10.920014953613281, "rewards/get_target_len_reward": -0.02361916834488511, "rewards/get_target_len_reward_std": 0.07267850339412689, "step": 1070 }, { "advantages": -7.525086758164434e-08, "advantages_std": 1.6398833632469176, "clip_ratio": 0.0, "completion_length": 88.7452392578125, "epoch": 0.8120300751879699, "grad_norm": 63.25, "kl": 0.30239309668540953, "learning_rate": 4.593984962406016e-06, "loss": 0.0342, "num_tokens": 33069816.0, "reward": -1.7610064923763276, "reward_std": 7.087743330001831, "rewards/get_chromagram_reward": 0.6198269784450531, "rewards/get_chromagram_reward_std": 0.11029869988560677, "rewards/get_intelligibility_reward": -5.880134701728821, "rewards/get_intelligibility_reward_std": 11.074260044097901, "rewards/get_target_len_reward": -0.022711543925106527, "rewards/get_target_len_reward_std": 0.06780009865760803, "step": 1080 }, { "advantages": 1.7409523564992923e-07, "advantages_std": 1.4920554280281066, "clip_ratio": 0.0, "completion_length": 88.64821548461914, "epoch": 0.8195488721804511, "grad_norm": 5.625, "kl": 2.4893749192357064, "learning_rate": 4.5902255639097746e-06, "loss": 0.2518, "num_tokens": 33380904.0, "reward": -1.623878252506256, "reward_std": 6.674379110336304, "rewards/get_chromagram_reward": 0.6122495532035828, "rewards/get_chromagram_reward_std": 0.10934195294976234, "rewards/get_intelligibility_reward": -5.4654449939727785, "rewards/get_intelligibility_reward_std": 10.542883014678955, "rewards/get_target_len_reward": -0.018439139425754546, "rewards/get_target_len_reward_std": 0.05776769071817398, "step": 1090 }, { "advantages": -6.531675573739904e-08, "advantages_std": 1.6210807323455811, "clip_ratio": 0.0, "completion_length": 87.26309661865234, "epoch": 0.8270676691729323, "grad_norm": 9.125, "kl": 0.3473955288529396, "learning_rate": 4.586466165413534e-06, "loss": 0.0368, "num_tokens": 33688342.0, "reward": -1.305153553187847, "reward_std": 6.34152626991272, "rewards/get_chromagram_reward": 0.6365042924880981, "rewards/get_chromagram_reward_std": 0.11087472662329674, "rewards/get_intelligibility_reward": -4.5303013920784, "rewards/get_intelligibility_reward_std": 10.122239017486573, "rewards/get_target_len_reward": -0.021663395036011935, "rewards/get_target_len_reward_std": 0.059238927252590654, "step": 1100 }, { "advantages": 4.557271902072557e-07, "advantages_std": 1.661526906490326, "clip_ratio": 0.0, "completion_length": 87.11726379394531, "epoch": 0.8345864661654135, "grad_norm": 6.75, "kl": 0.5738143682479858, "learning_rate": 4.582706766917294e-06, "loss": 0.0596, "num_tokens": 33995647.0, "reward": -1.4435159385204315, "reward_std": 6.458550071716308, "rewards/get_chromagram_reward": 0.6249020397663116, "rewards/get_chromagram_reward_std": 0.11777897700667381, "rewards/get_intelligibility_reward": -4.9328501462936405, "rewards/get_intelligibility_reward_std": 10.187185144424438, "rewards/get_target_len_reward": -0.022599360160529613, "rewards/get_target_len_reward_std": 0.058740793541073796, "step": 1110 }, { "advantages": 1.9073487269594125e-07, "advantages_std": 1.5964321136474608, "clip_ratio": 0.0, "completion_length": 87.53690643310547, "epoch": 0.8421052631578947, "grad_norm": 8.375, "kl": 0.27469114661216737, "learning_rate": 4.578947368421053e-06, "loss": 0.0335, "num_tokens": 34304204.0, "reward": -1.526003235578537, "reward_std": 6.978189182281494, "rewards/get_chromagram_reward": 0.6308319568634033, "rewards/get_chromagram_reward_std": 0.10873896330595016, "rewards/get_intelligibility_reward": -5.183152413368225, "rewards/get_intelligibility_reward_std": 11.189978885650635, "rewards/get_target_len_reward": -0.025688940472900868, "rewards/get_target_len_reward_std": 0.0640136267989874, "step": 1120 }, { "advantages": 1.0542571917682153e-06, "advantages_std": 1.7151224732398986, "clip_ratio": 0.0, "completion_length": 88.56905059814453, "epoch": 0.849624060150376, "grad_norm": 5.90625, "kl": 0.31005347073078154, "learning_rate": 4.575187969924812e-06, "loss": 0.0312, "num_tokens": 34616297.0, "reward": -1.14257645085454, "reward_std": 6.571909236907959, "rewards/get_chromagram_reward": 0.6334330260753631, "rewards/get_chromagram_reward_std": 0.10452088415622711, "rewards/get_intelligibility_reward": -4.038772355020046, "rewards/get_intelligibility_reward_std": 10.711493492126465, "rewards/get_target_len_reward": -0.022389863990247248, "rewards/get_target_len_reward_std": 0.05032932460308075, "step": 1130 }, { "advantages": 1.379599261497333e-07, "advantages_std": 1.608151626586914, "clip_ratio": 0.0, "completion_length": 86.90714492797852, "epoch": 0.8571428571428571, "grad_norm": 32.25, "kl": 0.40623040348291395, "learning_rate": 4.571428571428572e-06, "loss": 0.0441, "num_tokens": 34921856.0, "reward": -1.3641541302204132, "reward_std": 6.569147109985352, "rewards/get_chromagram_reward": 0.6162215530872345, "rewards/get_chromagram_reward_std": 0.11952584758400916, "rewards/get_intelligibility_reward": -4.683884525299073, "rewards/get_intelligibility_reward_std": 10.552340030670166, "rewards/get_target_len_reward": -0.02479925286024809, "rewards/get_target_len_reward_std": 0.07111198548227549, "step": 1140 }, { "advantages": -3.4620365880755343e-07, "advantages_std": 1.6208989381790162, "clip_ratio": 0.0, "completion_length": 87.70654907226563, "epoch": 0.8646616541353384, "grad_norm": 12.375, "kl": 0.3247146025300026, "learning_rate": 4.567669172932332e-06, "loss": 0.0369, "num_tokens": 35230640.0, "reward": -1.501368111371994, "reward_std": 6.6000793933868405, "rewards/get_chromagram_reward": 0.6304889559745789, "rewards/get_chromagram_reward_std": 0.11283566728234291, "rewards/get_intelligibility_reward": -5.113024723529816, "rewards/get_intelligibility_reward_std": 10.441469764709472, "rewards/get_target_len_reward": -0.021568275708705186, "rewards/get_target_len_reward_std": 0.05896295178681612, "step": 1150 }, { "advantages": -6.382664125226256e-07, "advantages_std": 1.5868653297424316, "clip_ratio": 0.0, "completion_length": 88.5958351135254, "epoch": 0.8721804511278195, "grad_norm": 8.875, "kl": 0.3063840791583061, "learning_rate": 4.56390977443609e-06, "loss": 0.0347, "num_tokens": 35542296.0, "reward": -1.5182266354560852, "reward_std": 6.986057329177856, "rewards/get_chromagram_reward": 0.6088717997074127, "rewards/get_chromagram_reward_std": 0.12039782926440239, "rewards/get_intelligibility_reward": -5.142949795722961, "rewards/get_intelligibility_reward_std": 11.212296390533448, "rewards/get_target_len_reward": -0.020601730328053236, "rewards/get_target_len_reward_std": 0.0644838048145175, "step": 1160 }, { "advantages": 7.761022402519302e-08, "advantages_std": 1.5734822034835816, "clip_ratio": 0.0, "completion_length": 83.76071624755859, "epoch": 0.8796992481203008, "grad_norm": 6.4375, "kl": 2.6361778348684313, "learning_rate": 4.56015037593985e-06, "loss": 0.2637, "num_tokens": 35840099.0, "reward": -1.675141602754593, "reward_std": 6.357161331176758, "rewards/get_chromagram_reward": 0.6211533367633819, "rewards/get_chromagram_reward_std": 0.11045403182506561, "rewards/get_intelligibility_reward": -5.627998042106628, "rewards/get_intelligibility_reward_std": 9.793633270263673, "rewards/get_target_len_reward": -0.018579850811511277, "rewards/get_target_len_reward_std": 0.04803097825497389, "step": 1170 }, { "advantages": -1.194576420004978e-07, "advantages_std": 1.5758933544158935, "clip_ratio": 0.0, "completion_length": 89.42916793823242, "epoch": 0.8872180451127819, "grad_norm": 4.9375, "kl": 0.39932370483875274, "learning_rate": 4.55639097744361e-06, "loss": 0.0422, "num_tokens": 36153899.0, "reward": -1.36173208206892, "reward_std": 6.48907585144043, "rewards/get_chromagram_reward": 0.625183516740799, "rewards/get_chromagram_reward_std": 0.10821353197097779, "rewards/get_intelligibility_reward": -4.691559541225433, "rewards/get_intelligibility_reward_std": 10.373832702636719, "rewards/get_target_len_reward": -0.01881989361718297, "rewards/get_target_len_reward_std": 0.050335131399333474, "step": 1180 }, { "advantages": -3.0174851559650053e-07, "advantages_std": 1.5266151547431945, "clip_ratio": 0.0, "completion_length": 86.88333511352539, "epoch": 0.8947368421052632, "grad_norm": 8.3125, "kl": 0.2715902358293533, "learning_rate": 4.552631578947369e-06, "loss": 0.0298, "num_tokens": 36461761.0, "reward": -1.0700063236057757, "reward_std": 6.352678012847901, "rewards/get_chromagram_reward": 0.6274643957614898, "rewards/get_chromagram_reward_std": 0.12471745386719704, "rewards/get_intelligibility_reward": -3.818689227104187, "rewards/get_intelligibility_reward_std": 10.39213514328003, "rewards/get_target_len_reward": -0.018793891929090024, "rewards/get_target_len_reward_std": 0.04841918870806694, "step": 1190 }, { "advantages": -2.0364921482496356e-07, "advantages_std": 1.6584547877311706, "clip_ratio": 0.0, "completion_length": 85.5297637939453, "epoch": 0.9022556390977443, "grad_norm": 8.125, "kl": 0.441507688164711, "learning_rate": 4.548872180451128e-06, "loss": 0.0474, "num_tokens": 36763911.0, "reward": -1.7878079771995545, "reward_std": 7.366923475265503, "rewards/get_chromagram_reward": 0.6185015857219696, "rewards/get_chromagram_reward_std": 0.11396012380719185, "rewards/get_intelligibility_reward": -5.96343092918396, "rewards/get_intelligibility_reward_std": 11.580034923553466, "rewards/get_target_len_reward": -0.018494250997900964, "rewards/get_target_len_reward_std": 0.05283417291939259, "step": 1200 }, { "advantages": -6.829698726562583e-08, "advantages_std": 1.6967475652694701, "clip_ratio": 0.0, "completion_length": 87.72916870117187, "epoch": 0.9097744360902256, "grad_norm": 6.21875, "kl": 0.2949825465679169, "learning_rate": 4.545112781954888e-06, "loss": 0.0315, "num_tokens": 37073042.0, "reward": -1.2314562678337098, "reward_std": 7.152263164520264, "rewards/get_chromagram_reward": 0.630731874704361, "rewards/get_chromagram_reward_std": 0.10836580172181129, "rewards/get_intelligibility_reward": -4.306980383396149, "rewards/get_intelligibility_reward_std": 11.745508003234864, "rewards/get_target_len_reward": -0.01812002747319639, "rewards/get_target_len_reward_std": 0.04881219994276762, "step": 1210 }, { "advantages": -4.892548526314044e-08, "advantages_std": 1.5888686656951905, "clip_ratio": 0.0, "completion_length": 84.1827392578125, "epoch": 0.9172932330827067, "grad_norm": 6.5625, "kl": 0.2977434679865837, "learning_rate": 4.541353383458647e-06, "loss": 0.0301, "num_tokens": 37372829.0, "reward": -1.401185193657875, "reward_std": 6.355694580078125, "rewards/get_chromagram_reward": 0.6275815725326538, "rewards/get_chromagram_reward_std": 0.1175346054136753, "rewards/get_intelligibility_reward": -4.810519421100617, "rewards/get_intelligibility_reward_std": 10.111643552780151, "rewards/get_target_len_reward": -0.02061743279919028, "rewards/get_target_len_reward_std": 0.05778510309755802, "step": 1220 }, { "advantages": 5.101164489929033e-07, "advantages_std": 1.5641892433166504, "clip_ratio": 0.0, "completion_length": 85.9928596496582, "epoch": 0.924812030075188, "grad_norm": 29.25, "kl": 0.3739720702171326, "learning_rate": 4.537593984962406e-06, "loss": 0.042, "num_tokens": 37676822.0, "reward": -1.3860167860984802, "reward_std": 6.516747045516968, "rewards/get_chromagram_reward": 0.6117733120918274, "rewards/get_chromagram_reward_std": 0.1162826582789421, "rewards/get_intelligibility_reward": -4.744295835494995, "rewards/get_intelligibility_reward_std": 10.425153636932373, "rewards/get_target_len_reward": -0.025527626182883977, "rewards/get_target_len_reward_std": 0.07178398761898279, "step": 1230 }, { "advantages": 5.960440319086047e-09, "advantages_std": 1.5911370515823364, "clip_ratio": 0.0, "completion_length": 90.75774002075195, "epoch": 0.9323308270676691, "grad_norm": 7.4375, "kl": 0.31194020956754687, "learning_rate": 4.533834586466166e-06, "loss": 0.0321, "num_tokens": 37994117.0, "reward": -1.3426182121038437, "reward_std": 6.672745943069458, "rewards/get_chromagram_reward": 0.6171811401844025, "rewards/get_chromagram_reward_std": 0.10649881064891815, "rewards/get_intelligibility_reward": -4.626643079519272, "rewards/get_intelligibility_reward_std": 10.690338182449342, "rewards/get_target_len_reward": -0.018392365891486406, "rewards/get_target_len_reward_std": 0.049134592339396474, "step": 1240 }, { "advantages": 6.659577451273436e-07, "advantages_std": 1.5247369408607483, "clip_ratio": 0.0, "completion_length": 90.20654907226563, "epoch": 0.9398496240601504, "grad_norm": 6.75, "kl": 0.30897647738456724, "learning_rate": 4.530075187969925e-06, "loss": 0.0347, "num_tokens": 38309634.0, "reward": -1.3368622988462449, "reward_std": 6.864240121841431, "rewards/get_chromagram_reward": 0.6257738709449768, "rewards/get_chromagram_reward_std": 0.12027077302336693, "rewards/get_intelligibility_reward": -4.608479511737824, "rewards/get_intelligibility_reward_std": 11.090731430053712, "rewards/get_target_len_reward": -0.027881059050559997, "rewards/get_target_len_reward_std": 0.07647916078567504, "step": 1250 }, { "advantages": -1.5484790054642873e-07, "advantages_std": 1.590539848804474, "clip_ratio": 0.0, "completion_length": 89.33333587646484, "epoch": 0.9473684210526315, "grad_norm": 6.5625, "kl": 0.3056371137499809, "learning_rate": 4.526315789473685e-06, "loss": 0.032, "num_tokens": 38623372.0, "reward": -1.5094636023044585, "reward_std": 6.879690170288086, "rewards/get_chromagram_reward": 0.6226464509963989, "rewards/get_chromagram_reward_std": 0.11185586228966712, "rewards/get_intelligibility_reward": -5.134295892715454, "rewards/get_intelligibility_reward_std": 10.881260585784911, "rewards/get_target_len_reward": -0.016741198487579823, "rewards/get_target_len_reward_std": 0.04208283200860023, "step": 1260 }, { "advantages": 6.342927775904173e-07, "advantages_std": 1.4530799746513368, "clip_ratio": 0.0, "completion_length": 88.98988342285156, "epoch": 0.9548872180451128, "grad_norm": 5.96875, "kl": 0.370340433716774, "learning_rate": 4.522556390977444e-06, "loss": 0.0399, "num_tokens": 38935033.0, "reward": -1.4293070256710052, "reward_std": 6.392971324920654, "rewards/get_chromagram_reward": 0.622153103351593, "rewards/get_chromagram_reward_std": 0.11032605618238449, "rewards/get_intelligibility_reward": -4.890468895435333, "rewards/get_intelligibility_reward_std": 10.122354888916016, "rewards/get_target_len_reward": -0.019604854937642812, "rewards/get_target_len_reward_std": 0.05110882055014372, "step": 1270 }, { "advantages": 2.257525977711339e-07, "advantages_std": 1.5813369393348693, "clip_ratio": 0.0, "completion_length": 87.82321624755859, "epoch": 0.9624060150375939, "grad_norm": 5.6875, "kl": 0.4109964817762375, "learning_rate": 4.518796992481204e-06, "loss": 0.0457, "num_tokens": 39244958.0, "reward": -1.6143119536340236, "reward_std": 7.410225963592529, "rewards/get_chromagram_reward": 0.6208023488521576, "rewards/get_chromagram_reward_std": 0.10917952060699462, "rewards/get_intelligibility_reward": -5.437799453735352, "rewards/get_intelligibility_reward_std": 11.794871520996093, "rewards/get_target_len_reward": -0.0259383799508214, "rewards/get_target_len_reward_std": 0.07941694743931293, "step": 1280 }, { "advantages": -2.6561321639917426e-07, "advantages_std": 1.5743435859680175, "clip_ratio": 0.0, "completion_length": 83.72857284545898, "epoch": 0.9699248120300752, "grad_norm": 7.8125, "kl": 0.331342040002346, "learning_rate": 4.515037593984962e-06, "loss": 0.0327, "num_tokens": 39544497.0, "reward": -1.3727825999259948, "reward_std": 6.4047469139099125, "rewards/get_chromagram_reward": 0.6227813065052032, "rewards/get_chromagram_reward_std": 0.11946281418204308, "rewards/get_intelligibility_reward": -4.721085810661316, "rewards/get_intelligibility_reward_std": 10.246169281005859, "rewards/get_target_len_reward": -0.02004314949735999, "rewards/get_target_len_reward_std": 0.04626058042049408, "step": 1290 }, { "advantages": 7.500250394087971e-08, "advantages_std": 1.6308319687843322, "clip_ratio": 0.0, "completion_length": 90.06666946411133, "epoch": 0.9774436090225563, "grad_norm": 19.0, "kl": 0.4505035996437073, "learning_rate": 4.511278195488722e-06, "loss": 0.0485, "num_tokens": 39860399.0, "reward": -1.6213070809841157, "reward_std": 6.728913688659668, "rewards/get_chromagram_reward": 0.619669246673584, "rewards/get_chromagram_reward_std": 0.11839989796280861, "rewards/get_intelligibility_reward": -5.457677006721497, "rewards/get_intelligibility_reward_std": 10.566771411895752, "rewards/get_target_len_reward": -0.025913165416568518, "rewards/get_target_len_reward_std": 0.06257133968174458, "step": 1300 }, { "advantages": -6.455928271975608e-07, "advantages_std": 1.5221191763877868, "clip_ratio": 0.0, "completion_length": 84.05357284545899, "epoch": 0.9849624060150376, "grad_norm": 6.28125, "kl": 0.4204570382833481, "learning_rate": 4.507518796992482e-06, "loss": 0.0493, "num_tokens": 40159013.0, "reward": -1.8954948306083679, "reward_std": 6.913206434249878, "rewards/get_chromagram_reward": 0.6115539908409119, "rewards/get_chromagram_reward_std": 0.1196502335369587, "rewards/get_intelligibility_reward": -6.275245666503906, "rewards/get_intelligibility_reward_std": 10.659251880645751, "rewards/get_target_len_reward": -0.02279257755726576, "rewards/get_target_len_reward_std": 0.06787131484597922, "step": 1310 }, { "advantages": 7.599592493079399e-08, "advantages_std": 1.451578962802887, "clip_ratio": 0.0, "completion_length": 90.43928680419921, "epoch": 0.9924812030075187, "grad_norm": 6.40625, "kl": 0.3029856622219086, "learning_rate": 4.5037593984962405e-06, "loss": 0.037, "num_tokens": 40475013.0, "reward": -1.1015370726585387, "reward_std": 6.6790083885192875, "rewards/get_chromagram_reward": 0.6353936314582824, "rewards/get_chromagram_reward_std": 0.10527931228280067, "rewards/get_intelligibility_reward": -3.917825734615326, "rewards/get_intelligibility_reward_std": 10.991698837280273, "rewards/get_target_len_reward": -0.022179031558334828, "rewards/get_target_len_reward_std": 0.06528392806649208, "step": 1320 }, { "advantages": -3.712251807286293e-07, "advantages_std": 1.5168458461761474, "clip_ratio": 0.0, "completion_length": 86.39321594238281, "epoch": 1.000751879699248, "grad_norm": 28.625, "kl": 0.3049457028508186, "learning_rate": 4.5e-06, "loss": 0.0335, "num_tokens": 40777755.0, "reward": -1.4215580880641938, "reward_std": 6.318771314620972, "rewards/get_chromagram_reward": 0.601853746175766, "rewards/get_chromagram_reward_std": 0.11767267286777497, "rewards/get_intelligibility_reward": -4.846245145797729, "rewards/get_intelligibility_reward_std": 10.053415679931641, "rewards/get_target_len_reward": -0.020282691903412343, "rewards/get_target_len_reward_std": 0.06443829238414764, "step": 1330 }, { "advantages": -1.4180938734398296e-07, "advantages_std": 1.5976835131645202, "clip_ratio": 0.0, "completion_length": 89.9607147216797, "epoch": 1.0082706766917293, "grad_norm": 66.5, "kl": 0.28232268542051314, "learning_rate": 4.49624060150376e-06, "loss": 0.0355, "num_tokens": 41092861.0, "reward": -1.2550855576992035, "reward_std": 6.8124340057373045, "rewards/get_chromagram_reward": 0.6082775354385376, "rewards/get_chromagram_reward_std": 0.124069694429636, "rewards/get_intelligibility_reward": -4.347317087650299, "rewards/get_intelligibility_reward_std": 11.108979606628418, "rewards/get_target_len_reward": -0.026216878183186056, "rewards/get_target_len_reward_std": 0.07688224576413631, "step": 1340 }, { "advantages": -4.2368969257466913e-07, "advantages_std": 1.5768442749977112, "clip_ratio": 0.0, "completion_length": 88.80714416503906, "epoch": 1.0157894736842106, "grad_norm": 5.78125, "kl": 0.2955815777182579, "learning_rate": 4.492481203007519e-06, "loss": 0.032, "num_tokens": 41404610.0, "reward": -1.4556842476129532, "reward_std": 6.5793181419372555, "rewards/get_chromagram_reward": 0.6118521928787232, "rewards/get_chromagram_reward_std": 0.09616614580154419, "rewards/get_intelligibility_reward": -4.963257133960724, "rewards/get_intelligibility_reward_std": 10.508817291259765, "rewards/get_target_len_reward": -0.015647331532090903, "rewards/get_target_len_reward_std": 0.041581641510128976, "step": 1350 }, { "advantages": 4.92235039928346e-07, "advantages_std": 1.5769322037696838, "clip_ratio": 0.0, "completion_length": 87.67024002075195, "epoch": 1.0233082706766918, "grad_norm": 7.3125, "kl": 0.2699931785464287, "learning_rate": 4.488721804511278e-06, "loss": 0.0272, "num_tokens": 41713817.0, "reward": -1.4852119833230972, "reward_std": 6.430934286117553, "rewards/get_chromagram_reward": 0.6303296208381652, "rewards/get_chromagram_reward_std": 0.11175912097096444, "rewards/get_intelligibility_reward": -5.066480994224548, "rewards/get_intelligibility_reward_std": 10.17646369934082, "rewards/get_target_len_reward": -0.019484441634267567, "rewards/get_target_len_reward_std": 0.04843886476010084, "step": 1360 }, { "advantages": 7.37359146540939e-07, "advantages_std": 1.5006843090057373, "clip_ratio": 0.0, "completion_length": 89.53869247436523, "epoch": 1.0308270676691729, "grad_norm": 6.3125, "kl": 0.2454486146569252, "learning_rate": 4.484962406015038e-06, "loss": 0.0292, "num_tokens": 42028223.0, "reward": -1.110092930495739, "reward_std": 6.495879364013672, "rewards/get_chromagram_reward": 0.6152016639709472, "rewards/get_chromagram_reward_std": 0.11499225050210952, "rewards/get_intelligibility_reward": -3.9230158805847166, "rewards/get_intelligibility_reward_std": 10.612717962265014, "rewards/get_target_len_reward": -0.022464485326781868, "rewards/get_target_len_reward_std": 0.06751915938220918, "step": 1370 }, { "advantages": 7.003545921868693e-07, "advantages_std": 1.6903245568275451, "clip_ratio": 0.0, "completion_length": 86.36190643310547, "epoch": 1.0383458646616541, "grad_norm": 14.3125, "kl": 0.3273655205965042, "learning_rate": 4.4812030075187975e-06, "loss": 0.0377, "num_tokens": 42333747.0, "reward": -1.5490575909614563, "reward_std": 6.348045444488525, "rewards/get_chromagram_reward": 0.6144465744495392, "rewards/get_chromagram_reward_std": 0.11609260141849517, "rewards/get_intelligibility_reward": -5.239705562591553, "rewards/get_intelligibility_reward_std": 9.924682903289796, "rewards/get_target_len_reward": -0.021913580782711505, "rewards/get_target_len_reward_std": 0.07249412853270769, "step": 1380 }, { "advantages": 4.435579057826544e-07, "advantages_std": 1.6824481248855592, "clip_ratio": 0.0, "completion_length": 85.4571434020996, "epoch": 1.0458646616541354, "grad_norm": 24.0, "kl": 0.31759440451860427, "learning_rate": 4.477443609022556e-06, "loss": 0.0326, "num_tokens": 42636500.0, "reward": -1.533106380701065, "reward_std": 6.635279893875122, "rewards/get_chromagram_reward": 0.6223339438438416, "rewards/get_chromagram_reward_std": 0.12457952573895455, "rewards/get_intelligibility_reward": -5.1997171401977536, "rewards/get_intelligibility_reward_std": 10.557915115356446, "rewards/get_target_len_reward": -0.021935593243688344, "rewards/get_target_len_reward_std": 0.04879705365747213, "step": 1390 }, { "advantages": -8.630257042341327e-08, "advantages_std": 1.5478406071662902, "clip_ratio": 0.0, "completion_length": 88.22261962890624, "epoch": 1.0533834586466166, "grad_norm": 11.625, "kl": 0.35201059728860856, "learning_rate": 4.473684210526316e-06, "loss": 0.0351, "num_tokens": 42947076.0, "reward": -1.0654352620244025, "reward_std": 6.328466606140137, "rewards/get_chromagram_reward": 0.6265157759189606, "rewards/get_chromagram_reward_std": 0.1030153326690197, "rewards/get_intelligibility_reward": -3.8062858104705812, "rewards/get_intelligibility_reward_std": 10.39826946258545, "rewards/get_target_len_reward": -0.016535515151917934, "rewards/get_target_len_reward_std": 0.04152263272553682, "step": 1400 }, { "advantages": 5.507220883771424e-07, "advantages_std": 1.6665414214134215, "clip_ratio": 0.0, "completion_length": 88.79345397949218, "epoch": 1.0609022556390977, "grad_norm": 108.5, "kl": 0.3712936282157898, "learning_rate": 4.469924812030076e-06, "loss": 0.0386, "num_tokens": 43258728.0, "reward": -1.7698104798793792, "reward_std": 6.952770853042603, "rewards/get_chromagram_reward": 0.6143802165985107, "rewards/get_chromagram_reward_std": 0.10771603286266326, "rewards/get_intelligibility_reward": -5.902230358123779, "rewards/get_intelligibility_reward_std": 10.760121154785157, "rewards/get_target_len_reward": -0.02158096982166171, "rewards/get_target_len_reward_std": 0.06000328604131937, "step": 1410 }, { "advantages": -8.791684713571613e-08, "advantages_std": 1.59330712556839, "clip_ratio": 0.0, "completion_length": 89.35000152587891, "epoch": 1.068421052631579, "grad_norm": 21.625, "kl": 0.29032159596681595, "learning_rate": 4.466165413533835e-06, "loss": 0.0295, "num_tokens": 43572786.0, "reward": -1.209948765486479, "reward_std": 6.531015062332154, "rewards/get_chromagram_reward": 0.6338232755661011, "rewards/get_chromagram_reward_std": 0.10927505195140838, "rewards/get_intelligibility_reward": -4.243760868906975, "rewards/get_intelligibility_reward_std": 10.54481372833252, "rewards/get_target_len_reward": -0.01990845762193203, "rewards/get_target_len_reward_std": 0.05181889459490776, "step": 1420 }, { "advantages": -3.5638611493027386e-07, "advantages_std": 1.6873063921928406, "clip_ratio": 0.0, "completion_length": 87.25119171142578, "epoch": 1.0759398496240602, "grad_norm": 5.875, "kl": 0.3652739107608795, "learning_rate": 4.462406015037594e-06, "loss": 0.0441, "num_tokens": 43880015.0, "reward": -1.794026893377304, "reward_std": 6.88950924873352, "rewards/get_chromagram_reward": 0.6147673070430756, "rewards/get_chromagram_reward_std": 0.1085585631430149, "rewards/get_intelligibility_reward": -5.975696587562561, "rewards/get_intelligibility_reward_std": 10.655347537994384, "rewards/get_target_len_reward": -0.02115098023787141, "rewards/get_target_len_reward_std": 0.06407887656241655, "step": 1430 }, { "advantages": -2.2724272383811694e-07, "advantages_std": 1.597934901714325, "clip_ratio": 0.0, "completion_length": 85.1458351135254, "epoch": 1.0834586466165415, "grad_norm": 7.78125, "kl": 0.2844631150364876, "learning_rate": 4.458646616541354e-06, "loss": 0.034, "num_tokens": 44181556.0, "reward": -1.610038973391056, "reward_std": 7.0278373718261715, "rewards/get_chromagram_reward": 0.6216884851455688, "rewards/get_chromagram_reward_std": 0.10702887326478958, "rewards/get_intelligibility_reward": -5.433124041557312, "rewards/get_intelligibility_reward_std": 11.143450927734374, "rewards/get_target_len_reward": -0.018680935073643923, "rewards/get_target_len_reward_std": 0.052847124822437766, "step": 1440 }, { "advantages": -2.371768186293366e-07, "advantages_std": 1.6119914293289184, "clip_ratio": 0.0, "completion_length": 91.92738342285156, "epoch": 1.0909774436090225, "grad_norm": 6.625, "kl": 0.25501908659934996, "learning_rate": 4.454887218045113e-06, "loss": 0.0245, "num_tokens": 44501395.0, "reward": -1.4425207868218421, "reward_std": 7.0387735843658445, "rewards/get_chromagram_reward": 0.6270141720771789, "rewards/get_chromagram_reward_std": 0.10027562379837036, "rewards/get_intelligibility_reward": -4.93934919834137, "rewards/get_intelligibility_reward_std": 11.302708530426026, "rewards/get_target_len_reward": -0.015226956270635129, "rewards/get_target_len_reward_std": 0.041442089900374415, "step": 1450 }, { "advantages": 4.912416148528109e-07, "advantages_std": 1.5803248286247253, "clip_ratio": 0.0, "completion_length": 90.03154907226562, "epoch": 1.0984962406015037, "grad_norm": 16.5, "kl": 0.2846480205655098, "learning_rate": 4.451127819548873e-06, "loss": 0.034, "num_tokens": 44817065.0, "reward": -1.4160616666078567, "reward_std": 6.803660202026367, "rewards/get_chromagram_reward": 0.6224904239177704, "rewards/get_chromagram_reward_std": 0.10744070336222648, "rewards/get_intelligibility_reward": -4.849036240577698, "rewards/get_intelligibility_reward_std": 10.839382362365722, "rewards/get_target_len_reward": -0.021639053942635655, "rewards/get_target_len_reward_std": 0.05841308189556003, "step": 1460 }, { "advantages": 4.967053882865002e-07, "advantages_std": 1.580376970767975, "clip_ratio": 0.0, "completion_length": 87.665478515625, "epoch": 1.106015037593985, "grad_norm": 90.5, "kl": 0.29044296592473984, "learning_rate": 4.447368421052632e-06, "loss": 0.0303, "num_tokens": 45125318.0, "reward": -1.5038848042488098, "reward_std": 6.9014417171478275, "rewards/get_chromagram_reward": 0.6155295431613922, "rewards/get_chromagram_reward_std": 0.11622778475284576, "rewards/get_intelligibility_reward": -5.104975247383118, "rewards/get_intelligibility_reward_std": 11.041919040679932, "rewards/get_target_len_reward": -0.022208488639444114, "rewards/get_target_len_reward_std": 0.06804153546690941, "step": 1470 }, { "advantages": -2.2227563434285004e-08, "advantages_std": 1.5115014910697937, "clip_ratio": 0.0, "completion_length": 87.7130958557129, "epoch": 1.1135338345864663, "grad_norm": 6.0625, "kl": 0.5705481573939324, "learning_rate": 4.443609022556391e-06, "loss": 0.0599, "num_tokens": 45434347.0, "reward": -1.292085385322571, "reward_std": 6.833794784545899, "rewards/get_chromagram_reward": 0.6335037827491761, "rewards/get_chromagram_reward_std": 0.11769273206591606, "rewards/get_intelligibility_reward": -4.487040567398071, "rewards/get_intelligibility_reward_std": 11.127706336975098, "rewards/get_target_len_reward": -0.02271918151527643, "rewards/get_target_len_reward_std": 0.06205502189695835, "step": 1480 }, { "advantages": 2.8560555165313416e-08, "advantages_std": 1.6912749409675598, "clip_ratio": 0.0, "completion_length": 90.43214569091796, "epoch": 1.1210526315789473, "grad_norm": 8.75, "kl": 0.39698506742715833, "learning_rate": 4.439849624060151e-06, "loss": 0.0435, "num_tokens": 45750722.0, "reward": -1.3910152643918992, "reward_std": 6.621922302246094, "rewards/get_chromagram_reward": 0.6196567595005036, "rewards/get_chromagram_reward_std": 0.10777244716882706, "rewards/get_intelligibility_reward": -4.771567785739899, "rewards/get_intelligibility_reward_std": 10.633524322509766, "rewards/get_target_len_reward": -0.021134493965655567, "rewards/get_target_len_reward_std": 0.06209372207522392, "step": 1490 }, { "advantages": -5.6872755749282077e-08, "advantages_std": 1.4580247700214386, "clip_ratio": 0.0, "completion_length": 88.9755958557129, "epoch": 1.1285714285714286, "grad_norm": 5.03125, "kl": 0.28813485354185103, "learning_rate": 4.43609022556391e-06, "loss": 0.0355, "num_tokens": 46063025.0, "reward": -1.2929147403687238, "reward_std": 6.570118951797485, "rewards/get_chromagram_reward": 0.6155599892139435, "rewards/get_chromagram_reward_std": 0.11506677493453026, "rewards/get_intelligibility_reward": -4.472536733746528, "rewards/get_intelligibility_reward_std": 10.563865470886231, "rewards/get_target_len_reward": -0.021767213568091394, "rewards/get_target_len_reward_std": 0.06660667713731527, "step": 1500 }, { "advantages": 2.9082100407862297e-07, "advantages_std": 1.6153509140014648, "clip_ratio": 0.0, "completion_length": 87.2125015258789, "epoch": 1.1360902255639098, "grad_norm": 14.5, "kl": 0.3224494606256485, "learning_rate": 4.4323308270676695e-06, "loss": 0.0343, "num_tokens": 46370373.0, "reward": -1.3674847215414048, "reward_std": 6.626483583450318, "rewards/get_chromagram_reward": 0.6133666872978211, "rewards/get_chromagram_reward_std": 0.10720290318131447, "rewards/get_intelligibility_reward": -4.692685705423355, "rewards/get_intelligibility_reward_std": 10.596193408966064, "rewards/get_target_len_reward": -0.023134994506835937, "rewards/get_target_len_reward_std": 0.07376478314399719, "step": 1510 }, { "advantages": -1.9694368376121928e-07, "advantages_std": 1.6327782154083252, "clip_ratio": 0.0, "completion_length": 86.3755973815918, "epoch": 1.143609022556391, "grad_norm": 7.1875, "kl": 0.35077326446771623, "learning_rate": 4.428571428571429e-06, "loss": 0.0396, "num_tokens": 46675165.0, "reward": -1.8090145349502564, "reward_std": 6.713898372650147, "rewards/get_chromagram_reward": 0.6248087406158447, "rewards/get_chromagram_reward_std": 0.11518047973513604, "rewards/get_intelligibility_reward": -6.0250754117965695, "rewards/get_intelligibility_reward_std": 10.370805311203004, "rewards/get_target_len_reward": -0.02677653534337878, "rewards/get_target_len_reward_std": 0.07797287553548812, "step": 1520 }, { "advantages": -1.0505319445464067e-07, "advantages_std": 1.6318493604660034, "clip_ratio": 0.0, "completion_length": 87.5809539794922, "epoch": 1.151127819548872, "grad_norm": 8.375, "kl": 0.3616947069764137, "learning_rate": 4.424812030075189e-06, "loss": 0.0394, "num_tokens": 46983757.0, "reward": -1.3312961548566817, "reward_std": 6.356854820251465, "rewards/get_chromagram_reward": 0.6233540952205658, "rewards/get_chromagram_reward_std": 0.12999609112739563, "rewards/get_intelligibility_reward": -4.593122959136963, "rewards/get_intelligibility_reward_std": 10.18731756210327, "rewards/get_target_len_reward": -0.024119340069592, "rewards/get_target_len_reward_std": 0.06394902095198632, "step": 1530 }, { "advantages": -1.4652809312565296e-07, "advantages_std": 1.5422507643699646, "clip_ratio": 0.0, "completion_length": 85.48928756713867, "epoch": 1.1586466165413534, "grad_norm": 12.5625, "kl": 0.3347591429948807, "learning_rate": 4.4210526315789476e-06, "loss": 0.0322, "num_tokens": 47286357.0, "reward": -1.4546345457434655, "reward_std": 6.773113775253296, "rewards/get_chromagram_reward": 0.6325708985328674, "rewards/get_chromagram_reward_std": 0.10721831172704696, "rewards/get_intelligibility_reward": -4.977900385856628, "rewards/get_intelligibility_reward_std": 10.788327884674072, "rewards/get_target_len_reward": -0.018573809042572977, "rewards/get_target_len_reward_std": 0.04435503650456667, "step": 1540 }, { "advantages": 5.419055707278631e-07, "advantages_std": 1.6118939757347106, "clip_ratio": 0.0, "completion_length": 86.31428756713868, "epoch": 1.1661654135338346, "grad_norm": 6.125, "kl": 1.34088137447834, "learning_rate": 4.417293233082707e-06, "loss": 0.1428, "num_tokens": 47590543.0, "reward": -1.5086460947990417, "reward_std": 6.8800328254699705, "rewards/get_chromagram_reward": 0.6266894578933716, "rewards/get_chromagram_reward_std": 0.11288965046405793, "rewards/get_intelligibility_reward": -5.127788019180298, "rewards/get_intelligibility_reward_std": 10.920746994018554, "rewards/get_target_len_reward": -0.024839654657989742, "rewards/get_target_len_reward_std": 0.08086240235716105, "step": 1550 }, { "advantages": -4.6330195999644277e-07, "advantages_std": 1.6480433940887451, "clip_ratio": 0.0, "completion_length": 85.3125015258789, "epoch": 1.1736842105263159, "grad_norm": 9.5625, "kl": 0.3124460786581039, "learning_rate": 4.413533834586467e-06, "loss": 0.0367, "num_tokens": 47892098.0, "reward": -1.5736109614372253, "reward_std": 6.557619524002075, "rewards/get_chromagram_reward": 0.6038493335247039, "rewards/get_chromagram_reward_std": 0.1160405620932579, "rewards/get_intelligibility_reward": -5.302905559539795, "rewards/get_intelligibility_reward_std": 10.392185306549072, "rewards/get_target_len_reward": -0.021776399575173855, "rewards/get_target_len_reward_std": 0.0646151814609766, "step": 1560 }, { "advantages": 5.87354101355686e-08, "advantages_std": 1.4956356883049011, "clip_ratio": 0.0, "completion_length": 87.60774154663086, "epoch": 1.181203007518797, "grad_norm": 5.6875, "kl": 0.3692999482154846, "learning_rate": 4.4097744360902265e-06, "loss": 0.0477, "num_tokens": 48200190.0, "reward": -1.1628334634006023, "reward_std": 6.734441566467285, "rewards/get_chromagram_reward": 0.6324356317520141, "rewards/get_chromagram_reward_std": 0.10941554978489876, "rewards/get_intelligibility_reward": -4.098512363433838, "rewards/get_intelligibility_reward_std": 10.982434701919555, "rewards/get_target_len_reward": -0.022423355374485254, "rewards/get_target_len_reward_std": 0.06325810812413693, "step": 1570 }, { "advantages": -1.0244549315530094e-07, "advantages_std": 1.626529288291931, "clip_ratio": 0.0, "completion_length": 88.18154907226562, "epoch": 1.1887218045112782, "grad_norm": 6.34375, "kl": 0.9706477746367455, "learning_rate": 4.406015037593985e-06, "loss": 0.1031, "num_tokens": 48509733.0, "reward": -1.7460247814655303, "reward_std": 6.996130752563476, "rewards/get_chromagram_reward": 0.6319741845130921, "rewards/get_chromagram_reward_std": 0.11034553200006485, "rewards/get_intelligibility_reward": -5.849562883377075, "rewards/get_intelligibility_reward_std": 10.926602697372436, "rewards/get_target_len_reward": -0.020485294051468372, "rewards/get_target_len_reward_std": 0.05834382399916649, "step": 1580 }, { "advantages": -1.4839073099182086e-07, "advantages_std": 1.5476613879203795, "clip_ratio": 0.0, "completion_length": 88.67678909301758, "epoch": 1.1962406015037594, "grad_norm": 6.4375, "kl": 0.27188325226306914, "learning_rate": 4.402255639097744e-06, "loss": 0.0276, "num_tokens": 48821267.0, "reward": -1.2882464125752449, "reward_std": 6.523538208007812, "rewards/get_chromagram_reward": 0.6279970049858093, "rewards/get_chromagram_reward_std": 0.09582380726933479, "rewards/get_intelligibility_reward": -4.473076581954956, "rewards/get_intelligibility_reward_std": 10.490189599990845, "rewards/get_target_len_reward": -0.019659423362463714, "rewards/get_target_len_reward_std": 0.05062466654926538, "step": 1590 }, { "advantages": 6.534159325610745e-07, "advantages_std": 1.647250759601593, "clip_ratio": 0.0, "completion_length": 88.38154983520508, "epoch": 1.2037593984962407, "grad_norm": 27.5, "kl": 0.2886976793408394, "learning_rate": 4.398496240601504e-06, "loss": 0.0329, "num_tokens": 49132692.0, "reward": -1.3682509139180183, "reward_std": 6.9248096466064455, "rewards/get_chromagram_reward": 0.6313087105751037, "rewards/get_chromagram_reward_std": 0.11718138679862022, "rewards/get_intelligibility_reward": -4.710203987360001, "rewards/get_intelligibility_reward_std": 11.157450008392335, "rewards/get_target_len_reward": -0.025857241172343493, "rewards/get_target_len_reward_std": 0.07822401337325573, "step": 1600 }, { "advantages": 1.373390333014868e-07, "advantages_std": 1.6475409626960755, "clip_ratio": 0.0, "completion_length": 83.82619094848633, "epoch": 1.2112781954887217, "grad_norm": 5.53125, "kl": 0.3448040962219238, "learning_rate": 4.394736842105263e-06, "loss": 0.0403, "num_tokens": 49431312.0, "reward": -1.8660083770751954, "reward_std": 7.143636417388916, "rewards/get_chromagram_reward": 0.6275706827640534, "rewards/get_chromagram_reward_std": 0.11524273306131363, "rewards/get_intelligibility_reward": -6.202886414527893, "rewards/get_intelligibility_reward_std": 11.145586681365966, "rewards/get_target_len_reward": -0.02270910535007715, "rewards/get_target_len_reward_std": 0.07161459308117628, "step": 1610 }, { "advantages": 3.9339067661181557e-07, "advantages_std": 1.577373206615448, "clip_ratio": 0.0, "completion_length": 88.20535736083984, "epoch": 1.218796992481203, "grad_norm": 740.0, "kl": 0.4538608729839325, "learning_rate": 4.390977443609023e-06, "loss": 0.0523, "num_tokens": 49741967.0, "reward": -1.2801022872328758, "reward_std": 6.292341852188111, "rewards/get_chromagram_reward": 0.6265273749828338, "rewards/get_chromagram_reward_std": 0.10703437700867653, "rewards/get_intelligibility_reward": -4.4441596299409865, "rewards/get_intelligibility_reward_std": 10.096945667266846, "rewards/get_target_len_reward": -0.022674433421343565, "rewards/get_target_len_reward_std": 0.06971333101391793, "step": 1620 }, { "advantages": -3.42975042855187e-07, "advantages_std": 1.5779038429260255, "clip_ratio": 0.0, "completion_length": 89.30595397949219, "epoch": 1.2263157894736842, "grad_norm": 5.375, "kl": 38.55426201820374, "learning_rate": 4.387218045112782e-06, "loss": 3.8586, "num_tokens": 50056039.0, "reward": -1.3629911191761495, "reward_std": 6.501353454589844, "rewards/get_chromagram_reward": 0.6287332653999329, "rewards/get_chromagram_reward_std": 0.11263178661465645, "rewards/get_intelligibility_reward": -4.694042664766312, "rewards/get_intelligibility_reward_std": 10.31979284286499, "rewards/get_target_len_reward": -0.02366358144208789, "rewards/get_target_len_reward_std": 0.06714603845030069, "step": 1630 }, { "advantages": -4.142522893602063e-07, "advantages_std": 1.6763808131217957, "clip_ratio": 0.0, "completion_length": 87.64404830932617, "epoch": 1.2338345864661655, "grad_norm": 28.5, "kl": 0.3203793570399284, "learning_rate": 4.3834586466165415e-06, "loss": 0.0324, "num_tokens": 50364992.0, "reward": -1.573092085123062, "reward_std": 6.357421112060547, "rewards/get_chromagram_reward": 0.6227325141429901, "rewards/get_chromagram_reward_std": 0.11975778564810753, "rewards/get_intelligibility_reward": -5.323095595836639, "rewards/get_intelligibility_reward_std": 9.898041820526123, "rewards/get_target_len_reward": -0.018912956397980452, "rewards/get_target_len_reward_std": 0.04353561829775572, "step": 1640 }, { "advantages": 2.920627608204995e-07, "advantages_std": 1.6491339564323426, "clip_ratio": 0.0, "completion_length": 88.24226303100586, "epoch": 1.2413533834586465, "grad_norm": 8.9375, "kl": 2234.8600372612477, "learning_rate": 4.379699248120301e-06, "loss": 223.4912, "num_tokens": 50675660.0, "reward": -1.3432014167308808, "reward_std": 6.825479030609131, "rewards/get_chromagram_reward": 0.611679208278656, "rewards/get_chromagram_reward_std": 0.11697395518422127, "rewards/get_intelligibility_reward": -4.619905805587768, "rewards/get_intelligibility_reward_std": 11.043926239013672, "rewards/get_target_len_reward": -0.02137720864266157, "rewards/get_target_len_reward_std": 0.06526100691407918, "step": 1650 }, { "advantages": 4.4430297307940236e-07, "advantages_std": 1.5631268739700317, "clip_ratio": 0.0, "completion_length": 85.99643020629883, "epoch": 1.2488721804511278, "grad_norm": 10.3125, "kl": 0.41894365549087526, "learning_rate": 4.375939849624061e-06, "loss": 0.0469, "num_tokens": 50979381.0, "reward": -2.0171368598937987, "reward_std": 7.4840082168579105, "rewards/get_chromagram_reward": 0.6187068104743958, "rewards/get_chromagram_reward_std": 0.1058721587061882, "rewards/get_intelligibility_reward": -6.645796608924866, "rewards/get_intelligibility_reward_std": 11.541616916656494, "rewards/get_target_len_reward": -0.024320135079324245, "rewards/get_target_len_reward_std": 0.08446543496102095, "step": 1660 }, { "advantages": -7.599592279916579e-08, "advantages_std": 1.6220983147621155, "clip_ratio": 0.0, "completion_length": 85.78274078369141, "epoch": 1.256390977443609, "grad_norm": 9.4375, "kl": 0.3555022940039635, "learning_rate": 4.3721804511278196e-06, "loss": 0.0473, "num_tokens": 51283103.0, "reward": -1.3157795041799545, "reward_std": 6.486531400680542, "rewards/get_chromagram_reward": 0.6245897948741913, "rewards/get_chromagram_reward_std": 0.12780793830752374, "rewards/get_intelligibility_reward": -4.5451841205358505, "rewards/get_intelligibility_reward_std": 10.350716400146485, "rewards/get_target_len_reward": -0.02674414971843362, "rewards/get_target_len_reward_std": 0.08355995900928974, "step": 1670 }, { "advantages": -1.4441710263213282e-07, "advantages_std": 1.6307815790176392, "clip_ratio": 0.0, "completion_length": 90.22500228881836, "epoch": 1.2639097744360903, "grad_norm": 11.75, "kl": 0.5674268335103989, "learning_rate": 4.368421052631579e-06, "loss": 0.0603, "num_tokens": 51599142.0, "reward": -1.4558716148138047, "reward_std": 6.707897043228149, "rewards/get_chromagram_reward": 0.6259454905986785, "rewards/get_chromagram_reward_std": 0.10946089550852775, "rewards/get_intelligibility_reward": -4.973546826839447, "rewards/get_intelligibility_reward_std": 10.674462413787841, "rewards/get_target_len_reward": -0.020013115461915733, "rewards/get_target_len_reward_std": 0.060001314245164396, "step": 1680 }, { "advantages": 5.508462791681268e-07, "advantages_std": 1.4188524723052978, "clip_ratio": 0.0, "completion_length": 82.01190643310547, "epoch": 1.2714285714285714, "grad_norm": 15.125, "kl": 0.39633190631866455, "learning_rate": 4.364661654135339e-06, "loss": 0.0446, "num_tokens": 51892605.0, "reward": -1.8866865515708924, "reward_std": 6.593513154983521, "rewards/get_chromagram_reward": 0.6106093227863312, "rewards/get_chromagram_reward_std": 0.11626130864024162, "rewards/get_intelligibility_reward": -6.248795795440674, "rewards/get_intelligibility_reward_std": 9.956018733978272, "rewards/get_target_len_reward": -0.02187281660735607, "rewards/get_target_len_reward_std": 0.060151894204318525, "step": 1690 }, { "advantages": -1.322478150100892e-07, "advantages_std": 1.627256417274475, "clip_ratio": 0.0, "completion_length": 88.05714569091796, "epoch": 1.2789473684210526, "grad_norm": 7.53125, "kl": 0.38881611078977585, "learning_rate": 4.360902255639098e-06, "loss": 0.0482, "num_tokens": 52202844.0, "reward": -1.630874615907669, "reward_std": 7.069363403320312, "rewards/get_chromagram_reward": 0.6299600839614868, "rewards/get_chromagram_reward_std": 0.12561212480068207, "rewards/get_intelligibility_reward": -5.494401431083679, "rewards/get_intelligibility_reward_std": 11.227049160003663, "rewards/get_target_len_reward": -0.028182223346084355, "rewards/get_target_len_reward_std": 0.08260326832532883, "step": 1700 }, { "advantages": 3.118688951531112e-07, "advantages_std": 1.5345268487930297, "clip_ratio": 0.0, "completion_length": 87.63631210327148, "epoch": 1.2864661654135339, "grad_norm": 5.9375, "kl": 0.3392829239368439, "learning_rate": 4.357142857142857e-06, "loss": 0.0339, "num_tokens": 52511465.0, "reward": -1.732570117712021, "reward_std": 6.669246482849121, "rewards/get_chromagram_reward": 0.6242092669010162, "rewards/get_chromagram_reward_std": 0.11928762272000312, "rewards/get_intelligibility_reward": -5.80348813533783, "rewards/get_intelligibility_reward_std": 10.308567714691161, "rewards/get_target_len_reward": -0.018431250657886266, "rewards/get_target_len_reward_std": 0.04403619281947613, "step": 1710 }, { "advantages": -3.8954119361278574e-07, "advantages_std": 1.5930951476097106, "clip_ratio": 0.0, "completion_length": 84.03095474243165, "epoch": 1.293984962406015, "grad_norm": 7.96875, "kl": 0.3179944708943367, "learning_rate": 4.353383458646617e-06, "loss": 0.0405, "num_tokens": 52810252.0, "reward": -1.6103318095207215, "reward_std": 7.0111226558685305, "rewards/get_chromagram_reward": 0.6270187616348266, "rewards/get_chromagram_reward_std": 0.12597814574837685, "rewards/get_intelligibility_reward": -5.428212428092957, "rewards/get_intelligibility_reward_std": 11.129766941070557, "rewards/get_target_len_reward": -0.029801409970968962, "rewards/get_target_len_reward_std": 0.08510931301862001, "step": 1720 }, { "advantages": 5.165736070011917e-08, "advantages_std": 1.530632495880127, "clip_ratio": 0.0, "completion_length": 86.00535888671875, "epoch": 1.3015037593984962, "grad_norm": 7.75, "kl": 0.5288962870836258, "learning_rate": 4.349624060150377e-06, "loss": 0.0588, "num_tokens": 53114288.0, "reward": -1.5636741399765015, "reward_std": 6.584192562103271, "rewards/get_chromagram_reward": 0.6242028534412384, "rewards/get_chromagram_reward_std": 0.10919138565659522, "rewards/get_intelligibility_reward": -5.293649542331695, "rewards/get_intelligibility_reward_std": 10.293786716461181, "rewards/get_target_len_reward": -0.021575375087559225, "rewards/get_target_len_reward_std": 0.059037490375339986, "step": 1730 }, { "advantages": -4.000341135679264e-07, "advantages_std": 1.519398820400238, "clip_ratio": 0.0, "completion_length": 86.75714416503907, "epoch": 1.3090225563909774, "grad_norm": 13.0625, "kl": 0.32477793991565707, "learning_rate": 4.345864661654135e-06, "loss": 0.035, "num_tokens": 53420372.0, "reward": -1.5456707239151002, "reward_std": 6.6155702590942385, "rewards/get_chromagram_reward": 0.6140604197978974, "rewards/get_chromagram_reward_std": 0.11171316578984261, "rewards/get_intelligibility_reward": -5.232801699638367, "rewards/get_intelligibility_reward_std": 10.395722389221191, "rewards/get_target_len_reward": -0.01827064696699381, "rewards/get_target_len_reward_std": 0.052512189373373985, "step": 1740 }, { "advantages": -5.935629210362947e-08, "advantages_std": 1.6254116296768188, "clip_ratio": 0.0, "completion_length": 86.40178680419922, "epoch": 1.3165413533834587, "grad_norm": 20.25, "kl": 0.32791008800268173, "learning_rate": 4.342105263157895e-06, "loss": 0.0382, "num_tokens": 53725544.0, "reward": -1.2662768244743348, "reward_std": 6.619711637496948, "rewards/get_chromagram_reward": 0.6357514679431915, "rewards/get_chromagram_reward_std": 0.11389932408928871, "rewards/get_intelligibility_reward": -4.411121499538422, "rewards/get_intelligibility_reward_std": 10.643956756591797, "rewards/get_target_len_reward": -0.0234602483920753, "rewards/get_target_len_reward_std": 0.06484440937638283, "step": 1750 }, { "advantages": -3.2583874087777076e-07, "advantages_std": 1.6038388013839722, "clip_ratio": 0.0, "completion_length": 84.06904830932618, "epoch": 1.32406015037594, "grad_norm": 24.0, "kl": 0.364103789627552, "learning_rate": 4.338345864661655e-06, "loss": 0.0407, "num_tokens": 54024535.0, "reward": -1.9007295727729798, "reward_std": 6.875812959671021, "rewards/get_chromagram_reward": 0.627848082780838, "rewards/get_chromagram_reward_std": 0.1251745492219925, "rewards/get_intelligibility_reward": -6.303326368331909, "rewards/get_intelligibility_reward_std": 10.511895561218262, "rewards/get_target_len_reward": -0.02671011108905077, "rewards/get_target_len_reward_std": 0.07524531930685044, "step": 1760 }, { "advantages": -3.568828191191642e-07, "advantages_std": 1.6275288105010985, "clip_ratio": 0.0, "completion_length": 85.51726379394532, "epoch": 1.331578947368421, "grad_norm": 13.75, "kl": 26.64915532618761, "learning_rate": 4.334586466165414e-06, "loss": 2.6748, "num_tokens": 54326894.0, "reward": -1.6861646354198456, "reward_std": 6.675696802139282, "rewards/get_chromagram_reward": 0.6133986115455627, "rewards/get_chromagram_reward_std": 0.11093844547867775, "rewards/get_intelligibility_reward": -5.648205804824829, "rewards/get_intelligibility_reward_std": 10.412496376037598, "rewards/get_target_len_reward": -0.02368657514452934, "rewards/get_target_len_reward_std": 0.08231094852089882, "step": 1770 }, { "advantages": 1.353521980718142e-07, "advantages_std": 1.505396318435669, "clip_ratio": 0.0, "completion_length": 84.86666793823242, "epoch": 1.3390977443609022, "grad_norm": 6.90625, "kl": 0.30498138815164566, "learning_rate": 4.330827067669173e-06, "loss": 0.0313, "num_tokens": 54628689.0, "reward": -1.3113522812724114, "reward_std": 6.501615858078003, "rewards/get_chromagram_reward": 0.621231734752655, "rewards/get_chromagram_reward_std": 0.11187814921140671, "rewards/get_intelligibility_reward": -4.536672675609589, "rewards/get_intelligibility_reward_std": 10.441095304489135, "rewards/get_target_len_reward": -0.01861567758023739, "rewards/get_target_len_reward_std": 0.047264106944203375, "step": 1780 }, { "advantages": -3.361453636330225e-07, "advantages_std": 1.6669383287429809, "clip_ratio": 0.0, "completion_length": 86.45059738159179, "epoch": 1.3466165413533835, "grad_norm": 176.0, "kl": 0.39939187467098236, "learning_rate": 4.327067669172933e-06, "loss": 0.0483, "num_tokens": 54934200.0, "reward": -1.4774677753448486, "reward_std": 6.3739540576934814, "rewards/get_chromagram_reward": 0.6193186163902282, "rewards/get_chromagram_reward_std": 0.12128224372863769, "rewards/get_intelligibility_reward": -5.026852607727051, "rewards/get_intelligibility_reward_std": 10.082926750183105, "rewards/get_target_len_reward": -0.024869086034595967, "rewards/get_target_len_reward_std": 0.07926335744559765, "step": 1790 }, { "advantages": -4.2666993449458346e-07, "advantages_std": 1.6381709456443787, "clip_ratio": 0.0, "completion_length": 84.02321548461914, "epoch": 1.3541353383458645, "grad_norm": 8.875, "kl": 352.27155195474626, "learning_rate": 4.323308270676692e-06, "loss": 35.2321, "num_tokens": 55233137.0, "reward": -1.8081068694591522, "reward_std": 7.203808116912842, "rewards/get_chromagram_reward": 0.6083721816539764, "rewards/get_chromagram_reward_std": 0.1257934235036373, "rewards/get_intelligibility_reward": -6.011060285568237, "rewards/get_intelligibility_reward_std": 11.309279632568359, "rewards/get_target_len_reward": -0.021632275870069863, "rewards/get_target_len_reward_std": 0.06019601076841354, "step": 1800 }, { "advantages": -6.941457684206398e-07, "advantages_std": 1.4455040097236633, "clip_ratio": 0.0, "completion_length": 86.79880981445312, "epoch": 1.3616541353383458, "grad_norm": 7.9375, "kl": 0.3444279834628105, "learning_rate": 4.319548872180451e-06, "loss": 0.0375, "num_tokens": 55540102.0, "reward": -1.517743881419301, "reward_std": 7.033014822006225, "rewards/get_chromagram_reward": 0.6199808716773987, "rewards/get_chromagram_reward_std": 0.11536377593874932, "rewards/get_intelligibility_reward": -5.1520576775074005, "rewards/get_intelligibility_reward_std": 11.083409786224365, "rewards/get_target_len_reward": -0.021154607087373732, "rewards/get_target_len_reward_std": 0.05849700104445219, "step": 1810 }, { "advantages": -3.558894064781271e-07, "advantages_std": 1.5300377130508422, "clip_ratio": 0.0, "completion_length": 84.18690643310546, "epoch": 1.369172932330827, "grad_norm": 82.5, "kl": 0.30925544649362563, "learning_rate": 4.315789473684211e-06, "loss": 0.0301, "num_tokens": 55839635.0, "reward": -1.4298014640808105, "reward_std": 6.427416753768921, "rewards/get_chromagram_reward": 0.6322358787059784, "rewards/get_chromagram_reward_std": 0.1157355085015297, "rewards/get_intelligibility_reward": -4.903176188468933, "rewards/get_intelligibility_reward_std": 10.227682876586915, "rewards/get_target_len_reward": -0.018464045226573945, "rewards/get_target_len_reward_std": 0.043058661930263045, "step": 1820 }, { "advantages": 2.5952856859134953e-07, "advantages_std": 1.522819423675537, "clip_ratio": 0.0, "completion_length": 89.50892944335938, "epoch": 1.3766917293233083, "grad_norm": 5.65625, "kl": 0.33326326608657836, "learning_rate": 4.3120300751879705e-06, "loss": 0.0338, "num_tokens": 56152656.0, "reward": -1.698273527622223, "reward_std": 6.879736280441284, "rewards/get_chromagram_reward": 0.6160605251789093, "rewards/get_chromagram_reward_std": 0.11663919240236283, "rewards/get_intelligibility_reward": -5.691361594200134, "rewards/get_intelligibility_reward_std": 10.71043291091919, "rewards/get_target_len_reward": -0.019519127625972032, "rewards/get_target_len_reward_std": 0.04903759118169546, "step": 1830 }, { "advantages": 1.6887984060076634e-07, "advantages_std": 1.681946301460266, "clip_ratio": 0.0, "completion_length": 87.87678680419921, "epoch": 1.3842105263157896, "grad_norm": 368.0, "kl": 0.3765055865049362, "learning_rate": 4.30827067669173e-06, "loss": 0.0432, "num_tokens": 56461831.0, "reward": -1.584184655547142, "reward_std": 6.927801322937012, "rewards/get_chromagram_reward": 0.6217585206031799, "rewards/get_chromagram_reward_std": 0.11466614827513695, "rewards/get_intelligibility_reward": -5.353343296051025, "rewards/get_intelligibility_reward_std": 10.991171741485596, "rewards/get_target_len_reward": -0.020968902111053466, "rewards/get_target_len_reward_std": 0.06113089099526405, "step": 1840 }, { "advantages": -3.616015149532359e-07, "advantages_std": 1.618896448612213, "clip_ratio": 0.0, "completion_length": 89.11667022705078, "epoch": 1.3917293233082706, "grad_norm": 8.5, "kl": 0.3279285907745361, "learning_rate": 4.304511278195489e-06, "loss": 0.0355, "num_tokens": 56775245.0, "reward": -1.461240404844284, "reward_std": 6.541769075393677, "rewards/get_chromagram_reward": 0.6240219950675965, "rewards/get_chromagram_reward_std": 0.11915393397212029, "rewards/get_intelligibility_reward": -4.989465570449829, "rewards/get_intelligibility_reward_std": 10.445559215545654, "rewards/get_target_len_reward": -0.01827723728492856, "rewards/get_target_len_reward_std": 0.049859827384352684, "step": 1850 }, { "advantages": -2.9305610382834857e-08, "advantages_std": 1.6972397446632386, "clip_ratio": 0.0, "completion_length": 86.04404983520507, "epoch": 1.3992481203007519, "grad_norm": 14.0625, "kl": 0.32066617608070375, "learning_rate": 4.3007518796992486e-06, "loss": 0.0313, "num_tokens": 57079914.0, "reward": -1.1806714341044426, "reward_std": 6.98257007598877, "rewards/get_chromagram_reward": 0.6416585624217988, "rewards/get_chromagram_reward_std": 0.11144919693470001, "rewards/get_intelligibility_reward": -4.1637926429510115, "rewards/get_intelligibility_reward_std": 11.391026782989503, "rewards/get_target_len_reward": -0.019880097545683383, "rewards/get_target_len_reward_std": 0.046197598055005076, "step": 1860 }, { "advantages": -2.48849386252914e-07, "advantages_std": 1.6818946957588197, "clip_ratio": 0.0, "completion_length": 87.50714492797852, "epoch": 1.406766917293233, "grad_norm": 8.125, "kl": 0.3438696265220642, "learning_rate": 4.296992481203008e-06, "loss": 0.0383, "num_tokens": 57388572.0, "reward": -1.6199523091316224, "reward_std": 7.197538423538208, "rewards/get_chromagram_reward": 0.6259011566638947, "rewards/get_chromagram_reward_std": 0.10719226896762848, "rewards/get_intelligibility_reward": -5.459428668022156, "rewards/get_intelligibility_reward_std": 11.51907172203064, "rewards/get_target_len_reward": -0.02632923349738121, "rewards/get_target_len_reward_std": 0.08135125394910574, "step": 1870 }, { "advantages": -1.9955137986471527e-07, "advantages_std": 1.5469175934791566, "clip_ratio": 0.0, "completion_length": 84.44464416503907, "epoch": 1.4142857142857144, "grad_norm": 7.09375, "kl": 0.3164676412940025, "learning_rate": 4.293233082706768e-06, "loss": 0.0365, "num_tokens": 57688742.0, "reward": -1.4961084365844726, "reward_std": 6.876049757003784, "rewards/get_chromagram_reward": 0.6287756443023682, "rewards/get_chromagram_reward_std": 0.12433092966675759, "rewards/get_intelligibility_reward": -5.093908071517944, "rewards/get_intelligibility_reward_std": 11.01416654586792, "rewards/get_target_len_reward": -0.02319267261773348, "rewards/get_target_len_reward_std": 0.06343817189335824, "step": 1880 }, { "advantages": 4.1226548148642904e-07, "advantages_std": 1.6911157608032226, "clip_ratio": 0.0, "completion_length": 87.12797775268555, "epoch": 1.4218045112781956, "grad_norm": 7.25, "kl": 0.31820856481790544, "learning_rate": 4.289473684210527e-06, "loss": 0.034, "num_tokens": 57995800.0, "reward": -1.7069338321685792, "reward_std": 6.826904821395874, "rewards/get_chromagram_reward": 0.6184460759162903, "rewards/get_chromagram_reward_std": 0.10959328189492226, "rewards/get_intelligibility_reward": -5.722243046760559, "rewards/get_intelligibility_reward_std": 10.652085971832275, "rewards/get_target_len_reward": -0.017004191037267448, "rewards/get_target_len_reward_std": 0.040973109379410746, "step": 1890 }, { "advantages": -8.443988974704553e-09, "advantages_std": 1.610334575176239, "clip_ratio": 0.0, "completion_length": 87.94821701049804, "epoch": 1.4293233082706767, "grad_norm": 7.0, "kl": 0.38517433404922485, "learning_rate": 4.2857142857142855e-06, "loss": 0.0417, "num_tokens": 58305269.0, "reward": -1.8303340256214142, "reward_std": 7.070497989654541, "rewards/get_chromagram_reward": 0.6330944538116455, "rewards/get_chromagram_reward_std": 0.11860318705439568, "rewards/get_intelligibility_reward": -6.100761485099793, "rewards/get_intelligibility_reward_std": 10.920927333831788, "rewards/get_target_len_reward": -0.02333456613123417, "rewards/get_target_len_reward_std": 0.06448300629854202, "step": 1900 }, { "advantages": -4.054357674476705e-07, "advantages_std": 1.4755270838737489, "clip_ratio": 0.0, "completion_length": 88.48571701049805, "epoch": 1.436842105263158, "grad_norm": 5.1875, "kl": 0.3150394469499588, "learning_rate": 4.281954887218046e-06, "loss": 0.0335, "num_tokens": 58615675.0, "reward": -1.3779967308044434, "reward_std": 6.456488084793091, "rewards/get_chromagram_reward": 0.6358872652053833, "rewards/get_chromagram_reward_std": 0.10376172587275505, "rewards/get_intelligibility_reward": -4.750557589530945, "rewards/get_intelligibility_reward_std": 10.346266222000121, "rewards/get_target_len_reward": -0.019319624826312064, "rewards/get_target_len_reward_std": 0.05323235634714365, "step": 1910 }, { "advantages": 7.972121274235633e-08, "advantages_std": 1.5427042961120605, "clip_ratio": 0.0, "completion_length": 85.0898826599121, "epoch": 1.4443609022556392, "grad_norm": 43.25, "kl": 0.3174961805343628, "learning_rate": 4.278195488721805e-06, "loss": 0.0355, "num_tokens": 58917778.0, "reward": -1.9309344321489335, "reward_std": 7.068848085403443, "rewards/get_chromagram_reward": 0.6075616657733918, "rewards/get_chromagram_reward_std": 0.10997026190161704, "rewards/get_intelligibility_reward": -6.379441344738007, "rewards/get_intelligibility_reward_std": 10.813093662261963, "rewards/get_target_len_reward": -0.0209231847897172, "rewards/get_target_len_reward_std": 0.06707295998930932, "step": 1920 }, { "advantages": 3.0646723061522605e-07, "advantages_std": 1.5311549425125122, "clip_ratio": 0.0, "completion_length": 88.07678527832032, "epoch": 1.4518796992481202, "grad_norm": 6.03125, "kl": 1.02582398802042, "learning_rate": 4.274436090225564e-06, "loss": 0.1081, "num_tokens": 59227243.0, "reward": -1.4925808787345887, "reward_std": 6.5475245952606205, "rewards/get_chromagram_reward": 0.6171232759952545, "rewards/get_chromagram_reward_std": 0.105997933447361, "rewards/get_intelligibility_reward": -5.07579274745658, "rewards/get_intelligibility_reward_std": 10.237911939620972, "rewards/get_target_len_reward": -0.01907276422716677, "rewards/get_target_len_reward_std": 0.062690981477499, "step": 1930 }, { "advantages": -6.929040182512836e-07, "advantages_std": 1.4544852733612061, "clip_ratio": 0.0, "completion_length": 86.28988189697266, "epoch": 1.4593984962406015, "grad_norm": 74.0, "kl": 0.5569351270794869, "learning_rate": 4.270676691729323e-06, "loss": 0.0581, "num_tokens": 59533137.0, "reward": -1.0190489768981934, "reward_std": 6.246702671051025, "rewards/get_chromagram_reward": 0.6327454149723053, "rewards/get_chromagram_reward_std": 0.12573510631918908, "rewards/get_intelligibility_reward": -3.6655944168567656, "rewards/get_intelligibility_reward_std": 10.2553316116333, "rewards/get_target_len_reward": -0.024297917261719704, "rewards/get_target_len_reward_std": 0.06265397630631923, "step": 1940 }, { "advantages": 2.677241838000555e-07, "advantages_std": 1.668886387348175, "clip_ratio": 0.0, "completion_length": 86.92559661865235, "epoch": 1.4669172932330827, "grad_norm": 312.0, "kl": 0.3745987594127655, "learning_rate": 4.266917293233083e-06, "loss": 0.0421, "num_tokens": 59839979.0, "reward": -1.851040291786194, "reward_std": 7.3681464195251465, "rewards/get_chromagram_reward": 0.6237763226032257, "rewards/get_chromagram_reward_std": 0.11604878455400466, "rewards/get_intelligibility_reward": -6.155086278915405, "rewards/get_intelligibility_reward_std": 11.5657377243042, "rewards/get_target_len_reward": -0.021810497622936964, "rewards/get_target_len_reward_std": 0.059417030215263365, "step": 1950 }, { "advantages": -1.0927521287840136e-08, "advantages_std": 1.5496743440628051, "clip_ratio": 0.0, "completion_length": 86.44821548461914, "epoch": 1.474436090225564, "grad_norm": 9.9375, "kl": 0.43885346353054044, "learning_rate": 4.2631578947368425e-06, "loss": 0.0477, "num_tokens": 60144915.0, "reward": -1.4612555474042892, "reward_std": 6.437270307540894, "rewards/get_chromagram_reward": 0.6093687117099762, "rewards/get_chromagram_reward_std": 0.11680521965026855, "rewards/get_intelligibility_reward": -4.9737292408943174, "rewards/get_intelligibility_reward_std": 10.008707952499389, "rewards/get_target_len_reward": -0.019405988790094853, "rewards/get_target_len_reward_std": 0.055537658743560314, "step": 1960 }, { "advantages": -2.731879504835888e-08, "advantages_std": 1.5995185375213623, "clip_ratio": 0.0, "completion_length": 87.5625015258789, "epoch": 1.4819548872180452, "grad_norm": 5.96875, "kl": 2.4066055372357367, "learning_rate": 4.259398496240602e-06, "loss": 0.2514, "num_tokens": 60452724.0, "reward": -1.7014335989952087, "reward_std": 7.449602222442627, "rewards/get_chromagram_reward": 0.6065891265869141, "rewards/get_chromagram_reward_std": 0.12512124553322793, "rewards/get_intelligibility_reward": -5.687872338294983, "rewards/get_intelligibility_reward_std": 11.906324100494384, "rewards/get_target_len_reward": -0.02301737116649747, "rewards/get_target_len_reward_std": 0.08284691572189332, "step": 1970 }, { "advantages": -4.1027865336218385e-07, "advantages_std": 1.5075610518455504, "clip_ratio": 0.0, "completion_length": 86.51428833007813, "epoch": 1.4894736842105263, "grad_norm": 11.0625, "kl": 0.5192355632781982, "learning_rate": 4.255639097744361e-06, "loss": 0.0555, "num_tokens": 60758290.0, "reward": -1.4112955316901208, "reward_std": 6.756988954544068, "rewards/get_chromagram_reward": 0.6147924721240997, "rewards/get_chromagram_reward_std": 0.11576244533061981, "rewards/get_intelligibility_reward": -4.826417958736419, "rewards/get_intelligibility_reward_std": 10.790842008590698, "rewards/get_target_len_reward": -0.022260715905576944, "rewards/get_target_len_reward_std": 0.0649005737155676, "step": 1980 }, { "advantages": -4.5200177467563664e-08, "advantages_std": 1.6201247930526734, "clip_ratio": 0.0, "completion_length": 86.08214416503907, "epoch": 1.4969924812030075, "grad_norm": 7.46875, "kl": 0.3470937877893448, "learning_rate": 4.2518796992481206e-06, "loss": 0.0467, "num_tokens": 61062617.0, "reward": -1.359550093114376, "reward_std": 6.684824514389038, "rewards/get_chromagram_reward": 0.6119980812072754, "rewards/get_chromagram_reward_std": 0.11458643302321433, "rewards/get_intelligibility_reward": -4.662409788370132, "rewards/get_intelligibility_reward_std": 10.784858322143554, "rewards/get_target_len_reward": -0.028238356299698352, "rewards/get_target_len_reward_std": 0.08857602290809155, "step": 1990 }, { "advantages": -6.33299521268782e-08, "advantages_std": 1.6459670662879944, "clip_ratio": 0.0, "completion_length": 86.25833358764649, "epoch": 1.5045112781954888, "grad_norm": 7.0, "kl": 0.32820585519075396, "learning_rate": 4.24812030075188e-06, "loss": 0.0331, "num_tokens": 61367485.0, "reward": -1.6770533800125123, "reward_std": 6.5401702404022215, "rewards/get_chromagram_reward": 0.6255118787288666, "rewards/get_chromagram_reward_std": 0.11729757189750671, "rewards/get_intelligibility_reward": -5.6358928203582765, "rewards/get_intelligibility_reward_std": 10.16779546737671, "rewards/get_target_len_reward": -0.02077860590070486, "rewards/get_target_len_reward_std": 0.05197990909218788, "step": 2000 }, { "advantages": 2.6673079958072774e-07, "advantages_std": 1.6713383674621582, "clip_ratio": 0.0, "completion_length": 90.85238342285156, "epoch": 1.5120300751879698, "grad_norm": 8.9375, "kl": 0.5624005109071731, "learning_rate": 4.244360902255639e-06, "loss": 0.0571, "num_tokens": 61685194.0, "reward": -1.4429410874843598, "reward_std": 6.481061124801636, "rewards/get_chromagram_reward": 0.6198509991168976, "rewards/get_chromagram_reward_std": 0.1047507330775261, "rewards/get_intelligibility_reward": -4.931775975227356, "rewards/get_intelligibility_reward_std": 10.304393005371093, "rewards/get_target_len_reward": -0.01689812494441867, "rewards/get_target_len_reward_std": 0.046997369080781934, "step": 2010 }, { "advantages": -1.6391277597449515e-07, "advantages_std": 1.5733685612678527, "clip_ratio": 0.0, "completion_length": 86.6053581237793, "epoch": 1.519548872180451, "grad_norm": 6.625, "kl": 0.31895223557949065, "learning_rate": 4.240601503759399e-06, "loss": 0.0354, "num_tokens": 61991299.0, "reward": -1.4001641318202018, "reward_std": 6.923908948898315, "rewards/get_chromagram_reward": 0.6155903279781342, "rewards/get_chromagram_reward_std": 0.1192019023001194, "rewards/get_intelligibility_reward": -4.797593307495117, "rewards/get_intelligibility_reward_std": 11.075876808166504, "rewards/get_target_len_reward": -0.018489096034318208, "rewards/get_target_len_reward_std": 0.05505495984107256, "step": 2020 }, { "advantages": 1.416852043689687e-07, "advantages_std": 1.5786041617393494, "clip_ratio": 0.0, "completion_length": 88.1529769897461, "epoch": 1.5270676691729324, "grad_norm": 6.15625, "kl": 0.2888381630182266, "learning_rate": 4.236842105263158e-06, "loss": 0.0373, "num_tokens": 62301897.0, "reward": -1.2167622834444045, "reward_std": 6.394130229949951, "rewards/get_chromagram_reward": 0.6328611254692078, "rewards/get_chromagram_reward_std": 0.11856895983219147, "rewards/get_intelligibility_reward": -4.259195864200592, "rewards/get_intelligibility_reward_std": 10.343937587738036, "rewards/get_target_len_reward": -0.02395202973857522, "rewards/get_target_len_reward_std": 0.07318154443055391, "step": 2030 }, { "advantages": 1.4851491414447082e-07, "advantages_std": 1.6340704083442688, "clip_ratio": 0.0, "completion_length": 85.79464416503906, "epoch": 1.5345864661654134, "grad_norm": 7.875, "kl": 0.3205702111124992, "learning_rate": 4.233082706766918e-06, "loss": 0.0357, "num_tokens": 62606044.0, "reward": -1.099614891409874, "reward_std": 6.524851655960083, "rewards/get_chromagram_reward": 0.6311138451099396, "rewards/get_chromagram_reward_std": 0.11696632355451583, "rewards/get_intelligibility_reward": -3.907797175645828, "rewards/get_intelligibility_reward_std": 10.681060409545898, "rewards/get_target_len_reward": -0.022161136101931333, "rewards/get_target_len_reward_std": 0.0629130657762289, "step": 2040 }, { "advantages": 8.774300653158206e-07, "advantages_std": 1.6362772941589356, "clip_ratio": 0.0, "completion_length": 82.92381134033204, "epoch": 1.5421052631578949, "grad_norm": 7.375, "kl": 0.43492402136325836, "learning_rate": 4.229323308270677e-06, "loss": 0.048, "num_tokens": 62901928.0, "reward": -1.770355224609375, "reward_std": 7.1313379287719725, "rewards/get_chromagram_reward": 0.6375100195407868, "rewards/get_chromagram_reward_std": 0.12280775308609009, "rewards/get_intelligibility_reward": -5.923044615983963, "rewards/get_intelligibility_reward_std": 11.02694330215454, "rewards/get_target_len_reward": -0.025530668999999763, "rewards/get_target_len_reward_std": 0.07188544794917107, "step": 2050 }, { "advantages": -3.94135725656497e-07, "advantages_std": 1.5501110434532166, "clip_ratio": 0.0, "completion_length": 87.2125015258789, "epoch": 1.549624060150376, "grad_norm": 5.53125, "kl": 0.29535968899726867, "learning_rate": 4.225563909774436e-06, "loss": 0.0318, "num_tokens": 63209556.0, "reward": -1.536487441137433, "reward_std": 6.634403896331787, "rewards/get_chromagram_reward": 0.6187046766281128, "rewards/get_chromagram_reward_std": 0.1150432951748371, "rewards/get_intelligibility_reward": -5.209327453374863, "rewards/get_intelligibility_reward_std": 10.373435020446777, "rewards/get_target_len_reward": -0.018839253485202788, "rewards/get_target_len_reward_std": 0.060705633461475374, "step": 2060 }, { "advantages": -7.376074186993265e-08, "advantages_std": 1.679667603969574, "clip_ratio": 0.0, "completion_length": 88.85000228881836, "epoch": 1.5571428571428572, "grad_norm": 7.9375, "kl": 0.31674362570047376, "learning_rate": 4.221804511278196e-06, "loss": 0.0355, "num_tokens": 63521952.0, "reward": -1.2775519102811814, "reward_std": 6.381492376327515, "rewards/get_chromagram_reward": 0.6168266594409942, "rewards/get_chromagram_reward_std": 0.10888128727674484, "rewards/get_intelligibility_reward": -4.430067479610443, "rewards/get_intelligibility_reward_std": 10.242457675933839, "rewards/get_target_len_reward": -0.019414818566292524, "rewards/get_target_len_reward_std": 0.05602561179548502, "step": 2070 }, { "advantages": -3.116826341909018e-07, "advantages_std": 1.5748518228530883, "clip_ratio": 0.0, "completion_length": 86.72797622680665, "epoch": 1.5646616541353384, "grad_norm": 5.71875, "kl": 0.37901861518621444, "learning_rate": 4.218045112781956e-06, "loss": 0.0437, "num_tokens": 63828336.0, "reward": -1.6440318048000335, "reward_std": 7.379472351074218, "rewards/get_chromagram_reward": 0.6186399698257447, "rewards/get_chromagram_reward_std": 0.11654711589217186, "rewards/get_intelligibility_reward": -5.527314138412476, "rewards/get_intelligibility_reward_std": 11.792286777496338, "rewards/get_target_len_reward": -0.02342082476243377, "rewards/get_target_len_reward_std": 0.07802388649433852, "step": 2080 }, { "advantages": -3.8345656037108713e-07, "advantages_std": 1.5448354840278626, "clip_ratio": 0.0, "completion_length": 86.90119247436523, "epoch": 1.5721804511278195, "grad_norm": 452.0, "kl": 0.4000831454992294, "learning_rate": 4.2142857142857145e-06, "loss": 0.044, "num_tokens": 64135276.0, "reward": -1.2782334357500076, "reward_std": 6.2945537090301515, "rewards/get_chromagram_reward": 0.6352133572101593, "rewards/get_chromagram_reward_std": 0.10771576762199402, "rewards/get_intelligibility_reward": -4.444746363162994, "rewards/get_intelligibility_reward_std": 10.07327060699463, "rewards/get_target_len_reward": -0.0251670790836215, "rewards/get_target_len_reward_std": 0.07453116215765476, "step": 2090 }, { "advantages": 1.3907754237152403e-08, "advantages_std": 1.6230470061302185, "clip_ratio": 0.0, "completion_length": 87.97202606201172, "epoch": 1.5796992481203007, "grad_norm": 31.125, "kl": 0.39640209525823594, "learning_rate": 4.210526315789474e-06, "loss": 0.0425, "num_tokens": 64444876.0, "reward": -1.6344273149967194, "reward_std": 6.510859823226928, "rewards/get_chromagram_reward": 0.6180970013141632, "rewards/get_chromagram_reward_std": 0.12092170044779778, "rewards/get_intelligibility_reward": -5.5024089336395265, "rewards/get_intelligibility_reward_std": 10.168234968185425, "rewards/get_target_len_reward": -0.018969781789928676, "rewards/get_target_len_reward_std": 0.04659441541880369, "step": 2100 }, { "advantages": -1.7695124654437677e-08, "advantages_std": 1.5527965784072877, "clip_ratio": 0.0, "completion_length": 86.90178756713867, "epoch": 1.587218045112782, "grad_norm": 6.46875, "kl": 0.34301224946975706, "learning_rate": 4.206766917293234e-06, "loss": 0.0396, "num_tokens": 64751657.0, "reward": -1.6146832585334778, "reward_std": 7.331169462203979, "rewards/get_chromagram_reward": 0.6357932686805725, "rewards/get_chromagram_reward_std": 0.1203016348183155, "rewards/get_intelligibility_reward": -5.459855031967163, "rewards/get_intelligibility_reward_std": 11.720126533508301, "rewards/get_target_len_reward": -0.019987638201564552, "rewards/get_target_len_reward_std": 0.058079042471945286, "step": 2110 }, { "advantages": 6.993611819439139e-07, "advantages_std": 1.6053866624832154, "clip_ratio": 0.0, "completion_length": 85.5982162475586, "epoch": 1.594736842105263, "grad_norm": 8.875, "kl": 0.6178436279296875, "learning_rate": 4.2030075187969926e-06, "loss": 0.0617, "num_tokens": 65054328.0, "reward": -1.8031591415405273, "reward_std": 6.9044517993927, "rewards/get_chromagram_reward": 0.6330244064331054, "rewards/get_chromagram_reward_std": 0.11358195766806603, "rewards/get_intelligibility_reward": -6.023799133300781, "rewards/get_intelligibility_reward_std": 10.727534580230714, "rewards/get_target_len_reward": -0.018702456075698138, "rewards/get_target_len_reward_std": 0.04642644617706537, "step": 2120 }, { "advantages": -3.3875307146047364e-07, "advantages_std": 1.5467365503311157, "clip_ratio": 0.0, "completion_length": 87.56904983520508, "epoch": 1.6022556390977445, "grad_norm": 5.3125, "kl": 0.42215982377529143, "learning_rate": 4.199248120300752e-06, "loss": 0.049, "num_tokens": 65363307.0, "reward": -1.4519341588020325, "reward_std": 6.497218227386474, "rewards/get_chromagram_reward": 0.6397540211677551, "rewards/get_chromagram_reward_std": 0.12757838517427444, "rewards/get_intelligibility_reward": -4.968119716644287, "rewards/get_intelligibility_reward_std": 10.35838565826416, "rewards/get_target_len_reward": -0.027436566725373267, "rewards/get_target_len_reward_std": 0.07567532435059547, "step": 2130 }, { "advantages": 4.072984125969015e-07, "advantages_std": 1.5269123673439027, "clip_ratio": 0.0, "completion_length": 90.61607437133789, "epoch": 1.6097744360902255, "grad_norm": 8.375, "kl": 0.33207988142967226, "learning_rate": 4.195488721804512e-06, "loss": 0.0385, "num_tokens": 65679406.0, "reward": -1.6267223179340362, "reward_std": 6.979967403411865, "rewards/get_chromagram_reward": 0.6148672103881836, "rewards/get_chromagram_reward_std": 0.1095418579876423, "rewards/get_intelligibility_reward": -5.475715672969818, "rewards/get_intelligibility_reward_std": 10.973806667327882, "rewards/get_target_len_reward": -0.01931815128773451, "rewards/get_target_len_reward_std": 0.06183572188019752, "step": 2140 }, { "advantages": 5.145867760347756e-07, "advantages_std": 1.6080675721168518, "clip_ratio": 0.0, "completion_length": 85.91190643310547, "epoch": 1.6172932330827068, "grad_norm": 5.4375, "kl": 0.4411650985479355, "learning_rate": 4.1917293233082715e-06, "loss": 0.0475, "num_tokens": 65983772.0, "reward": -1.6623242631554604, "reward_std": 6.805143022537232, "rewards/get_chromagram_reward": 0.6309055805206298, "rewards/get_chromagram_reward_std": 0.12571141943335534, "rewards/get_intelligibility_reward": -5.5929530501365665, "rewards/get_intelligibility_reward_std": 10.54892177581787, "rewards/get_target_len_reward": -0.02492492999881506, "rewards/get_target_len_reward_std": 0.06787819992750883, "step": 2150 }, { "advantages": 6.680686985838235e-08, "advantages_std": 1.5221306800842285, "clip_ratio": 0.0, "completion_length": 85.75952529907227, "epoch": 1.624812030075188, "grad_norm": 5.25, "kl": 0.48574246764183043, "learning_rate": 4.18796992481203e-06, "loss": 0.0516, "num_tokens": 66286686.0, "reward": -1.631846097111702, "reward_std": 6.67475266456604, "rewards/get_chromagram_reward": 0.6112766861915588, "rewards/get_chromagram_reward_std": 0.10239461362361908, "rewards/get_intelligibility_reward": -5.489213287830353, "rewards/get_intelligibility_reward_std": 10.394185543060303, "rewards/get_target_len_reward": -0.017601419892162084, "rewards/get_target_len_reward_std": 0.0520940450951457, "step": 2160 }, { "advantages": 1.487632751207002e-07, "advantages_std": 1.5328869938850402, "clip_ratio": 0.0, "completion_length": 84.20238342285157, "epoch": 1.632330827067669, "grad_norm": 5.5, "kl": 0.33671319782733916, "learning_rate": 4.18421052631579e-06, "loss": 0.033, "num_tokens": 66586126.0, "reward": -1.4015221863985061, "reward_std": 6.84256010055542, "rewards/get_chromagram_reward": 0.6251766622066498, "rewards/get_chromagram_reward_std": 0.1158628709614277, "rewards/get_intelligibility_reward": -4.806045126914978, "rewards/get_intelligibility_reward_std": 10.973923015594483, "rewards/get_target_len_reward": -0.023697828501462938, "rewards/get_target_len_reward_std": 0.0575832212343812, "step": 2170 }, { "advantages": -1.763305235158441e-08, "advantages_std": 1.6057313919067382, "clip_ratio": 0.0, "completion_length": 86.18274002075195, "epoch": 1.6398496240601503, "grad_norm": 7.28125, "kl": 0.2764908343553543, "learning_rate": 4.18045112781955e-06, "loss": 0.029, "num_tokens": 66890272.0, "reward": -1.1156673699617385, "reward_std": 6.271809482574463, "rewards/get_chromagram_reward": 0.6292023539543152, "rewards/get_chromagram_reward_std": 0.11582249924540519, "rewards/get_intelligibility_reward": -3.95166752114892, "rewards/get_intelligibility_reward_std": 10.14079508781433, "rewards/get_target_len_reward": -0.0245367381721735, "rewards/get_target_len_reward_std": 0.06388699542731047, "step": 2180 }, { "advantages": -1.6887986475921933e-08, "advantages_std": 1.6172147035598754, "clip_ratio": 0.0, "completion_length": 90.51845474243164, "epoch": 1.6473684210526316, "grad_norm": 30.5, "kl": 0.37103464752435683, "learning_rate": 4.176691729323308e-06, "loss": 0.041, "num_tokens": 67206237.0, "reward": -1.6112347409129142, "reward_std": 6.924424266815185, "rewards/get_chromagram_reward": 0.6163883149623871, "rewards/get_chromagram_reward_std": 0.11688357889652252, "rewards/get_intelligibility_reward": -5.430252596735954, "rewards/get_intelligibility_reward_std": 10.924818515777588, "rewards/get_target_len_reward": -0.019839623104780914, "rewards/get_target_len_reward_std": 0.05312856025993824, "step": 2190 }, { "advantages": 2.2376577533123054e-07, "advantages_std": 1.6153385758399963, "clip_ratio": 0.0, "completion_length": 84.91428680419922, "epoch": 1.6548872180451126, "grad_norm": 6.5625, "kl": 0.32724575996398925, "learning_rate": 4.172932330827068e-06, "loss": 0.0363, "num_tokens": 67507543.0, "reward": -1.7804813146591187, "reward_std": 6.974139785766601, "rewards/get_chromagram_reward": 0.6214574337005615, "rewards/get_chromagram_reward_std": 0.1169826865196228, "rewards/get_intelligibility_reward": -5.942538261413574, "rewards/get_intelligibility_reward_std": 10.9132661819458, "rewards/get_target_len_reward": -0.020362868160009383, "rewards/get_target_len_reward_std": 0.06523161455988884, "step": 2200 }, { "advantages": 4.502634368463987e-07, "advantages_std": 1.508051860332489, "clip_ratio": 0.0, "completion_length": 87.18393020629883, "epoch": 1.662406015037594, "grad_norm": 6.21875, "kl": 0.3858113706111908, "learning_rate": 4.169172932330827e-06, "loss": 0.0411, "num_tokens": 67815097.0, "reward": -1.484154748916626, "reward_std": 6.593449640274048, "rewards/get_chromagram_reward": 0.6221783816814422, "rewards/get_chromagram_reward_std": 0.11762973815202712, "rewards/get_intelligibility_reward": -5.0492493391036986, "rewards/get_intelligibility_reward_std": 10.478116226196288, "rewards/get_target_len_reward": -0.02539309123530984, "rewards/get_target_len_reward_std": 0.06811941638588906, "step": 2210 }, { "advantages": -5.270044144189967e-07, "advantages_std": 1.5974279642105103, "clip_ratio": 0.0, "completion_length": 84.21190643310547, "epoch": 1.6699248120300751, "grad_norm": 11.0, "kl": 0.3089997261762619, "learning_rate": 4.165413533834587e-06, "loss": 0.0315, "num_tokens": 68113590.0, "reward": -1.5647227585315704, "reward_std": 6.139684581756592, "rewards/get_chromagram_reward": 0.6186821639537812, "rewards/get_chromagram_reward_std": 0.11593869104981422, "rewards/get_intelligibility_reward": -5.292714548110962, "rewards/get_intelligibility_reward_std": 9.444132328033447, "rewards/get_target_len_reward": -0.020135853625833987, "rewards/get_target_len_reward_std": 0.052642738446593285, "step": 2220 }, { "advantages": 6.243586710752425e-07, "advantages_std": 1.6422517776489258, "clip_ratio": 0.0, "completion_length": 87.30714416503906, "epoch": 1.6774436090225564, "grad_norm": 7.8125, "kl": 0.40762856900691985, "learning_rate": 4.161654135338346e-06, "loss": 0.0448, "num_tokens": 68421251.0, "reward": -1.326597476005554, "reward_std": 6.552673864364624, "rewards/get_chromagram_reward": 0.6257815420627594, "rewards/get_chromagram_reward_std": 0.11139634773135185, "rewards/get_intelligibility_reward": -4.584523618221283, "rewards/get_intelligibility_reward_std": 10.49343433380127, "rewards/get_target_len_reward": -0.021050124522298576, "rewards/get_target_len_reward_std": 0.05365060679614544, "step": 2230 }, { "advantages": -6.87937045995568e-08, "advantages_std": 1.6293469667434692, "clip_ratio": 0.0, "completion_length": 84.20000228881835, "epoch": 1.6849624060150377, "grad_norm": 6.59375, "kl": 0.34043067693710327, "learning_rate": 4.157894736842106e-06, "loss": 0.0404, "num_tokens": 68720116.0, "reward": -1.474801480770111, "reward_std": 6.295886039733887, "rewards/get_chromagram_reward": 0.617020720243454, "rewards/get_chromagram_reward_std": 0.11099176555871963, "rewards/get_intelligibility_reward": -5.020518136024475, "rewards/get_intelligibility_reward_std": 9.897148609161377, "rewards/get_target_len_reward": -0.020906874537467958, "rewards/get_target_len_reward_std": 0.05231982320547104, "step": 2240 }, { "advantages": 3.166496867379465e-07, "advantages_std": 1.5133726239204406, "clip_ratio": 0.0, "completion_length": 89.67440643310547, "epoch": 1.6924812030075187, "grad_norm": 6.0625, "kl": 0.4981168583035469, "learning_rate": 4.1541353383458646e-06, "loss": 0.0493, "num_tokens": 69034003.0, "reward": -1.402723914384842, "reward_std": 6.56936559677124, "rewards/get_chromagram_reward": 0.6300440192222595, "rewards/get_chromagram_reward_std": 0.11845290139317513, "rewards/get_intelligibility_reward": -4.820070219039917, "rewards/get_intelligibility_reward_std": 10.512784290313721, "rewards/get_target_len_reward": -0.018145297607406973, "rewards/get_target_len_reward_std": 0.03970406278967857, "step": 2250 }, { "advantages": -1.73598525776697e-07, "advantages_std": 1.5843783140182495, "clip_ratio": 0.0, "completion_length": 82.07440643310547, "epoch": 1.7, "grad_norm": 7.59375, "kl": 0.3415731221437454, "learning_rate": 4.150375939849624e-06, "loss": 0.037, "num_tokens": 69327549.0, "reward": -1.6767581880092621, "reward_std": 6.931660556793213, "rewards/get_chromagram_reward": 0.6320427298545838, "rewards/get_chromagram_reward_std": 0.119264155626297, "rewards/get_intelligibility_reward": -5.639317321777344, "rewards/get_intelligibility_reward_std": 10.879788684844971, "rewards/get_target_len_reward": -0.022999674873426558, "rewards/get_target_len_reward_std": 0.05821425933390856, "step": 2260 }, { "advantages": 4.4641398293521207e-07, "advantages_std": 1.5655923247337342, "clip_ratio": 0.0, "completion_length": 89.11309661865235, "epoch": 1.7075187969924812, "grad_norm": 6.8125, "kl": 0.313992902636528, "learning_rate": 4.146616541353384e-06, "loss": 0.0304, "num_tokens": 69640295.0, "reward": -1.3783277034759522, "reward_std": 6.3931385517120365, "rewards/get_chromagram_reward": 0.6265924870967865, "rewards/get_chromagram_reward_std": 0.11074900850653649, "rewards/get_intelligibility_reward": -4.743269371986389, "rewards/get_intelligibility_reward_std": 10.24311022758484, "rewards/get_target_len_reward": -0.018305783160030842, "rewards/get_target_len_reward_std": 0.0456329807639122, "step": 2270 }, { "advantages": 1.5174350380675606e-07, "advantages_std": 1.519785189628601, "clip_ratio": 0.0, "completion_length": 85.77321548461914, "epoch": 1.7150375939849622, "grad_norm": 8.9375, "kl": 0.32146727442741396, "learning_rate": 4.1428571428571435e-06, "loss": 0.0365, "num_tokens": 69944689.0, "reward": -1.4858910858631134, "reward_std": 6.917770576477051, "rewards/get_chromagram_reward": 0.6075939893722534, "rewards/get_chromagram_reward_std": 0.11109072864055633, "rewards/get_intelligibility_reward": -5.044983577728272, "rewards/get_intelligibility_reward_std": 11.057494640350342, "rewards/get_target_len_reward": -0.0202834433875978, "rewards/get_target_len_reward_std": 0.0654812516644597, "step": 2280 }, { "advantages": 4.7360858133060903e-07, "advantages_std": 1.5867118835449219, "clip_ratio": 0.0, "completion_length": 84.55357360839844, "epoch": 1.7225563909774437, "grad_norm": 6.8125, "kl": 0.2956387400627136, "learning_rate": 4.139097744360902e-06, "loss": 0.0298, "num_tokens": 70244751.0, "reward": -1.6686067134141922, "reward_std": 6.578378915786743, "rewards/get_chromagram_reward": 0.6141305983066558, "rewards/get_chromagram_reward_std": 0.11698191240429878, "rewards/get_intelligibility_reward": -5.6026394963264465, "rewards/get_intelligibility_reward_std": 10.14880304336548, "rewards/get_target_len_reward": -0.017310941684991123, "rewards/get_target_len_reward_std": 0.044035492651164534, "step": 2290 }, { "advantages": -5.366901731918006e-07, "advantages_std": 1.5838298201560974, "clip_ratio": 0.0, "completion_length": 90.17262115478516, "epoch": 1.7300751879699248, "grad_norm": 8.75, "kl": 0.3221535414457321, "learning_rate": 4.135338345864662e-06, "loss": 0.0324, "num_tokens": 70561382.0, "reward": -0.8596220046281815, "reward_std": 6.4721067428588865, "rewards/get_chromagram_reward": 0.6353691577911377, "rewards/get_chromagram_reward_std": 0.11232606545090676, "rewards/get_intelligibility_reward": -3.1946552455425263, "rewards/get_intelligibility_reward_std": 10.751107311248779, "rewards/get_target_len_reward": -0.019579828809946777, "rewards/get_target_len_reward_std": 0.04634752385318279, "step": 2300 }, { "advantages": 3.327924957829964e-08, "advantages_std": 1.629914653301239, "clip_ratio": 0.0, "completion_length": 85.86904907226562, "epoch": 1.737593984962406, "grad_norm": 6.21875, "kl": 0.39486820101737974, "learning_rate": 4.1315789473684216e-06, "loss": 0.0454, "num_tokens": 70864705.0, "reward": -1.6306380838155747, "reward_std": 6.833079147338867, "rewards/get_chromagram_reward": 0.6226745009422302, "rewards/get_chromagram_reward_std": 0.12161731049418449, "rewards/get_intelligibility_reward": -5.491730678081512, "rewards/get_intelligibility_reward_std": 10.75001802444458, "rewards/get_target_len_reward": -0.022857668250799178, "rewards/get_target_len_reward_std": 0.06112685557454824, "step": 2310 }, { "advantages": -2.4338557835790198e-08, "advantages_std": 1.6277719616889954, "clip_ratio": 0.0, "completion_length": 87.22678756713867, "epoch": 1.7451127819548873, "grad_norm": 6.5, "kl": 0.3618259161710739, "learning_rate": 4.12781954887218e-06, "loss": 0.0366, "num_tokens": 71172976.0, "reward": -1.1786362126469612, "reward_std": 6.607575082778931, "rewards/get_chromagram_reward": 0.624769514799118, "rewards/get_chromagram_reward_std": 0.12241154238581657, "rewards/get_intelligibility_reward": -4.138809217512607, "rewards/get_intelligibility_reward_std": 10.685311555862427, "rewards/get_target_len_reward": -0.021868737787008284, "rewards/get_target_len_reward_std": 0.04962801802903414, "step": 2320 }, { "advantages": -1.096477070916535e-07, "advantages_std": 1.7360329389572144, "clip_ratio": 0.0, "completion_length": 85.69107208251953, "epoch": 1.7526315789473683, "grad_norm": 6.1875, "kl": 1.3730736181139946, "learning_rate": 4.12406015037594e-06, "loss": 0.1449, "num_tokens": 71476310.0, "reward": -1.4866398930549622, "reward_std": 6.556139183044434, "rewards/get_chromagram_reward": 0.6211271047592163, "rewards/get_chromagram_reward_std": 0.12372498363256454, "rewards/get_intelligibility_reward": -5.057052373886108, "rewards/get_intelligibility_reward_std": 10.40001630783081, "rewards/get_target_len_reward": -0.023994168732315302, "rewards/get_target_len_reward_std": 0.065902035869658, "step": 2330 }, { "advantages": -7.177393968049728e-08, "advantages_std": 1.5481481909751893, "clip_ratio": 0.0, "completion_length": 84.09226303100586, "epoch": 1.7601503759398496, "grad_norm": 6.59375, "kl": 0.35464718043804166, "learning_rate": 4.1203007518797e-06, "loss": 0.0384, "num_tokens": 71774878.0, "reward": -1.677583646774292, "reward_std": 6.635879039764404, "rewards/get_chromagram_reward": 0.6253755390644073, "rewards/get_chromagram_reward_std": 0.11250732392072678, "rewards/get_intelligibility_reward": -5.636243104934692, "rewards/get_intelligibility_reward_std": 10.37077875137329, "rewards/get_target_len_reward": -0.021883081085979937, "rewards/get_target_len_reward_std": 0.0651377398520708, "step": 2340 }, { "advantages": 5.361934768188803e-07, "advantages_std": 1.6343234777450562, "clip_ratio": 0.0, "completion_length": 85.13214492797852, "epoch": 1.7676691729323308, "grad_norm": 5.5, "kl": 0.3477922797203064, "learning_rate": 4.116541353383459e-06, "loss": 0.0361, "num_tokens": 72076568.0, "reward": -1.9456369400024414, "reward_std": 6.959947204589843, "rewards/get_chromagram_reward": 0.6237083613872528, "rewards/get_chromagram_reward_std": 0.11222967356443406, "rewards/get_intelligibility_reward": -6.439011716842652, "rewards/get_intelligibility_reward_std": 10.623391437530518, "rewards/get_target_len_reward": -0.02160733174532652, "rewards/get_target_len_reward_std": 0.05893028676509857, "step": 2350 }, { "advantages": 2.334515258439751e-07, "advantages_std": 1.6544344305992127, "clip_ratio": 0.0, "completion_length": 86.15476303100586, "epoch": 1.7751879699248119, "grad_norm": 9.3125, "kl": 0.3129129856824875, "learning_rate": 4.112781954887218e-06, "loss": 0.0418, "num_tokens": 72380577.0, "reward": -1.5635125003755093, "reward_std": 6.368973064422607, "rewards/get_chromagram_reward": 0.6229640424251557, "rewards/get_chromagram_reward_std": 0.11633650735020637, "rewards/get_intelligibility_reward": -5.289789938926697, "rewards/get_intelligibility_reward_std": 9.956442642211915, "rewards/get_target_len_reward": -0.023711246997117998, "rewards/get_target_len_reward_std": 0.07678976822644472, "step": 2360 }, { "advantages": -4.721184708955661e-07, "advantages_std": 1.5377548336982727, "clip_ratio": 0.0, "completion_length": 89.5404769897461, "epoch": 1.7827067669172934, "grad_norm": 8.0, "kl": 0.2969405084848404, "learning_rate": 4.109022556390978e-06, "loss": 0.0343, "num_tokens": 72694892.0, "reward": -1.2615583300590516, "reward_std": 7.277207279205323, "rewards/get_chromagram_reward": 0.6268039345741272, "rewards/get_chromagram_reward_std": 0.11108435578644275, "rewards/get_intelligibility_reward": -4.389381170272827, "rewards/get_intelligibility_reward_std": 11.901940822601318, "rewards/get_target_len_reward": -0.022097578458487987, "rewards/get_target_len_reward_std": 0.06381280329078436, "step": 2370 }, { "advantages": 6.109476515803181e-08, "advantages_std": 1.6187481880187988, "clip_ratio": 0.0, "completion_length": 87.50178680419921, "epoch": 1.7902255639097744, "grad_norm": 7.5625, "kl": 0.4066790774464607, "learning_rate": 4.105263157894737e-06, "loss": 0.0417, "num_tokens": 73002358.0, "reward": -1.664416539669037, "reward_std": 7.076716756820678, "rewards/get_chromagram_reward": 0.6175857126712799, "rewards/get_chromagram_reward_std": 0.1235451377928257, "rewards/get_intelligibility_reward": -5.58691291809082, "rewards/get_intelligibility_reward_std": 11.194854640960694, "rewards/get_target_len_reward": -0.023922096379101275, "rewards/get_target_len_reward_std": 0.0660831168293953, "step": 2380 }, { "advantages": -6.780029102593232e-08, "advantages_std": 1.5943035006523132, "clip_ratio": 0.0, "completion_length": 88.36428756713867, "epoch": 1.7977443609022556, "grad_norm": 5.78125, "kl": 0.3713136985898018, "learning_rate": 4.101503759398496e-06, "loss": 0.0412, "num_tokens": 73313327.0, "reward": -1.5908078402280807, "reward_std": 7.35349440574646, "rewards/get_chromagram_reward": 0.6217695772647858, "rewards/get_chromagram_reward_std": 0.1202425293624401, "rewards/get_intelligibility_reward": -5.372699975967407, "rewards/get_intelligibility_reward_std": 11.718086051940919, "rewards/get_target_len_reward": -0.021492914762347937, "rewards/get_target_len_reward_std": 0.06425060071051121, "step": 2390 }, { "advantages": 1.4280279980738442e-07, "advantages_std": 1.6416914343833924, "clip_ratio": 0.0, "completion_length": 86.24524002075195, "epoch": 1.805263157894737, "grad_norm": 6.6875, "kl": 0.3477358803153038, "learning_rate": 4.097744360902256e-06, "loss": 0.0377, "num_tokens": 73618532.0, "reward": -1.3100897327065468, "reward_std": 6.019156885147095, "rewards/get_chromagram_reward": 0.6225740551948548, "rewards/get_chromagram_reward_std": 0.11594133004546166, "rewards/get_intelligibility_reward": -4.53080498832278, "rewards/get_intelligibility_reward_std": 9.37488775253296, "rewards/get_target_len_reward": -0.02203790657222271, "rewards/get_target_len_reward_std": 0.05875368323177099, "step": 2400 }, { "advantages": 1.4007092179468829e-07, "advantages_std": 1.6318424463272094, "clip_ratio": 0.0, "completion_length": 86.2976203918457, "epoch": 1.812781954887218, "grad_norm": 95.0, "kl": 0.3875324487686157, "learning_rate": 4.0939849624060155e-06, "loss": 0.0403, "num_tokens": 73923800.0, "reward": -1.3757910546846688, "reward_std": 6.7682945728302, "rewards/get_chromagram_reward": 0.6181758105754852, "rewards/get_chromagram_reward_std": 0.11742054596543312, "rewards/get_intelligibility_reward": -4.72544179558754, "rewards/get_intelligibility_reward_std": 10.851413440704345, "rewards/get_target_len_reward": -0.020106809958815575, "rewards/get_target_len_reward_std": 0.05670954566448927, "step": 2410 }, { "advantages": 4.967053657267684e-10, "advantages_std": 1.600301456451416, "clip_ratio": 0.0, "completion_length": 84.56428680419921, "epoch": 1.8203007518796992, "grad_norm": 6.6875, "kl": 0.31402217745780947, "learning_rate": 4.090225563909775e-06, "loss": 0.0425, "num_tokens": 74223740.0, "reward": -1.2864448690786958, "reward_std": 6.1337813377380375, "rewards/get_chromagram_reward": 0.6213976562023162, "rewards/get_chromagram_reward_std": 0.11770420670509338, "rewards/get_intelligibility_reward": -4.453081881999969, "rewards/get_intelligibility_reward_std": 9.727893400192261, "rewards/get_target_len_reward": -0.027650110237300397, "rewards/get_target_len_reward_std": 0.09467000924050809, "step": 2420 }, { "advantages": 4.803140981834986e-07, "advantages_std": 1.5512767672538756, "clip_ratio": 0.0, "completion_length": 85.27262115478516, "epoch": 1.8278195488721805, "grad_norm": 6.5625, "kl": 0.3004987627267838, "learning_rate": 4.086466165413534e-06, "loss": 0.0333, "num_tokens": 74526273.0, "reward": -1.2122848182916641, "reward_std": 6.635207271575927, "rewards/get_chromagram_reward": 0.6173213303089142, "rewards/get_chromagram_reward_std": 0.10879912301898002, "rewards/get_intelligibility_reward": -4.234836637973785, "rewards/get_intelligibility_reward_std": 10.808967781066894, "rewards/get_target_len_reward": -0.019338873215019703, "rewards/get_target_len_reward_std": 0.06528621017932892, "step": 2430 }, { "advantages": 8.18322206441735e-08, "advantages_std": 1.5040780901908875, "clip_ratio": 0.0, "completion_length": 86.69940567016602, "epoch": 1.8353383458646615, "grad_norm": 9.125, "kl": 0.32429891228675845, "learning_rate": 4.0827067669172936e-06, "loss": 0.037, "num_tokens": 74831879.0, "reward": -1.7913677096366882, "reward_std": 6.463911771774292, "rewards/get_chromagram_reward": 0.6092451333999633, "rewards/get_chromagram_reward_std": 0.11632038056850433, "rewards/get_intelligibility_reward": -5.965150308609009, "rewards/get_intelligibility_reward_std": 9.741594982147216, "rewards/get_target_len_reward": -0.01819766601547599, "rewards/get_target_len_reward_std": 0.05161431562155485, "step": 2440 }, { "advantages": -1.6440948584772742e-07, "advantages_std": 1.6510832905769348, "clip_ratio": 0.0, "completion_length": 87.42916793823242, "epoch": 1.842857142857143, "grad_norm": 8.9375, "kl": 0.28726502507925034, "learning_rate": 4.078947368421053e-06, "loss": 0.0282, "num_tokens": 75139614.0, "reward": -1.8547417521476746, "reward_std": 7.167141103744507, "rewards/get_chromagram_reward": 0.6293876945972443, "rewards/get_chromagram_reward_std": 0.10446088090538978, "rewards/get_intelligibility_reward": -6.176269102096557, "rewards/get_intelligibility_reward_std": 11.209553623199463, "rewards/get_target_len_reward": -0.017343681119382382, "rewards/get_target_len_reward_std": 0.05335175041109323, "step": 2450 }, { "advantages": -3.521641332326908e-07, "advantages_std": 1.5947361826896667, "clip_ratio": 0.0, "completion_length": 84.67024002075195, "epoch": 1.850375939849624, "grad_norm": 6.75, "kl": 0.32163482904434204, "learning_rate": 4.075187969924813e-06, "loss": 0.0303, "num_tokens": 75439855.0, "reward": -1.5484479904174804, "reward_std": 6.759703636169434, "rewards/get_chromagram_reward": 0.620575338602066, "rewards/get_chromagram_reward_std": 0.11032437160611153, "rewards/get_intelligibility_reward": -5.249281632900238, "rewards/get_intelligibility_reward_std": 10.744479370117187, "rewards/get_target_len_reward": -0.01663746191188693, "rewards/get_target_len_reward_std": 0.03786009326577187, "step": 2460 }, { "advantages": 1.6838313658951166e-07, "advantages_std": 1.5318275094032288, "clip_ratio": 0.0, "completion_length": 83.47559661865235, "epoch": 1.8578947368421053, "grad_norm": 6.0625, "kl": 0.3126231297850609, "learning_rate": 4.071428571428572e-06, "loss": 0.0333, "num_tokens": 75737651.0, "reward": -1.676826250553131, "reward_std": 6.993101596832275, "rewards/get_chromagram_reward": 0.6129578173160553, "rewards/get_chromagram_reward_std": 0.09878090545535087, "rewards/get_intelligibility_reward": -5.626195979118347, "rewards/get_intelligibility_reward_std": 11.033592128753662, "rewards/get_target_len_reward": -0.017240209318697453, "rewards/get_target_len_reward_std": 0.049395473673939705, "step": 2470 }, { "advantages": 2.471109095125712e-08, "advantages_std": 1.6018425345420837, "clip_ratio": 0.0, "completion_length": 83.61845397949219, "epoch": 1.8654135338345865, "grad_norm": 6.0625, "kl": 0.3190310463309288, "learning_rate": 4.067669172932331e-06, "loss": 0.037, "num_tokens": 76035766.0, "reward": -1.5238053441047668, "reward_std": 6.46599850654602, "rewards/get_chromagram_reward": 0.6349402129650116, "rewards/get_chromagram_reward_std": 0.12203380763530731, "rewards/get_intelligibility_reward": -5.180359315872193, "rewards/get_intelligibility_reward_std": 10.185456657409668, "rewards/get_target_len_reward": -0.025996736809611322, "rewards/get_target_len_reward_std": 0.06878238022327424, "step": 2480 }, { "advantages": 7.698935178268585e-09, "advantages_std": 1.583086097240448, "clip_ratio": 0.0, "completion_length": 89.21904907226562, "epoch": 1.8729323308270676, "grad_norm": 5.4375, "kl": 0.2938147783279419, "learning_rate": 4.063909774436091e-06, "loss": 0.0361, "num_tokens": 76348903.0, "reward": -1.237100750207901, "reward_std": 6.340283393859863, "rewards/get_chromagram_reward": 0.6105383217334748, "rewards/get_chromagram_reward_std": 0.10293650925159455, "rewards/get_intelligibility_reward": -4.298519229888916, "rewards/get_intelligibility_reward_std": 10.241126346588135, "rewards/get_target_len_reward": -0.0233210857026279, "rewards/get_target_len_reward_std": 0.0777428038418293, "step": 2490 }, { "advantages": 3.899138931728885e-08, "advantages_std": 1.5943516135215758, "clip_ratio": 0.0, "completion_length": 86.84702606201172, "epoch": 1.8804511278195488, "grad_norm": 368.0, "kl": 0.35679852962493896, "learning_rate": 4.06015037593985e-06, "loss": 0.0407, "num_tokens": 76655596.0, "reward": -1.6571479380130767, "reward_std": 7.034072685241699, "rewards/get_chromagram_reward": 0.6275037288665771, "rewards/get_chromagram_reward_std": 0.11990767642855645, "rewards/get_intelligibility_reward": -5.577424621582031, "rewards/get_intelligibility_reward_std": 11.100014400482177, "rewards/get_target_len_reward": -0.021522563882172108, "rewards/get_target_len_reward_std": 0.06714740544557571, "step": 2500 }, { "advantages": -1.862645149230957e-07, "advantages_std": 1.5596297979354858, "clip_ratio": 0.0, "completion_length": 88.28392944335937, "epoch": 1.88796992481203, "grad_norm": 9.375, "kl": 0.4866158485412598, "learning_rate": 4.056390977443609e-06, "loss": 0.0532, "num_tokens": 76965233.0, "reward": -1.3023281721398234, "reward_std": 6.401098299026489, "rewards/get_chromagram_reward": 0.6223603427410126, "rewards/get_chromagram_reward_std": 0.10559494495391845, "rewards/get_intelligibility_reward": -4.508207023143768, "rewards/get_intelligibility_reward_std": 10.275893402099609, "rewards/get_target_len_reward": -0.02113771978765726, "rewards/get_target_len_reward_std": 0.07548020184040069, "step": 2510 }, { "advantages": -3.576279308248331e-08, "advantages_std": 1.653851580619812, "clip_ratio": 0.0, "completion_length": 88.94940643310547, "epoch": 1.8954887218045111, "grad_norm": 7.125, "kl": 0.3362803116440773, "learning_rate": 4.052631578947368e-06, "loss": 0.0423, "num_tokens": 77276883.0, "reward": -1.6171175594441594, "reward_std": 6.4608911037445065, "rewards/get_chromagram_reward": 0.6122646510601044, "rewards/get_chromagram_reward_std": 0.11121488139033317, "rewards/get_intelligibility_reward": -5.440471267700195, "rewards/get_intelligibility_reward_std": 9.968835878372193, "rewards/get_target_len_reward": -0.0231459632050246, "rewards/get_target_len_reward_std": 0.0733029767870903, "step": 2520 }, { "advantages": -2.7803082787158927e-07, "advantages_std": 1.5271441459655761, "clip_ratio": 0.0, "completion_length": 87.99643020629883, "epoch": 1.9030075187969926, "grad_norm": 60.0, "kl": 0.32148386389017103, "learning_rate": 4.048872180451129e-06, "loss": 0.0359, "num_tokens": 77585961.0, "reward": -1.7723593652248382, "reward_std": 6.782654476165772, "rewards/get_chromagram_reward": 0.6239281296730042, "rewards/get_chromagram_reward_std": 0.12085575759410858, "rewards/get_intelligibility_reward": -5.919119071960449, "rewards/get_intelligibility_reward_std": 10.512956428527833, "rewards/get_target_len_reward": -0.02188691161572933, "rewards/get_target_len_reward_std": 0.060627135634422305, "step": 2530 }, { "advantages": 2.081195582093187e-07, "advantages_std": 1.5499732732772826, "clip_ratio": 0.0, "completion_length": 84.20000228881835, "epoch": 1.9105263157894736, "grad_norm": 11.9375, "kl": 0.426358599960804, "learning_rate": 4.0451127819548875e-06, "loss": 0.0456, "num_tokens": 77884894.0, "reward": -1.5123292624950408, "reward_std": 6.540386390686035, "rewards/get_chromagram_reward": 0.6197911500930786, "rewards/get_chromagram_reward_std": 0.10884481891989709, "rewards/get_intelligibility_reward": -5.130767846107483, "rewards/get_intelligibility_reward_std": 10.312138175964355, "rewards/get_target_len_reward": -0.026010839454829692, "rewards/get_target_len_reward_std": 0.08144442550837994, "step": 2540 }, { "advantages": 2.3071964534437938e-07, "advantages_std": 1.5312702655792236, "clip_ratio": 0.0, "completion_length": 89.43274002075195, "epoch": 1.9180451127819549, "grad_norm": 25.125, "kl": 5440.552889862656, "learning_rate": 4.041353383458647e-06, "loss": 544.0583, "num_tokens": 78198326.0, "reward": -1.667872903123498, "reward_std": 7.2336784362792965, "rewards/get_chromagram_reward": 0.6233785390853882, "rewards/get_chromagram_reward_std": 0.10803897455334663, "rewards/get_intelligibility_reward": -5.607472121715546, "rewards/get_intelligibility_reward_std": 11.474693489074706, "rewards/get_target_len_reward": -0.01952482983469963, "rewards/get_target_len_reward_std": 0.056231222674250604, "step": 2550 }, { "advantages": 1.4168521040858194e-07, "advantages_std": 1.6055476427078248, "clip_ratio": 0.0, "completion_length": 87.8934539794922, "epoch": 1.9255639097744361, "grad_norm": 7.21875, "kl": 55.368652729690076, "learning_rate": 4.037593984962406e-06, "loss": 5.54, "num_tokens": 78507568.0, "reward": -1.5169284701347352, "reward_std": 6.524043083190918, "rewards/get_chromagram_reward": 0.6211909174919128, "rewards/get_chromagram_reward_std": 0.10534756779670715, "rewards/get_intelligibility_reward": -5.150899171829224, "rewards/get_intelligibility_reward_std": 10.345247268676758, "rewards/get_target_len_reward": -0.021076841093599797, "rewards/get_target_len_reward_std": 0.06926373746246099, "step": 2560 }, { "advantages": 5.267560624133694e-07, "advantages_std": 1.4992515563964843, "clip_ratio": 0.0, "completion_length": 86.12797622680664, "epoch": 1.9330827067669172, "grad_norm": 12.8125, "kl": 0.3306959331035614, "learning_rate": 4.033834586466166e-06, "loss": 0.0387, "num_tokens": 78812057.0, "reward": -1.651641035079956, "reward_std": 6.678805828094482, "rewards/get_chromagram_reward": 0.6241105377674103, "rewards/get_chromagram_reward_std": 0.1160986490547657, "rewards/get_intelligibility_reward": -5.554108786582947, "rewards/get_intelligibility_reward_std": 10.46739559173584, "rewards/get_target_len_reward": -0.024924515280872583, "rewards/get_target_len_reward_std": 0.06897499226033688, "step": 2570 }, { "advantages": -2.997617171374145e-07, "advantages_std": 1.566463255882263, "clip_ratio": 0.0, "completion_length": 88.16309661865235, "epoch": 1.9406015037593987, "grad_norm": 5.90625, "kl": 0.4035792797803879, "learning_rate": 4.030075187969925e-06, "loss": 0.0422, "num_tokens": 79122637.0, "reward": -1.3191568836569787, "reward_std": 6.7891851425170895, "rewards/get_chromagram_reward": 0.6245894730091095, "rewards/get_chromagram_reward_std": 0.12556827813386917, "rewards/get_intelligibility_reward": -4.558280682563781, "rewards/get_intelligibility_reward_std": 10.986210060119628, "rewards/get_target_len_reward": -0.023779175989329816, "rewards/get_target_len_reward_std": 0.06274551041424274, "step": 2580 }, { "advantages": -3.576277929351335e-08, "advantages_std": 1.5424383997917175, "clip_ratio": 0.0, "completion_length": 86.0434555053711, "epoch": 1.9481203007518797, "grad_norm": 5.25, "kl": 0.4115982368588448, "learning_rate": 4.026315789473684e-06, "loss": 0.0447, "num_tokens": 79426425.0, "reward": -1.7672771275043488, "reward_std": 7.219090843200684, "rewards/get_chromagram_reward": 0.6099259614944458, "rewards/get_chromagram_reward_std": 0.09907660037279128, "rewards/get_intelligibility_reward": -5.89415819644928, "rewards/get_intelligibility_reward_std": 11.30065450668335, "rewards/get_target_len_reward": -0.017598634399473668, "rewards/get_target_len_reward_std": 0.05380655974149704, "step": 2590 }, { "advantages": -4.5175355865012536e-07, "advantages_std": 1.6156995177268982, "clip_ratio": 0.0, "completion_length": 85.45476379394532, "epoch": 1.955639097744361, "grad_norm": 9.1875, "kl": 0.3221691817045212, "learning_rate": 4.022556390977444e-06, "loss": 0.0364, "num_tokens": 79729374.0, "reward": -1.970957136154175, "reward_std": 6.993637561798096, "rewards/get_chromagram_reward": 0.6113289833068848, "rewards/get_chromagram_reward_std": 0.11882436200976372, "rewards/get_intelligibility_reward": -6.500963306427002, "rewards/get_intelligibility_reward_std": 10.691209602355958, "rewards/get_target_len_reward": -0.023236696422100068, "rewards/get_target_len_reward_std": 0.0706200659275055, "step": 2600 }, { "advantages": -3.07212287253833e-07, "advantages_std": 1.628948163986206, "clip_ratio": 0.0, "completion_length": 86.0678581237793, "epoch": 1.9631578947368422, "grad_norm": 8.5625, "kl": 1.3464050814509392, "learning_rate": 4.018796992481203e-06, "loss": 0.1366, "num_tokens": 80033923.0, "reward": -1.6657975971698762, "reward_std": 6.801527500152588, "rewards/get_chromagram_reward": 0.6163083136081695, "rewards/get_chromagram_reward_std": 0.1146535836160183, "rewards/get_intelligibility_reward": -5.594403171539307, "rewards/get_intelligibility_reward_std": 10.669664001464843, "rewards/get_target_len_reward": -0.019297726918011904, "rewards/get_target_len_reward_std": 0.05750475097447634, "step": 2610 }, { "advantages": -2.3345150879094944e-08, "advantages_std": 1.7088413119316102, "clip_ratio": 0.0, "completion_length": 82.7011932373047, "epoch": 1.9706766917293232, "grad_norm": 8.125, "kl": 0.34609042257070544, "learning_rate": 4.015037593984963e-06, "loss": 0.0435, "num_tokens": 80328255.0, "reward": -1.7058481693267822, "reward_std": 6.336285066604614, "rewards/get_chromagram_reward": 0.6239156484603882, "rewards/get_chromagram_reward_std": 0.10946919023990631, "rewards/get_intelligibility_reward": -5.717773270606995, "rewards/get_intelligibility_reward_std": 9.749144554138184, "rewards/get_target_len_reward": -0.023686547577381135, "rewards/get_target_len_reward_std": 0.06974872462451458, "step": 2620 }, { "advantages": -9.05245698845647e-08, "advantages_std": 1.530289900302887, "clip_ratio": 0.0, "completion_length": 88.38690643310547, "epoch": 1.9781954887218045, "grad_norm": 8.5625, "kl": 0.28629245460033415, "learning_rate": 4.011278195488722e-06, "loss": 0.029, "num_tokens": 80638435.0, "reward": -1.2314883843064308, "reward_std": 6.625270700454712, "rewards/get_chromagram_reward": 0.6178261160850524, "rewards/get_chromagram_reward_std": 0.10950247719883918, "rewards/get_intelligibility_reward": -4.294896459579467, "rewards/get_intelligibility_reward_std": 10.598248863220215, "rewards/get_target_len_reward": -0.017394708935171366, "rewards/get_target_len_reward_std": 0.04538180362433195, "step": 2630 }, { "advantages": 1.9421179899836716e-07, "advantages_std": 1.732315731048584, "clip_ratio": 0.0, "completion_length": 91.10000228881836, "epoch": 1.9857142857142858, "grad_norm": 872.0, "kl": 26.47857711613178, "learning_rate": 4.007518796992481e-06, "loss": 2.6515, "num_tokens": 80956909.0, "reward": -1.3893529994413256, "reward_std": 6.811365032196045, "rewards/get_chromagram_reward": 0.631500905752182, "rewards/get_chromagram_reward_std": 0.11041677147150039, "rewards/get_intelligibility_reward": -4.777693957090378, "rewards/get_intelligibility_reward_std": 10.829482650756836, "rewards/get_target_len_reward": -0.02186558600515127, "rewards/get_target_len_reward_std": 0.05735799949616194, "step": 2640 }, { "advantages": -8.928279369158076e-08, "advantages_std": 1.5102097868919373, "clip_ratio": 0.0, "completion_length": 88.96607208251953, "epoch": 1.9932330827067668, "grad_norm": 65.5, "kl": 0.3440512865781784, "learning_rate": 4.003759398496241e-06, "loss": 0.0355, "num_tokens": 81269102.0, "reward": -1.2409762933850288, "reward_std": 6.411077070236206, "rewards/get_chromagram_reward": 0.6272790014743805, "rewards/get_chromagram_reward_std": 0.1086908034980297, "rewards/get_intelligibility_reward": -4.325918501615524, "rewards/get_intelligibility_reward_std": 10.257991981506347, "rewards/get_target_len_reward": -0.024289328791201114, "rewards/get_target_len_reward_std": 0.06517439857125282, "step": 2650 }, { "advantages": -1.0939936316844978e-07, "advantages_std": 1.575232243537903, "clip_ratio": 0.0, "completion_length": 84.14226303100585, "epoch": 2.001503759398496, "grad_norm": 5.25, "kl": 1.0052132874727249, "learning_rate": 4.000000000000001e-06, "loss": 0.1059, "num_tokens": 81572089.0, "reward": -1.6181729942560197, "reward_std": 6.623514938354492, "rewards/get_chromagram_reward": 0.6281363487243652, "rewards/get_chromagram_reward_std": 0.11101439595222473, "rewards/get_intelligibility_reward": -5.455703794956207, "rewards/get_intelligibility_reward_std": 10.291347360610962, "rewards/get_target_len_reward": -0.02695103920996189, "rewards/get_target_len_reward_std": 0.07068178877234459, "step": 2660 }, { "advantages": 1.2417700645528385e-09, "advantages_std": 1.5857953310012818, "clip_ratio": 0.0, "completion_length": 83.08214492797852, "epoch": 2.0090225563909776, "grad_norm": 6.34375, "kl": 0.29621861577034, "learning_rate": 3.9962406015037595e-06, "loss": 0.0321, "num_tokens": 81868085.0, "reward": -1.6860800623893737, "reward_std": 6.562015771865845, "rewards/get_chromagram_reward": 0.6251458704471589, "rewards/get_chromagram_reward_std": 0.11826840862631798, "rewards/get_intelligibility_reward": -5.666773128509521, "rewards/get_intelligibility_reward_std": 10.22925615310669, "rewards/get_target_len_reward": -0.016612501721829175, "rewards/get_target_len_reward_std": 0.050476128607988356, "step": 2670 }, { "advantages": 7.829318747099023e-07, "advantages_std": 1.627654242515564, "clip_ratio": 0.0, "completion_length": 84.9095245361328, "epoch": 2.0165413533834586, "grad_norm": 93.5, "kl": 0.3101460263133049, "learning_rate": 3.992481203007519e-06, "loss": 0.0334, "num_tokens": 82169486.0, "reward": -1.5961270749568939, "reward_std": 6.337138366699219, "rewards/get_chromagram_reward": 0.6360050141811371, "rewards/get_chromagram_reward_std": 0.11316153407096863, "rewards/get_intelligibility_reward": -5.403832316398621, "rewards/get_intelligibility_reward_std": 9.89303879737854, "rewards/get_target_len_reward": -0.020553555525839328, "rewards/get_target_len_reward_std": 0.05809407290071249, "step": 2680 }, { "advantages": 4.1648745252587105e-07, "advantages_std": 1.6060652375221252, "clip_ratio": 0.0, "completion_length": 86.80476303100586, "epoch": 2.0240601503759397, "grad_norm": 9.4375, "kl": 0.3532222270965576, "learning_rate": 3.988721804511279e-06, "loss": 0.0344, "num_tokens": 82476181.0, "reward": -1.5671575158834457, "reward_std": 6.547077798843384, "rewards/get_chromagram_reward": 0.6348513245582581, "rewards/get_chromagram_reward_std": 0.10924804285168647, "rewards/get_intelligibility_reward": -5.3163529396057125, "rewards/get_intelligibility_reward_std": 10.257298564910888, "rewards/get_target_len_reward": -0.019970581401139498, "rewards/get_target_len_reward_std": 0.05366719178855419, "step": 2690 }, { "advantages": 1.631677217783789e-07, "advantages_std": 1.6509637475013732, "clip_ratio": 0.0, "completion_length": 83.5428596496582, "epoch": 2.031578947368421, "grad_norm": 16.0, "kl": 0.31794759780168536, "learning_rate": 3.9849624060150376e-06, "loss": 0.041, "num_tokens": 82773681.0, "reward": -1.5188110053539277, "reward_std": 6.337612199783325, "rewards/get_chromagram_reward": 0.6124212205410003, "rewards/get_chromagram_reward_std": 0.10746575593948364, "rewards/get_intelligibility_reward": -5.145204424858093, "rewards/get_intelligibility_reward_std": 9.923934173583984, "rewards/get_target_len_reward": -0.023649390833452345, "rewards/get_target_len_reward_std": 0.07802547551691533, "step": 2700 }, { "advantages": 6.283323514821859e-08, "advantages_std": 1.6156264424324036, "clip_ratio": 0.0, "completion_length": 86.15595321655273, "epoch": 2.039097744360902, "grad_norm": 7.0, "kl": 0.2870593532919884, "learning_rate": 3.981203007518797e-06, "loss": 0.0345, "num_tokens": 83077770.0, "reward": -1.752515721321106, "reward_std": 6.815318632125854, "rewards/get_chromagram_reward": 0.6136973381042481, "rewards/get_chromagram_reward_std": 0.12048554718494416, "rewards/get_intelligibility_reward": -5.849648785591126, "rewards/get_intelligibility_reward_std": 10.635982704162597, "rewards/get_target_len_reward": -0.021595498360693455, "rewards/get_target_len_reward_std": 0.06771521810442209, "step": 2710 }, { "advantages": 7.996956128408783e-08, "advantages_std": 1.5599609971046449, "clip_ratio": 0.0, "completion_length": 87.32440719604492, "epoch": 2.0466165413533837, "grad_norm": 9.5625, "kl": 0.3296610161662102, "learning_rate": 3.977443609022557e-06, "loss": 0.0379, "num_tokens": 83386010.0, "reward": -1.3757877141237258, "reward_std": 6.675065898895264, "rewards/get_chromagram_reward": 0.6006149351596832, "rewards/get_chromagram_reward_std": 0.11269161626696586, "rewards/get_intelligibility_reward": -4.707658588886261, "rewards/get_intelligibility_reward_std": 10.665964126586914, "rewards/get_target_len_reward": -0.02031925953924656, "rewards/get_target_len_reward_std": 0.0578795462846756, "step": 2720 }, { "advantages": -3.005068265338195e-08, "advantages_std": 1.6625032067298888, "clip_ratio": 0.0, "completion_length": 81.84345474243165, "epoch": 2.0541353383458647, "grad_norm": 5.625, "kl": 0.32018242329359053, "learning_rate": 3.9736842105263165e-06, "loss": 0.0344, "num_tokens": 83679359.0, "reward": -1.37432102560997, "reward_std": 6.967769908905029, "rewards/get_chromagram_reward": 0.6192000329494476, "rewards/get_chromagram_reward_std": 0.12201750725507736, "rewards/get_intelligibility_reward": -4.716173662245273, "rewards/get_intelligibility_reward_std": 11.24532117843628, "rewards/get_target_len_reward": -0.025989135075360537, "rewards/get_target_len_reward_std": 0.07435446102172136, "step": 2730 }, { "advantages": -9.636084961073265e-08, "advantages_std": 1.5627854466438293, "clip_ratio": 0.0, "completion_length": 91.02619171142578, "epoch": 2.0616541353383457, "grad_norm": 170.0, "kl": 0.3474865362048149, "learning_rate": 3.969924812030075e-06, "loss": 0.0374, "num_tokens": 83997524.0, "reward": -1.208175851404667, "reward_std": 6.546644783020019, "rewards/get_chromagram_reward": 0.6278281271457672, "rewards/get_chromagram_reward_std": 0.1091654047369957, "rewards/get_intelligibility_reward": -4.231800414249301, "rewards/get_intelligibility_reward_std": 10.602706050872802, "rewards/get_target_len_reward": -0.020555054116994143, "rewards/get_target_len_reward_std": 0.05810644961893559, "step": 2740 }, { "advantages": -2.4835263445766032e-08, "advantages_std": 1.731394600868225, "clip_ratio": 0.0, "completion_length": 84.61904983520508, "epoch": 2.069172932330827, "grad_norm": 35.75, "kl": 0.3340544059872627, "learning_rate": 3.966165413533835e-06, "loss": 0.0418, "num_tokens": 84297845.0, "reward": -1.816446018218994, "reward_std": 6.996577215194702, "rewards/get_chromagram_reward": 0.6161438524723053, "rewards/get_chromagram_reward_std": 0.11226251423358917, "rewards/get_intelligibility_reward": -6.046895384788513, "rewards/get_intelligibility_reward_std": 10.867814064025879, "rewards/get_target_len_reward": -0.018586322385817765, "rewards/get_target_len_reward_std": 0.06211254261434078, "step": 2750 }, { "advantages": 5.16573589948166e-07, "advantages_std": 1.553725790977478, "clip_ratio": 0.0, "completion_length": 86.25059814453125, "epoch": 2.0766917293233083, "grad_norm": 6.0, "kl": 0.34573080837726594, "learning_rate": 3.9624060150375946e-06, "loss": 0.0424, "num_tokens": 84602520.0, "reward": -1.4252728760242461, "reward_std": 6.637792301177979, "rewards/get_chromagram_reward": 0.639812707901001, "rewards/get_chromagram_reward_std": 0.11356036961078644, "rewards/get_intelligibility_reward": -4.889979219436645, "rewards/get_intelligibility_reward_std": 10.599310779571534, "rewards/get_target_len_reward": -0.025651910994201898, "rewards/get_target_len_reward_std": 0.07295588366687297, "step": 2760 }, { "advantages": 2.972161013303776e-07, "advantages_std": 1.388267707824707, "clip_ratio": 0.0, "completion_length": 86.68154907226562, "epoch": 2.0842105263157893, "grad_norm": 54.5, "kl": 0.3174739718437195, "learning_rate": 3.958646616541354e-06, "loss": 0.0344, "num_tokens": 84908029.0, "reward": -1.9035109996795654, "reward_std": 7.167625379562378, "rewards/get_chromagram_reward": 0.6314962983131409, "rewards/get_chromagram_reward_std": 0.10467702820897103, "rewards/get_intelligibility_reward": -6.32180278301239, "rewards/get_intelligibility_reward_std": 10.970943498611451, "rewards/get_target_len_reward": -0.02022632034495473, "rewards/get_target_len_reward_std": 0.050012038089334965, "step": 2770 }, { "advantages": -1.9321838635733e-07, "advantages_std": 1.6621986269950866, "clip_ratio": 0.0, "completion_length": 87.88869171142578, "epoch": 2.0917293233082708, "grad_norm": 8.25, "kl": 0.36265345960855483, "learning_rate": 3.954887218045113e-06, "loss": 0.0378, "num_tokens": 85217100.0, "reward": -1.5520632922649384, "reward_std": 6.731218671798706, "rewards/get_chromagram_reward": 0.6277051866054535, "rewards/get_chromagram_reward_std": 0.10972578823566437, "rewards/get_intelligibility_reward": -5.26589868068695, "rewards/get_intelligibility_reward_std": 10.702633094787597, "rewards/get_target_len_reward": -0.01799613079056144, "rewards/get_target_len_reward_std": 0.049734361842274664, "step": 2780 }, { "advantages": 7.376073654086213e-08, "advantages_std": 1.509410297870636, "clip_ratio": 0.0, "completion_length": 88.70059661865234, "epoch": 2.099248120300752, "grad_norm": 9.25, "kl": 0.3428507328033447, "learning_rate": 3.951127819548873e-06, "loss": 0.0368, "num_tokens": 85528743.0, "reward": -1.1163109362125396, "reward_std": 6.350103139877319, "rewards/get_chromagram_reward": 0.6196650564670563, "rewards/get_chromagram_reward_std": 0.10964875966310501, "rewards/get_intelligibility_reward": -3.947629976272583, "rewards/get_intelligibility_reward_std": 10.319130992889404, "rewards/get_target_len_reward": -0.02096777716651559, "rewards/get_target_len_reward_std": 0.06917067337781191, "step": 2790 }, { "advantages": -4.4753156434040877e-07, "advantages_std": 1.6299754142761231, "clip_ratio": 0.0, "completion_length": 87.36190567016601, "epoch": 2.1067669172932333, "grad_norm": 9.6875, "kl": 0.3407274499535561, "learning_rate": 3.947368421052632e-06, "loss": 0.0433, "num_tokens": 85837424.0, "reward": -1.3424668543040752, "reward_std": 6.6862287521362305, "rewards/get_chromagram_reward": 0.6276401698589325, "rewards/get_chromagram_reward_std": 0.10829192474484443, "rewards/get_intelligibility_reward": -4.629968780279159, "rewards/get_intelligibility_reward_std": 10.745279312133789, "rewards/get_target_len_reward": -0.025071771629154683, "rewards/get_target_len_reward_std": 0.07464848496019841, "step": 2800 }, { "advantages": 2.4065376322823797e-07, "advantages_std": 1.612348747253418, "clip_ratio": 0.0, "completion_length": 85.93452529907226, "epoch": 2.1142857142857143, "grad_norm": 170.0, "kl": 0.3182450085878372, "learning_rate": 3.943609022556391e-06, "loss": 0.039, "num_tokens": 86141792.0, "reward": -1.6066758632659912, "reward_std": 7.027578115463257, "rewards/get_chromagram_reward": 0.6162883937358856, "rewards/get_chromagram_reward_std": 0.12795912325382233, "rewards/get_intelligibility_reward": -5.414373850822448, "rewards/get_intelligibility_reward_std": 11.074428367614747, "rewards/get_target_len_reward": -0.02194185955449939, "rewards/get_target_len_reward_std": 0.06819509305059909, "step": 2810 }, { "advantages": 4.0705006405516996e-07, "advantages_std": 1.6682619452476501, "clip_ratio": 0.0, "completion_length": 87.75238342285157, "epoch": 2.1218045112781954, "grad_norm": 6.34375, "kl": 0.32629688531160356, "learning_rate": 3.939849624060151e-06, "loss": 0.0405, "num_tokens": 86450610.0, "reward": -1.5439964354038238, "reward_std": 6.698471546173096, "rewards/get_chromagram_reward": 0.6298602938652038, "rewards/get_chromagram_reward_std": 0.11802728474140167, "rewards/get_intelligibility_reward": -5.238238763809204, "rewards/get_intelligibility_reward_std": 10.606562519073487, "rewards/get_target_len_reward": -0.023610591888427734, "rewards/get_target_len_reward_std": 0.06324613895267248, "step": 2820 }, { "advantages": -5.220373740399964e-07, "advantages_std": 1.566174530982971, "clip_ratio": 0.0, "completion_length": 86.38928833007813, "epoch": 2.129323308270677, "grad_norm": 10.8125, "kl": 0.5685814306139946, "learning_rate": 3.9360902255639095e-06, "loss": 0.0659, "num_tokens": 86755590.0, "reward": -1.7503463923931122, "reward_std": 6.914837121963501, "rewards/get_chromagram_reward": 0.6366630434989929, "rewards/get_chromagram_reward_std": 0.12139641642570495, "rewards/get_intelligibility_reward": -5.858629488945008, "rewards/get_intelligibility_reward_std": 10.730266666412353, "rewards/get_target_len_reward": -0.029072243347764017, "rewards/get_target_len_reward_std": 0.07542071975767613, "step": 2830 }, { "advantages": 1.614292443719023e-07, "advantages_std": 1.549390870332718, "clip_ratio": 0.0, "completion_length": 87.40119247436523, "epoch": 2.136842105263158, "grad_norm": 24.0, "kl": 0.3246032640337944, "learning_rate": 3.93233082706767e-06, "loss": 0.0372, "num_tokens": 87063339.0, "reward": -1.4028959453105927, "reward_std": 6.6174252986907955, "rewards/get_chromagram_reward": 0.6235669672489166, "rewards/get_chromagram_reward_std": 0.11671028062701225, "rewards/get_intelligibility_reward": -4.811532521247864, "rewards/get_intelligibility_reward_std": 10.584142017364503, "rewards/get_target_len_reward": -0.020721999648958444, "rewards/get_target_len_reward_std": 0.06385829038918019, "step": 2840 }, { "advantages": -2.299746029166272e-07, "advantages_std": 1.6184433102607727, "clip_ratio": 0.0, "completion_length": 86.2208351135254, "epoch": 2.144360902255639, "grad_norm": 7.3125, "kl": 0.30331481248140335, "learning_rate": 3.928571428571429e-06, "loss": 0.0306, "num_tokens": 87368626.0, "reward": -1.2707931637763976, "reward_std": 6.4112237930297855, "rewards/get_chromagram_reward": 0.6089653193950653, "rewards/get_chromagram_reward_std": 0.11852159649133683, "rewards/get_intelligibility_reward": -4.396771430969238, "rewards/get_intelligibility_reward_std": 10.385689878463745, "rewards/get_target_len_reward": -0.02457326604053378, "rewards/get_target_len_reward_std": 0.0729073267430067, "step": 2850 }, { "advantages": -2.3146470482515724e-07, "advantages_std": 1.4920684814453125, "clip_ratio": 0.0, "completion_length": 88.57321624755859, "epoch": 2.1518796992481204, "grad_norm": 5.6875, "kl": 0.2971339821815491, "learning_rate": 3.9248120300751885e-06, "loss": 0.0382, "num_tokens": 87679743.0, "reward": -1.4728783011436462, "reward_std": 6.793437814712524, "rewards/get_chromagram_reward": 0.6393875896930694, "rewards/get_chromagram_reward_std": 0.11733865663409233, "rewards/get_intelligibility_reward": -5.031013822555542, "rewards/get_intelligibility_reward_std": 10.67188892364502, "rewards/get_target_len_reward": -0.027008223440498115, "rewards/get_target_len_reward_std": 0.08672410566359759, "step": 2860 }, { "advantages": -6.073465172562465e-07, "advantages_std": 1.5244192004203796, "clip_ratio": 0.0, "completion_length": 86.31845474243164, "epoch": 2.1593984962406014, "grad_norm": 4.9375, "kl": 0.3206807836890221, "learning_rate": 3.921052631578947e-06, "loss": 0.0363, "num_tokens": 87984918.0, "reward": -1.6187335789203643, "reward_std": 6.374583053588867, "rewards/get_chromagram_reward": 0.6180616199970246, "rewards/get_chromagram_reward_std": 0.11240240931510925, "rewards/get_intelligibility_reward": -5.454882073402405, "rewards/get_intelligibility_reward_std": 9.929090690612792, "rewards/get_target_len_reward": -0.019380014995113014, "rewards/get_target_len_reward_std": 0.05369943529367447, "step": 2870 }, { "advantages": -3.774961040647895e-07, "advantages_std": 1.7331120729446412, "clip_ratio": 0.0, "completion_length": 86.33511962890626, "epoch": 2.166917293233083, "grad_norm": 5.1875, "kl": 9068.70393010974, "learning_rate": 3.917293233082707e-06, "loss": 906.8738, "num_tokens": 88290049.0, "reward": -0.9701588183641434, "reward_std": 6.336407804489136, "rewards/get_chromagram_reward": 0.6315642714500427, "rewards/get_chromagram_reward_std": 0.12316482216119766, "rewards/get_intelligibility_reward": -3.5188528180122374, "rewards/get_intelligibility_reward_std": 10.371418666839599, "rewards/get_target_len_reward": -0.02318771481513977, "rewards/get_target_len_reward_std": 0.058829471841454505, "step": 2880 }, { "advantages": -4.755953710855465e-08, "advantages_std": 1.6271605849266053, "clip_ratio": 0.0, "completion_length": 85.03988265991211, "epoch": 2.174436090225564, "grad_norm": 6.40625, "kl": 0.3325976699590683, "learning_rate": 3.9135338345864666e-06, "loss": 0.0346, "num_tokens": 88591385.0, "reward": -1.823885554075241, "reward_std": 6.934875011444092, "rewards/get_chromagram_reward": 0.6075676620006562, "rewards/get_chromagram_reward_std": 0.10233750753104687, "rewards/get_intelligibility_reward": -6.061479997634888, "rewards/get_intelligibility_reward_std": 10.791695308685302, "rewards/get_target_len_reward": -0.017743840347975492, "rewards/get_target_len_reward_std": 0.05164923332631588, "step": 2890 }, { "advantages": 3.4285089807184476e-07, "advantages_std": 1.563487422466278, "clip_ratio": 0.0, "completion_length": 88.96369171142578, "epoch": 2.181954887218045, "grad_norm": 194.0, "kl": 0.30613133758306504, "learning_rate": 3.909774436090225e-06, "loss": 0.0326, "num_tokens": 88904159.0, "reward": -1.0743597209453584, "reward_std": 6.527523565292358, "rewards/get_chromagram_reward": 0.6418916761875153, "rewards/get_chromagram_reward_std": 0.11317120790481568, "rewards/get_intelligibility_reward": -3.8442397631704806, "rewards/get_intelligibility_reward_std": 10.655349826812744, "rewards/get_target_len_reward": -0.020730842463672162, "rewards/get_target_len_reward_std": 0.05648756790906191, "step": 2900 }, { "advantages": 3.5390256698519805e-07, "advantages_std": 1.5687660932540894, "clip_ratio": 0.0, "completion_length": 88.92916946411133, "epoch": 2.1894736842105265, "grad_norm": 79360.0, "kl": 23.718442597985266, "learning_rate": 3.906015037593985e-06, "loss": 2.3749, "num_tokens": 89216874.0, "reward": -0.9435413286089898, "reward_std": 6.158167028427124, "rewards/get_chromagram_reward": 0.6337945759296417, "rewards/get_chromagram_reward_std": 0.12330271378159523, "rewards/get_intelligibility_reward": -3.438680863380432, "rewards/get_intelligibility_reward_std": 10.161257123947143, "rewards/get_target_len_reward": -0.02573751602321863, "rewards/get_target_len_reward_std": 0.06352040991187095, "step": 2910 }, { "advantages": 7.947287627985134e-09, "advantages_std": 1.7121564388275146, "clip_ratio": 0.0, "completion_length": 86.64881057739258, "epoch": 2.1969924812030075, "grad_norm": 6.03125, "kl": 0.4889916032552719, "learning_rate": 3.902255639097745e-06, "loss": 0.0531, "num_tokens": 89523055.0, "reward": -1.565059586800635, "reward_std": 6.954021406173706, "rewards/get_chromagram_reward": 0.607182401418686, "rewards/get_chromagram_reward_std": 0.09809157475829125, "rewards/get_intelligibility_reward": -5.282276725769043, "rewards/get_intelligibility_reward_std": 10.995676708221435, "rewards/get_target_len_reward": -0.02008410422131419, "rewards/get_target_len_reward_std": 0.06095631066709757, "step": 2920 }, { "advantages": -5.918244847968878e-07, "advantages_std": 1.6808531880378723, "clip_ratio": 0.0, "completion_length": 85.08393020629883, "epoch": 2.2045112781954885, "grad_norm": 28.625, "kl": 0.29349366426467893, "learning_rate": 3.898496240601504e-06, "loss": 0.0305, "num_tokens": 89823971.0, "reward": -1.5419997453689576, "reward_std": 6.375377082824707, "rewards/get_chromagram_reward": 0.6257684767246247, "rewards/get_chromagram_reward_std": 0.11149628758430481, "rewards/get_intelligibility_reward": -5.231714677810669, "rewards/get_intelligibility_reward_std": 9.97823076248169, "rewards/get_target_len_reward": -0.020052669383585454, "rewards/get_target_len_reward_std": 0.0618388619273901, "step": 2930 }, { "advantages": 1.589457234274505e-07, "advantages_std": 1.594847321510315, "clip_ratio": 0.0, "completion_length": 86.02381057739258, "epoch": 2.21203007518797, "grad_norm": 15.9375, "kl": 0.34517409056425097, "learning_rate": 3.894736842105263e-06, "loss": 0.0369, "num_tokens": 90128555.0, "reward": -1.4202000886201858, "reward_std": 6.686147880554199, "rewards/get_chromagram_reward": 0.6276867032051087, "rewards/get_chromagram_reward_std": 0.12245145216584205, "rewards/get_intelligibility_reward": -4.86482664346695, "rewards/get_intelligibility_reward_std": 10.5703914642334, "rewards/get_target_len_reward": -0.023460079357028006, "rewards/get_target_len_reward_std": 0.06456102542579174, "step": 2940 }, { "advantages": 1.5497208067927203e-07, "advantages_std": 1.538965892791748, "clip_ratio": 0.0, "completion_length": 90.55238265991211, "epoch": 2.219548872180451, "grad_norm": 5.375, "kl": 0.3080202296376228, "learning_rate": 3.890977443609023e-06, "loss": 0.035, "num_tokens": 90445397.0, "reward": -1.2401035517454146, "reward_std": 6.639403533935547, "rewards/get_chromagram_reward": 0.6180085897445678, "rewards/get_chromagram_reward_std": 0.09938773810863495, "rewards/get_intelligibility_reward": -4.320744025707245, "rewards/get_intelligibility_reward_std": 10.65583438873291, "rewards/get_target_len_reward": -0.017574947141110898, "rewards/get_target_len_reward_std": 0.05353275462985039, "step": 2950 }, { "advantages": -6.544093196225731e-08, "advantages_std": 1.6957322597503661, "clip_ratio": 0.0, "completion_length": 89.52262115478516, "epoch": 2.2270676691729325, "grad_norm": 7.125, "kl": 0.3561790719628334, "learning_rate": 3.887218045112782e-06, "loss": 0.041, "num_tokens": 90759465.0, "reward": -1.3227708965539933, "reward_std": 6.579633474349976, "rewards/get_chromagram_reward": 0.6285708963871002, "rewards/get_chromagram_reward_std": 0.1098080925643444, "rewards/get_intelligibility_reward": -4.573193967342377, "rewards/get_intelligibility_reward_std": 10.618196725845337, "rewards/get_target_len_reward": -0.023689321987330914, "rewards/get_target_len_reward_std": 0.07437594067305327, "step": 2960 }, { "advantages": -2.3879112136526716e-07, "advantages_std": 1.5580598294734955, "clip_ratio": 0.0, "completion_length": 85.85178680419922, "epoch": 2.2345864661654136, "grad_norm": 6.0, "kl": 0.3017871379852295, "learning_rate": 3.883458646616542e-06, "loss": 0.0429, "num_tokens": 91062979.0, "reward": -1.7410799086093902, "reward_std": 6.695904397964478, "rewards/get_chromagram_reward": 0.6258544147014617, "rewards/get_chromagram_reward_std": 0.11486873552203178, "rewards/get_intelligibility_reward": -5.821660828590393, "rewards/get_intelligibility_reward_std": 10.377565860748291, "rewards/get_target_len_reward": -0.027432804461568594, "rewards/get_target_len_reward_std": 0.08908913023769856, "step": 2970 }, { "advantages": -6.120652017216344e-07, "advantages_std": 1.558844006061554, "clip_ratio": 0.0, "completion_length": 89.43392868041992, "epoch": 2.2421052631578946, "grad_norm": 27.25, "kl": 0.37718722224235535, "learning_rate": 3.879699248120301e-06, "loss": 0.0376, "num_tokens": 91375952.0, "reward": -1.268139982968569, "reward_std": 6.343207359313965, "rewards/get_chromagram_reward": 0.62771937251091, "rewards/get_chromagram_reward_std": 0.10384307354688645, "rewards/get_intelligibility_reward": -4.411396241188049, "rewards/get_intelligibility_reward_std": 10.165166664123536, "rewards/get_target_len_reward": -0.020742816664278508, "rewards/get_target_len_reward_std": 0.05104887764900923, "step": 2980 }, { "advantages": -5.987783431748994e-07, "advantages_std": 1.6743453860282898, "clip_ratio": 0.0, "completion_length": 86.1476203918457, "epoch": 2.249624060150376, "grad_norm": 11.75, "kl": 0.5795433431863785, "learning_rate": 3.8759398496240605e-06, "loss": 0.0562, "num_tokens": 91680564.0, "reward": -1.6921858012676239, "reward_std": 7.0761829853057865, "rewards/get_chromagram_reward": 0.62059086561203, "rewards/get_chromagram_reward_std": 0.11869660988450051, "rewards/get_intelligibility_reward": -5.6784823179245, "rewards/get_intelligibility_reward_std": 11.159746551513672, "rewards/get_target_len_reward": -0.01866564080119133, "rewards/get_target_len_reward_std": 0.04305282030254602, "step": 2990 }, { "advantages": 3.58372920583605e-07, "advantages_std": 1.5921060204505921, "clip_ratio": 0.0, "completion_length": 87.91011962890624, "epoch": 2.257142857142857, "grad_norm": 15.3125, "kl": 0.3640074670314789, "learning_rate": 3.87218045112782e-06, "loss": 0.0387, "num_tokens": 91989900.0, "reward": -1.9970873475074769, "reward_std": 7.173796224594116, "rewards/get_chromagram_reward": 0.6152364611625671, "rewards/get_chromagram_reward_std": 0.12383458390831947, "rewards/get_intelligibility_reward": -6.585516786575317, "rewards/get_intelligibility_reward_std": 10.964896774291992, "rewards/get_target_len_reward": -0.020981486793607472, "rewards/get_target_len_reward_std": 0.055330739729106425, "step": 3000 }, { "advantages": -6.300707894979496e-07, "advantages_std": 1.5493954777717591, "clip_ratio": 0.0, "completion_length": 91.04047775268555, "epoch": 2.264661654135338, "grad_norm": 6.59375, "kl": 0.35806858688592913, "learning_rate": 3.868421052631579e-06, "loss": 0.0393, "num_tokens": 92307862.0, "reward": -1.3623809725046159, "reward_std": 6.6515583992004395, "rewards/get_chromagram_reward": 0.5948490619659423, "rewards/get_chromagram_reward_std": 0.11022032350301743, "rewards/get_intelligibility_reward": -4.6636159181594845, "rewards/get_intelligibility_reward_std": 10.71325330734253, "rewards/get_target_len_reward": -0.0183757777325809, "rewards/get_target_len_reward_std": 0.06054406575858593, "step": 3010 }, { "advantages": -6.794929596765087e-07, "advantages_std": 1.497561240196228, "clip_ratio": 0.0, "completion_length": 83.14404830932617, "epoch": 2.2721804511278196, "grad_norm": 8.1875, "kl": 0.3727316588163376, "learning_rate": 3.8646616541353386e-06, "loss": 0.042, "num_tokens": 92604494.0, "reward": -1.775395917892456, "reward_std": 7.0423095703125, "rewards/get_chromagram_reward": 0.6238544166088105, "rewards/get_chromagram_reward_std": 0.13135162368416786, "rewards/get_intelligibility_reward": -5.922222852706909, "rewards/get_intelligibility_reward_std": 11.05590362548828, "rewards/get_target_len_reward": -0.027818970568478107, "rewards/get_target_len_reward_std": 0.075175317004323, "step": 3020 }, { "advantages": 2.9628475983756175e-07, "advantages_std": 1.6100945949554444, "clip_ratio": 0.0, "completion_length": 84.90952529907227, "epoch": 2.2796992481203007, "grad_norm": 6.4375, "kl": 0.40788850784301756, "learning_rate": 3.860902255639098e-06, "loss": 0.0404, "num_tokens": 92905243.0, "reward": -1.580957293510437, "reward_std": 6.97661657333374, "rewards/get_chromagram_reward": 0.6177934765815735, "rewards/get_chromagram_reward_std": 0.12050609439611434, "rewards/get_intelligibility_reward": -5.341689097881317, "rewards/get_intelligibility_reward_std": 11.07231788635254, "rewards/get_target_len_reward": -0.01897582933306694, "rewards/get_target_len_reward_std": 0.04809415116906166, "step": 3030 }, { "advantages": -4.5945250803924865e-08, "advantages_std": 1.5940813064575194, "clip_ratio": 0.0, "completion_length": 90.090478515625, "epoch": 2.287218045112782, "grad_norm": 17.375, "kl": 0.3240895554423332, "learning_rate": 3.857142857142858e-06, "loss": 0.0336, "num_tokens": 93221288.0, "reward": -1.0903705094009637, "reward_std": 6.505664348602295, "rewards/get_chromagram_reward": 0.6320096373558044, "rewards/get_chromagram_reward_std": 0.11818938925862313, "rewards/get_intelligibility_reward": -3.8794002890586854, "rewards/get_intelligibility_reward_std": 10.639200782775879, "rewards/get_target_len_reward": -0.023720779828727244, "rewards/get_target_len_reward_std": 0.06329209692776203, "step": 3040 }, { "advantages": 9.238720224402642e-08, "advantages_std": 1.5934074997901917, "clip_ratio": 0.0, "completion_length": 86.07559585571289, "epoch": 2.294736842105263, "grad_norm": 5.875, "kl": 0.6489260986447334, "learning_rate": 3.853383458646617e-06, "loss": 0.0662, "num_tokens": 93525561.0, "reward": -1.6520125150680542, "reward_std": 6.356071853637696, "rewards/get_chromagram_reward": 0.6185117423534393, "rewards/get_chromagram_reward_std": 0.12471347972750664, "rewards/get_intelligibility_reward": -5.55214421749115, "rewards/get_intelligibility_reward_std": 9.824131298065186, "rewards/get_target_len_reward": -0.022404894977808, "rewards/get_target_len_reward_std": 0.05886543095111847, "step": 3050 }, { "advantages": -2.0340086213099795e-07, "advantages_std": 1.4341715812683105, "clip_ratio": 0.0, "completion_length": 88.61488189697266, "epoch": 2.302255639097744, "grad_norm": 8.75, "kl": 0.3880483269691467, "learning_rate": 3.849624060150376e-06, "loss": 0.0407, "num_tokens": 93836947.0, "reward": -1.6591898486018182, "reward_std": 6.895362758636475, "rewards/get_chromagram_reward": 0.6150469839572906, "rewards/get_chromagram_reward_std": 0.11908129900693894, "rewards/get_intelligibility_reward": -5.571700441837311, "rewards/get_intelligibility_reward_std": 10.788173866271972, "rewards/get_target_len_reward": -0.020915597584098576, "rewards/get_target_len_reward_std": 0.055847865715622905, "step": 3060 }, { "advantages": 3.601114016760221e-07, "advantages_std": 1.7079174041748046, "clip_ratio": 0.0, "completion_length": 88.07024002075195, "epoch": 2.3097744360902257, "grad_norm": 27.25, "kl": 0.36389251947402956, "learning_rate": 3.845864661654136e-06, "loss": 0.042, "num_tokens": 94146528.0, "reward": -1.466315120458603, "reward_std": 6.648503303527832, "rewards/get_chromagram_reward": 0.6157117486000061, "rewards/get_chromagram_reward_std": 0.1228131890296936, "rewards/get_intelligibility_reward": -4.994232511520385, "rewards/get_intelligibility_reward_std": 10.560632991790772, "rewards/get_target_len_reward": -0.020424212515354156, "rewards/get_target_len_reward_std": 0.06263697929680348, "step": 3070 }, { "advantages": -3.859400834471671e-07, "advantages_std": 1.6529303193092346, "clip_ratio": 0.0, "completion_length": 87.0821434020996, "epoch": 2.3172932330827067, "grad_norm": 612.0, "kl": 0.5107886403799057, "learning_rate": 3.842105263157895e-06, "loss": 0.0555, "num_tokens": 94452819.0, "reward": -1.557619434595108, "reward_std": 6.65304388999939, "rewards/get_chromagram_reward": 0.6286049544811249, "rewards/get_chromagram_reward_std": 0.11012716889381409, "rewards/get_intelligibility_reward": -5.282437968254089, "rewards/get_intelligibility_reward_std": 10.468348979949951, "rewards/get_target_len_reward": -0.019024977181106805, "rewards/get_target_len_reward_std": 0.06559648010879755, "step": 3080 }, { "advantages": 3.0870238987290577e-07, "advantages_std": 1.6085237383842468, "clip_ratio": 0.0, "completion_length": 87.23928756713867, "epoch": 2.324812030075188, "grad_norm": 7.75, "kl": 0.4844575524330139, "learning_rate": 3.838345864661654e-06, "loss": 0.0544, "num_tokens": 94759931.0, "reward": -1.1725915879011155, "reward_std": 6.456204605102539, "rewards/get_chromagram_reward": 0.6367210924625397, "rewards/get_chromagram_reward_std": 0.11174852326512337, "rewards/get_intelligibility_reward": -4.1291744112968445, "rewards/get_intelligibility_reward_std": 10.456843757629395, "rewards/get_target_len_reward": -0.025321154668927193, "rewards/get_target_len_reward_std": 0.07306363489478826, "step": 3090 }, { "advantages": -4.76837168861266e-07, "advantages_std": 1.5963156700134278, "clip_ratio": 0.0, "completion_length": 88.89345474243164, "epoch": 2.3323308270676693, "grad_norm": 7.6875, "kl": 0.36408271491527555, "learning_rate": 3.834586466165414e-06, "loss": 0.0416, "num_tokens": 95071621.0, "reward": -1.2260048598051072, "reward_std": 5.992364072799683, "rewards/get_chromagram_reward": 0.6308612644672393, "rewards/get_chromagram_reward_std": 0.11509114354848862, "rewards/get_intelligibility_reward": -4.284396481513977, "rewards/get_intelligibility_reward_std": 9.64006485939026, "rewards/get_target_len_reward": -0.02447923384606838, "rewards/get_target_len_reward_std": 0.07114081848412752, "step": 3100 }, { "advantages": 3.2633543014526365e-07, "advantages_std": 1.6563255071640015, "clip_ratio": 0.0, "completion_length": 86.56488265991212, "epoch": 2.3398496240601503, "grad_norm": 84.5, "kl": 0.3443057775497437, "learning_rate": 3.830827067669174e-06, "loss": 0.0402, "num_tokens": 95377114.0, "reward": -1.6160699844360351, "reward_std": 6.532747888565064, "rewards/get_chromagram_reward": 0.6172747492790223, "rewards/get_chromagram_reward_std": 0.11039882078766823, "rewards/get_intelligibility_reward": -5.43791823387146, "rewards/get_intelligibility_reward_std": 10.185232734680175, "rewards/get_target_len_reward": -0.027566286642104386, "rewards/get_target_len_reward_std": 0.0706888772547245, "step": 3110 }, { "advantages": 6.825973692059506e-07, "advantages_std": 1.7419674158096314, "clip_ratio": 0.0, "completion_length": 87.59166870117187, "epoch": 2.3473684210526318, "grad_norm": 9.8125, "kl": 0.29823374897241595, "learning_rate": 3.8270676691729325e-06, "loss": 0.0376, "num_tokens": 95685240.0, "reward": -1.8026684641838073, "reward_std": 6.953642177581787, "rewards/get_chromagram_reward": 0.6273639619350433, "rewards/get_chromagram_reward_std": 0.11382095590233803, "rewards/get_intelligibility_reward": -6.013849997520447, "rewards/get_intelligibility_reward_std": 10.748786926269531, "rewards/get_target_len_reward": -0.021518971025943755, "rewards/get_target_len_reward_std": 0.06433455049991607, "step": 3120 }, { "advantages": 1.3858080478712508e-07, "advantages_std": 1.4858174562454223, "clip_ratio": 0.0, "completion_length": 85.37440567016601, "epoch": 2.354887218045113, "grad_norm": 5.59375, "kl": 0.3512969747185707, "learning_rate": 3.823308270676692e-06, "loss": 0.0383, "num_tokens": 95987277.0, "reward": -1.7362686932086944, "reward_std": 7.01571249961853, "rewards/get_chromagram_reward": 0.6190242350101471, "rewards/get_chromagram_reward_std": 0.11482224762439727, "rewards/get_intelligibility_reward": -5.808184885978699, "rewards/get_intelligibility_reward_std": 11.058324146270753, "rewards/get_target_len_reward": -0.019645236805081366, "rewards/get_target_len_reward_std": 0.05717686675488949, "step": 3130 }, { "advantages": -1.1151035437251266e-07, "advantages_std": 1.7415273547172547, "clip_ratio": 0.0, "completion_length": 87.55774002075195, "epoch": 2.362406015037594, "grad_norm": 37.25, "kl": 0.36580796390771864, "learning_rate": 3.819548872180452e-06, "loss": 0.039, "num_tokens": 96295325.0, "reward": -1.448211270570755, "reward_std": 6.679815864562988, "rewards/get_chromagram_reward": 0.6096611797809601, "rewards/get_chromagram_reward_std": 0.12779648303985597, "rewards/get_intelligibility_reward": -4.932143640518189, "rewards/get_intelligibility_reward_std": 10.682099866867066, "rewards/get_target_len_reward": -0.02215128391981125, "rewards/get_target_len_reward_std": 0.06295223757624627, "step": 3140 }, { "advantages": 4.5945250359835656e-08, "advantages_std": 1.5435296535491942, "clip_ratio": 0.0, "completion_length": 88.24166793823242, "epoch": 2.3699248120300753, "grad_norm": 5.40625, "kl": 0.29209394156932833, "learning_rate": 3.815789473684211e-06, "loss": 0.029, "num_tokens": 96606282.0, "reward": -1.2214705765247345, "reward_std": 6.132448053359985, "rewards/get_chromagram_reward": 0.6140880525112152, "rewards/get_chromagram_reward_std": 0.11201696321368218, "rewards/get_intelligibility_reward": -4.26198422908783, "rewards/get_intelligibility_reward_std": 9.883226490020752, "rewards/get_target_len_reward": -0.01651527201756835, "rewards/get_target_len_reward_std": 0.043393169157207014, "step": 3150 }, { "advantages": 1.6937654216953036e-07, "advantages_std": 1.4640429258346557, "clip_ratio": 0.0, "completion_length": 85.7232162475586, "epoch": 2.3774436090225564, "grad_norm": 7.1875, "kl": 0.482822397351265, "learning_rate": 3.81203007518797e-06, "loss": 0.0525, "num_tokens": 96908428.0, "reward": -1.7714835286140442, "reward_std": 6.791526174545288, "rewards/get_chromagram_reward": 0.6277442216873169, "rewards/get_chromagram_reward_std": 0.11088423728942871, "rewards/get_intelligibility_reward": -5.919493269920349, "rewards/get_intelligibility_reward_std": 10.559081554412842, "rewards/get_target_len_reward": -0.022701340448111295, "rewards/get_target_len_reward_std": 0.06931588556617499, "step": 3160 }, { "advantages": -2.0811954364319264e-07, "advantages_std": 1.6946855902671814, "clip_ratio": 0.0, "completion_length": 88.08869171142578, "epoch": 2.3849624060150374, "grad_norm": 7.03125, "kl": 0.419588178396225, "learning_rate": 3.80827067669173e-06, "loss": 0.0447, "num_tokens": 97218687.0, "reward": -1.6822382628917694, "reward_std": 6.9995640277862545, "rewards/get_chromagram_reward": 0.6256317377090455, "rewards/get_chromagram_reward_std": 0.11372272670269012, "rewards/get_intelligibility_reward": -5.649773263931275, "rewards/get_intelligibility_reward_std": 11.008953666687011, "rewards/get_target_len_reward": -0.022573013510555028, "rewards/get_target_len_reward_std": 0.06304403096437454, "step": 3170 }, { "advantages": -1.7061830615006102e-07, "advantages_std": 1.6064040184020996, "clip_ratio": 0.0, "completion_length": 86.60654983520507, "epoch": 2.392481203007519, "grad_norm": 8.5625, "kl": 0.3004844680428505, "learning_rate": 3.804511278195489e-06, "loss": 0.0407, "num_tokens": 97524048.0, "reward": -1.4096578717231751, "reward_std": 6.6632637023925785, "rewards/get_chromagram_reward": 0.6215019702911377, "rewards/get_chromagram_reward_std": 0.11183991581201554, "rewards/get_intelligibility_reward": -4.828030633926391, "rewards/get_intelligibility_reward_std": 10.725747776031493, "rewards/get_target_len_reward": -0.0224445603787899, "rewards/get_target_len_reward_std": 0.06768963728100061, "step": 3180 }, { "advantages": -4.1375557673362097e-07, "advantages_std": 1.5909752130508423, "clip_ratio": 0.0, "completion_length": 85.55059585571288, "epoch": 2.4, "grad_norm": 8.125, "kl": 0.35912114679813384, "learning_rate": 3.8007518796992483e-06, "loss": 0.0387, "num_tokens": 97826861.0, "reward": -1.456592407822609, "reward_std": 6.335251379013061, "rewards/get_chromagram_reward": 0.6159971415996551, "rewards/get_chromagram_reward_std": 0.10423725917935371, "rewards/get_intelligibility_reward": -4.967746996879578, "rewards/get_intelligibility_reward_std": 10.034010696411134, "rewards/get_target_len_reward": -0.018027166556566953, "rewards/get_target_len_reward_std": 0.05203140545636416, "step": 3190 }, { "advantages": -4.798173961262364e-07, "advantages_std": 1.6593538522720337, "clip_ratio": 0.0, "completion_length": 90.60654907226562, "epoch": 2.4075187969924814, "grad_norm": 6.59375, "kl": 0.6495398178696632, "learning_rate": 3.796992481203008e-06, "loss": 0.0662, "num_tokens": 98144452.0, "reward": -1.1371527172625064, "reward_std": 6.710154914855957, "rewards/get_chromagram_reward": 0.6279964745044708, "rewards/get_chromagram_reward_std": 0.11072349175810814, "rewards/get_intelligibility_reward": -4.014143347740173, "rewards/get_intelligibility_reward_std": 10.976589679718018, "rewards/get_target_len_reward": -0.02531114164739847, "rewards/get_target_len_reward_std": 0.07920041754841804, "step": 3200 }, { "advantages": -3.515432489109571e-07, "advantages_std": 1.6400597095489502, "clip_ratio": 0.0, "completion_length": 86.48393020629882, "epoch": 2.4150375939849624, "grad_norm": 5.46875, "kl": 0.3224791929125786, "learning_rate": 3.793233082706767e-06, "loss": 0.0342, "num_tokens": 98449632.0, "reward": -1.318561613559723, "reward_std": 6.473997402191162, "rewards/get_chromagram_reward": 0.6198614180088043, "rewards/get_chromagram_reward_std": 0.12230006903409958, "rewards/get_intelligibility_reward": -4.55453812032938, "rewards/get_intelligibility_reward_std": 10.3699782371521, "rewards/get_target_len_reward": -0.021007803454995155, "rewards/get_target_len_reward_std": 0.05592170432209968, "step": 3210 }, { "advantages": -9.536742879845406e-08, "advantages_std": 1.5742892980575562, "clip_ratio": 0.0, "completion_length": 85.17500305175781, "epoch": 2.4225563909774435, "grad_norm": 8.1875, "kl": 0.32046190053224566, "learning_rate": 3.789473684210527e-06, "loss": 0.0308, "num_tokens": 98751701.0, "reward": -1.5562925934791565, "reward_std": 6.561092281341553, "rewards/get_chromagram_reward": 0.6253645658493042, "rewards/get_chromagram_reward_std": 0.11320054829120636, "rewards/get_intelligibility_reward": -5.274776554107666, "rewards/get_intelligibility_reward_std": 10.261778974533081, "rewards/get_target_len_reward": -0.019465396646410227, "rewards/get_target_len_reward_std": 0.04603518862277269, "step": 3220 }, { "advantages": -7.872783953644103e-08, "advantages_std": 1.612378227710724, "clip_ratio": 0.0, "completion_length": 84.65952529907227, "epoch": 2.430075187969925, "grad_norm": 5.375, "kl": 0.3558365270495415, "learning_rate": 3.785714285714286e-06, "loss": 0.0406, "num_tokens": 99052318.0, "reward": -1.578985768556595, "reward_std": 6.9117063045501705, "rewards/get_chromagram_reward": 0.617356663942337, "rewards/get_chromagram_reward_std": 0.12302884310483933, "rewards/get_intelligibility_reward": -5.33222382068634, "rewards/get_intelligibility_reward_std": 10.899449586868286, "rewards/get_target_len_reward": -0.022089978307485582, "rewards/get_target_len_reward_std": 0.06822279021143914, "step": 3230 }, { "advantages": -2.3394824539835212e-07, "advantages_std": 1.5828737497329712, "clip_ratio": 0.0, "completion_length": 87.87024002075195, "epoch": 2.437593984962406, "grad_norm": 9.6875, "kl": 3.2737128630280496, "learning_rate": 3.7819548872180457e-06, "loss": 0.3328, "num_tokens": 99361553.0, "reward": -1.607145693525672, "reward_std": 6.754365253448486, "rewards/get_chromagram_reward": 0.6224554538726806, "rewards/get_chromagram_reward_std": 0.11792162135243416, "rewards/get_intelligibility_reward": -5.420157140493393, "rewards/get_intelligibility_reward_std": 10.555351066589356, "rewards/get_target_len_reward": -0.023735210206359623, "rewards/get_target_len_reward_std": 0.06005271524190903, "step": 3240 }, { "advantages": -5.570550882794123e-07, "advantages_std": 1.5856150448322297, "clip_ratio": 0.0, "completion_length": 86.98571548461913, "epoch": 2.4451127819548875, "grad_norm": 6.53125, "kl": 0.3818998262286186, "learning_rate": 3.778195488721805e-06, "loss": 0.0464, "num_tokens": 99669024.0, "reward": -1.3962798684835434, "reward_std": 7.34520378112793, "rewards/get_chromagram_reward": 0.6202928602695466, "rewards/get_chromagram_reward_std": 0.10778555646538734, "rewards/get_intelligibility_reward": -4.7882393300533295, "rewards/get_intelligibility_reward_std": 11.882244777679443, "rewards/get_target_len_reward": -0.020892890822142362, "rewards/get_target_len_reward_std": 0.06483328007161618, "step": 3250 }, { "advantages": 2.7529895376687816e-07, "advantages_std": 1.6412216067314147, "clip_ratio": 0.0, "completion_length": 84.91369171142578, "epoch": 2.4526315789473685, "grad_norm": 6.6875, "kl": 0.32161408066749575, "learning_rate": 3.7744360902255645e-06, "loss": 0.0369, "num_tokens": 99970373.0, "reward": -1.531099909543991, "reward_std": 6.41421217918396, "rewards/get_chromagram_reward": 0.610172426700592, "rewards/get_chromagram_reward_std": 0.1167138785123825, "rewards/get_intelligibility_reward": -5.1840015888214115, "rewards/get_intelligibility_reward_std": 10.05807113647461, "rewards/get_target_len_reward": -0.019470279663801195, "rewards/get_target_len_reward_std": 0.05536416377872229, "step": 3260 }, { "advantages": 2.468625751816944e-07, "advantages_std": 1.6378588914871215, "clip_ratio": 0.0, "completion_length": 85.82440795898438, "epoch": 2.4601503759398495, "grad_norm": 5.875, "kl": 0.3266435742378235, "learning_rate": 3.7706766917293237e-06, "loss": 0.0365, "num_tokens": 100274737.0, "reward": -1.4399422705173492, "reward_std": 6.60957703590393, "rewards/get_chromagram_reward": 0.6344795823097229, "rewards/get_chromagram_reward_std": 0.10898077189922332, "rewards/get_intelligibility_reward": -4.932069408893585, "rewards/get_intelligibility_reward_std": 10.547198295593262, "rewards/get_target_len_reward": -0.02223665835335851, "rewards/get_target_len_reward_std": 0.06397623158991336, "step": 3270 }, { "advantages": -1.778205387381604e-07, "advantages_std": 1.6094249129295348, "clip_ratio": 0.0, "completion_length": 86.78155059814453, "epoch": 2.467669172932331, "grad_norm": 6.4375, "kl": 0.3754301965236664, "learning_rate": 3.7669172932330825e-06, "loss": 0.0411, "num_tokens": 100580476.0, "reward": -1.7137349367141723, "reward_std": 6.945359897613526, "rewards/get_chromagram_reward": 0.6193328857421875, "rewards/get_chromagram_reward_std": 0.12088619396090508, "rewards/get_intelligibility_reward": -5.740228915214539, "rewards/get_intelligibility_reward_std": 10.897998905181884, "rewards/get_target_len_reward": -0.020308405719697477, "rewards/get_target_len_reward_std": 0.05714104510843754, "step": 3280 }, { "advantages": -2.9243536658896118e-08, "advantages_std": 1.6005398750305175, "clip_ratio": 0.0, "completion_length": 89.58571548461914, "epoch": 2.475187969924812, "grad_norm": 9.1875, "kl": 0.4362799167633057, "learning_rate": 3.7631578947368426e-06, "loss": 0.0501, "num_tokens": 100894376.0, "reward": -1.580004519224167, "reward_std": 7.118606805801392, "rewards/get_chromagram_reward": 0.6110920429229736, "rewards/get_chromagram_reward_std": 0.12410885691642762, "rewards/get_intelligibility_reward": -5.3250489950180055, "rewards/get_intelligibility_reward_std": 11.340526008605957, "rewards/get_target_len_reward": -0.026056183315813542, "rewards/get_target_len_reward_std": 0.08824401944875718, "step": 3290 }, { "advantages": -6.062289248376374e-07, "advantages_std": 1.5084069848060608, "clip_ratio": 0.0, "completion_length": 84.20535812377929, "epoch": 2.482706766917293, "grad_norm": 6.59375, "kl": 0.286747407913208, "learning_rate": 3.7593984962406014e-06, "loss": 0.0319, "num_tokens": 101194428.0, "reward": -1.3394009791314603, "reward_std": 6.1571714878082275, "rewards/get_chromagram_reward": 0.6238884270191193, "rewards/get_chromagram_reward_std": 0.11208853796124459, "rewards/get_intelligibility_reward": -4.623925578594208, "rewards/get_intelligibility_reward_std": 9.772040939331054, "rewards/get_target_len_reward": -0.01816573003306985, "rewards/get_target_len_reward_std": 0.05329264029860496, "step": 3300 }, { "advantages": 1.2964011562033306e-07, "advantages_std": 1.5899734497070312, "clip_ratio": 0.0, "completion_length": 89.80357360839844, "epoch": 2.4902255639097746, "grad_norm": 6.6875, "kl": 0.4089387819170952, "learning_rate": 3.7556390977443615e-06, "loss": 0.0431, "num_tokens": 101509800.0, "reward": -1.3904333353042602, "reward_std": 6.629190587997437, "rewards/get_chromagram_reward": 0.6224581658840179, "rewards/get_chromagram_reward_std": 0.11556925252079964, "rewards/get_intelligibility_reward": -4.774163477122784, "rewards/get_intelligibility_reward_std": 10.547905969619752, "rewards/get_target_len_reward": -0.01959448978304863, "rewards/get_target_len_reward_std": 0.046945799700915815, "step": 3310 }, { "advantages": -1.8005571291723755e-07, "advantages_std": 1.6132059335708617, "clip_ratio": 0.0, "completion_length": 84.47083435058593, "epoch": 2.4977443609022556, "grad_norm": 7.75, "kl": 2.8430706575512885, "learning_rate": 3.7518796992481203e-06, "loss": 0.2924, "num_tokens": 101809540.0, "reward": -1.7256600558757782, "reward_std": 6.812878942489624, "rewards/get_chromagram_reward": 0.614870798587799, "rewards/get_chromagram_reward_std": 0.11710697636008263, "rewards/get_intelligibility_reward": -5.769259071350097, "rewards/get_intelligibility_reward_std": 10.664492225646972, "rewards/get_target_len_reward": -0.0225916619412601, "rewards/get_target_len_reward_std": 0.06800402384251356, "step": 3320 }, { "advantages": 1.0130306378641762e-06, "advantages_std": 1.5711158871650697, "clip_ratio": 0.0, "completion_length": 87.52142944335938, "epoch": 2.5052631578947366, "grad_norm": 5.8125, "kl": 0.35297227203845977, "learning_rate": 3.7481203007518803e-06, "loss": 0.0416, "num_tokens": 102118313.0, "reward": -1.453517109155655, "reward_std": 7.165091848373413, "rewards/get_chromagram_reward": 0.6142861127853394, "rewards/get_chromagram_reward_std": 0.11068090200424194, "rewards/get_intelligibility_reward": -4.950346994400024, "rewards/get_intelligibility_reward_std": 11.612703037261962, "rewards/get_target_len_reward": -0.024490153044462205, "rewards/get_target_len_reward_std": 0.08085027951747178, "step": 3330 }, { "advantages": 4.87267990934015e-07, "advantages_std": 1.6833030700683593, "clip_ratio": 0.0, "completion_length": 91.52440490722657, "epoch": 2.512781954887218, "grad_norm": 46.0, "kl": 0.31111850887537, "learning_rate": 3.744360902255639e-06, "loss": 0.0383, "num_tokens": 102437971.0, "reward": -1.4131182849407196, "reward_std": 7.0444153308868405, "rewards/get_chromagram_reward": 0.6308394372463226, "rewards/get_chromagram_reward_std": 0.12373006641864777, "rewards/get_intelligibility_reward": -4.842600393295288, "rewards/get_intelligibility_reward_std": 11.395545768737794, "rewards/get_target_len_reward": -0.02759362943470478, "rewards/get_target_len_reward_std": 0.08597943410277367, "step": 3340 }, { "advantages": 2.942979349995767e-07, "advantages_std": 1.6357346177101135, "clip_ratio": 0.0, "completion_length": 83.11845397949219, "epoch": 2.520300751879699, "grad_norm": 8.5625, "kl": 0.36069548428058623, "learning_rate": 3.740601503759399e-06, "loss": 0.0416, "num_tokens": 102734010.0, "reward": -1.3619809970259666, "reward_std": 6.783789920806885, "rewards/get_chromagram_reward": 0.6247197687625885, "rewards/get_chromagram_reward_std": 0.1113676056265831, "rewards/get_intelligibility_reward": -4.687315640039742, "rewards/get_intelligibility_reward_std": 10.916985607147216, "rewards/get_target_len_reward": -0.023346869368106128, "rewards/get_target_len_reward_std": 0.08005320616066455, "step": 3350 }, { "advantages": -3.7377077433120576e-08, "advantages_std": 1.490761649608612, "clip_ratio": 0.0, "completion_length": 86.02976303100586, "epoch": 2.5278195488721806, "grad_norm": 6.90625, "kl": 0.27752266377210616, "learning_rate": 3.736842105263158e-06, "loss": 0.0325, "num_tokens": 103038877.0, "reward": -1.3506350100040436, "reward_std": 6.926352643966675, "rewards/get_chromagram_reward": 0.6217826008796692, "rewards/get_chromagram_reward_std": 0.10271879062056541, "rewards/get_intelligibility_reward": -4.6541990518569945, "rewards/get_intelligibility_reward_std": 11.229968643188476, "rewards/get_target_len_reward": -0.019488278403878213, "rewards/get_target_len_reward_std": 0.059296393766999245, "step": 3360 }, { "advantages": 7.4505798863810925e-09, "advantages_std": 1.633939754962921, "clip_ratio": 0.0, "completion_length": 87.66071472167968, "epoch": 2.5353383458646617, "grad_norm": 5.9375, "kl": 0.30983753949403764, "learning_rate": 3.733082706766918e-06, "loss": 0.0362, "num_tokens": 103346730.0, "reward": -1.7869456171989442, "reward_std": 6.962776184082031, "rewards/get_chromagram_reward": 0.6205675482749939, "rewards/get_chromagram_reward_std": 0.12276971340179443, "rewards/get_intelligibility_reward": -5.957375645637512, "rewards/get_intelligibility_reward_std": 10.834985589981079, "rewards/get_target_len_reward": -0.024028254952281714, "rewards/get_target_len_reward_std": 0.07155142314732074, "step": 3370 }, { "advantages": 1.924733538771761e-07, "advantages_std": 1.6906476497650147, "clip_ratio": 0.0, "completion_length": 86.42381057739257, "epoch": 2.5428571428571427, "grad_norm": 7.84375, "kl": 0.31835374385118487, "learning_rate": 3.729323308270677e-06, "loss": 0.034, "num_tokens": 103651890.0, "reward": -1.746234953403473, "reward_std": 6.781183338165283, "rewards/get_chromagram_reward": 0.6146263599395752, "rewards/get_chromagram_reward_std": 0.11475807204842567, "rewards/get_intelligibility_reward": -5.83338782787323, "rewards/get_intelligibility_reward_std": 10.554136180877686, "rewards/get_target_len_reward": -0.01994320354424417, "rewards/get_target_len_reward_std": 0.0581259747967124, "step": 3380 }, { "advantages": -1.7931065676712876e-07, "advantages_std": 1.5177346467971802, "clip_ratio": 0.0, "completion_length": 82.60952529907226, "epoch": 2.550375939849624, "grad_norm": 13.3125, "kl": 0.30254295021295546, "learning_rate": 3.725563909774436e-06, "loss": 0.038, "num_tokens": 103946485.0, "reward": -1.7833642423152924, "reward_std": 6.767720127105713, "rewards/get_chromagram_reward": 0.6146700859069825, "rewards/get_chromagram_reward_std": 0.1099303774535656, "rewards/get_intelligibility_reward": -5.944972562789917, "rewards/get_intelligibility_reward_std": 10.43216552734375, "rewards/get_target_len_reward": -0.01978995162062347, "rewards/get_target_len_reward_std": 0.06717491708695889, "step": 3390 }, { "advantages": -2.0427009701506903e-07, "advantages_std": 1.4660086750984191, "clip_ratio": 0.0, "completion_length": 89.39643020629883, "epoch": 2.557894736842105, "grad_norm": 5.5, "kl": 0.47956685572862623, "learning_rate": 3.7218045112781957e-06, "loss": 0.0466, "num_tokens": 104260840.0, "reward": -1.2805642530322074, "reward_std": 7.019152069091797, "rewards/get_chromagram_reward": 0.6259425520896912, "rewards/get_chromagram_reward_std": 0.10992063507437706, "rewards/get_intelligibility_reward": -4.452183805406094, "rewards/get_intelligibility_reward_std": 11.252341842651367, "rewards/get_target_len_reward": -0.015451249293982982, "rewards/get_target_len_reward_std": 0.039071221463382245, "step": 3400 }, { "advantages": 2.962847688081638e-07, "advantages_std": 1.5682914018630982, "clip_ratio": 0.0, "completion_length": 88.70714416503907, "epoch": 2.5654135338345867, "grad_norm": 9.0, "kl": 0.36460898965597155, "learning_rate": 3.718045112781955e-06, "loss": 0.0418, "num_tokens": 104573097.0, "reward": -1.1514586597681045, "reward_std": 6.740952682495117, "rewards/get_chromagram_reward": 0.615703922510147, "rewards/get_chromagram_reward_std": 0.11887889504432678, "rewards/get_intelligibility_reward": -4.045766282081604, "rewards/get_intelligibility_reward_std": 11.008774948120116, "rewards/get_target_len_reward": -0.02431353470310569, "rewards/get_target_len_reward_std": 0.07109942696988583, "step": 3410 }, { "advantages": -2.8014185247116077e-07, "advantages_std": 1.616212785243988, "clip_ratio": 0.0, "completion_length": 87.27381057739258, "epoch": 2.5729323308270677, "grad_norm": 6.625, "kl": 0.3183483988046646, "learning_rate": 3.7142857142857146e-06, "loss": 0.0322, "num_tokens": 104880351.0, "reward": -1.5393120527267456, "reward_std": 6.820896768569947, "rewards/get_chromagram_reward": 0.6322737574577332, "rewards/get_chromagram_reward_std": 0.10846047028899193, "rewards/get_intelligibility_reward": -5.230653858184814, "rewards/get_intelligibility_reward_std": 10.88029613494873, "rewards/get_target_len_reward": -0.019555770326405764, "rewards/get_target_len_reward_std": 0.04991299286484718, "step": 3420 }, { "advantages": -1.548479039215067e-07, "advantages_std": 1.531691586971283, "clip_ratio": 0.0, "completion_length": 91.88988189697265, "epoch": 2.5804511278195488, "grad_norm": 6.3125, "kl": 0.3366027757525444, "learning_rate": 3.710526315789474e-06, "loss": 0.0371, "num_tokens": 105200296.0, "reward": -1.3896465808153153, "reward_std": 6.5613306045532225, "rewards/get_chromagram_reward": 0.6323010861873627, "rewards/get_chromagram_reward_std": 0.10928079709410668, "rewards/get_intelligibility_reward": -4.779137639701366, "rewards/get_intelligibility_reward_std": 10.427274227142334, "rewards/get_target_len_reward": -0.022102872747927903, "rewards/get_target_len_reward_std": 0.06365882325917482, "step": 3430 }, { "advantages": -1.0319053966867386e-07, "advantages_std": 1.5140344619750976, "clip_ratio": 0.0, "completion_length": 85.56488265991212, "epoch": 2.58796992481203, "grad_norm": 13.8125, "kl": 0.33588795363903046, "learning_rate": 3.7067669172932335e-06, "loss": 0.0415, "num_tokens": 105503292.0, "reward": -1.707659161090851, "reward_std": 6.968575382232666, "rewards/get_chromagram_reward": 0.6287704050540924, "rewards/get_chromagram_reward_std": 0.10842615365982056, "rewards/get_intelligibility_reward": -5.729169940948486, "rewards/get_intelligibility_reward_std": 10.97379264831543, "rewards/get_target_len_reward": -0.022577523067593575, "rewards/get_target_len_reward_std": 0.06817054338753223, "step": 3440 }, { "advantages": -7.972122446631147e-08, "advantages_std": 1.5462905287742614, "clip_ratio": 0.0, "completion_length": 84.52559661865234, "epoch": 2.5954887218045113, "grad_norm": 7.9375, "kl": 0.34193562567234037, "learning_rate": 3.7030075187969927e-06, "loss": 0.0374, "num_tokens": 105802432.0, "reward": -1.729897018149495, "reward_std": 6.543156433105469, "rewards/get_chromagram_reward": 0.6110469579696656, "rewards/get_chromagram_reward_std": 0.1152818813920021, "rewards/get_intelligibility_reward": -5.780899262428283, "rewards/get_intelligibility_reward_std": 9.966917133331298, "rewards/get_target_len_reward": -0.019838462956249713, "rewards/get_target_len_reward_std": 0.055696993321180346, "step": 3450 }, { "advantages": 4.2989850967956045e-07, "advantages_std": 1.5565645217895507, "clip_ratio": 0.0, "completion_length": 84.73154907226562, "epoch": 2.6030075187969923, "grad_norm": 6.9375, "kl": 0.34386427104473116, "learning_rate": 3.6992481203007523e-06, "loss": 0.0412, "num_tokens": 106103651.0, "reward": -1.4579800248146058, "reward_std": 6.497630643844604, "rewards/get_chromagram_reward": 0.6378639221191407, "rewards/get_chromagram_reward_std": 0.11791711077094078, "rewards/get_intelligibility_reward": -4.987514853477478, "rewards/get_intelligibility_reward_std": 10.301973390579224, "rewards/get_target_len_reward": -0.02428892171010375, "rewards/get_target_len_reward_std": 0.06848178133368492, "step": 3460 }, { "advantages": -3.051012807731013e-07, "advantages_std": 1.5158230423927308, "clip_ratio": 0.0, "completion_length": 85.61190719604492, "epoch": 2.610526315789474, "grad_norm": 9.6875, "kl": 0.308964267373085, "learning_rate": 3.6954887218045116e-06, "loss": 0.0338, "num_tokens": 106406683.0, "reward": -1.5890803162008524, "reward_std": 6.915961217880249, "rewards/get_chromagram_reward": 0.6316801130771637, "rewards/get_chromagram_reward_std": 0.11551015973091125, "rewards/get_intelligibility_reward": -5.377793747186661, "rewards/get_intelligibility_reward_std": 10.922082614898681, "rewards/get_target_len_reward": -0.02112711127847433, "rewards/get_target_len_reward_std": 0.05452207550406456, "step": 3470 }, { "advantages": -5.841255443783666e-07, "advantages_std": 1.5797499895095826, "clip_ratio": 0.0, "completion_length": 87.48631134033204, "epoch": 2.618045112781955, "grad_norm": 9.25, "kl": 0.2983222767710686, "learning_rate": 3.6917293233082708e-06, "loss": 0.0311, "num_tokens": 106714280.0, "reward": -1.632106864452362, "reward_std": 7.033047151565552, "rewards/get_chromagram_reward": 0.6137499034404754, "rewards/get_chromagram_reward_std": 0.1243077963590622, "rewards/get_intelligibility_reward": -5.490492677688598, "rewards/get_intelligibility_reward_std": 11.182052993774414, "rewards/get_target_len_reward": -0.01957751587033272, "rewards/get_target_len_reward_std": 0.054054923728108405, "step": 3480 }, { "advantages": 8.183221353874614e-08, "advantages_std": 1.6531654238700866, "clip_ratio": 0.0, "completion_length": 88.08690719604492, "epoch": 2.625563909774436, "grad_norm": 5.96875, "kl": 0.43793293833732605, "learning_rate": 3.6879699248120304e-06, "loss": 0.0516, "num_tokens": 107023943.0, "reward": -1.421302282810211, "reward_std": 6.5189769744873045, "rewards/get_chromagram_reward": 0.6069125831127167, "rewards/get_chromagram_reward_std": 0.10778507739305496, "rewards/get_intelligibility_reward": -4.85057225227356, "rewards/get_intelligibility_reward_std": 10.41775884628296, "rewards/get_target_len_reward": -0.02024686587974429, "rewards/get_target_len_reward_std": 0.058713534660637376, "step": 3490 }, { "advantages": -3.988544264643679e-07, "advantages_std": 1.544123888015747, "clip_ratio": 0.0, "completion_length": 88.00178680419921, "epoch": 2.6330827067669174, "grad_norm": 8.3125, "kl": 0.35315332412719724, "learning_rate": 3.6842105263157896e-06, "loss": 0.0406, "num_tokens": 107333844.0, "reward": -1.5424182265996933, "reward_std": 6.796130657196045, "rewards/get_chromagram_reward": 0.6076584696769715, "rewards/get_chromagram_reward_std": 0.12150803357362747, "rewards/get_intelligibility_reward": -5.2072618186473845, "rewards/get_intelligibility_reward_std": 10.616606712341309, "rewards/get_target_len_reward": -0.027651109732687473, "rewards/get_target_len_reward_std": 0.07980751022696495, "step": 3500 }, { "advantages": -3.7650268751576733e-07, "advantages_std": 1.6298677563667296, "clip_ratio": 0.0, "completion_length": 85.09285888671874, "epoch": 2.6406015037593984, "grad_norm": 8.75, "kl": 0.390830771625042, "learning_rate": 3.6804511278195493e-06, "loss": 0.0416, "num_tokens": 107635284.0, "reward": -1.6289657175540924, "reward_std": 6.689491701126099, "rewards/get_chromagram_reward": 0.6175826072692872, "rewards/get_chromagram_reward_std": 0.11210766062140465, "rewards/get_intelligibility_reward": -5.485113549232483, "rewards/get_intelligibility_reward_std": 10.506939315795899, "rewards/get_target_len_reward": -0.01936584319919348, "rewards/get_target_len_reward_std": 0.05680835526436567, "step": 3510 }, { "advantages": 8.145969658812646e-08, "advantages_std": 1.6662519693374633, "clip_ratio": 0.0, "completion_length": 88.59464492797852, "epoch": 2.64812030075188, "grad_norm": 8.1875, "kl": 0.32551622688770293, "learning_rate": 3.6766917293233085e-06, "loss": 0.0348, "num_tokens": 107946453.0, "reward": -1.5802398189902305, "reward_std": 6.981956720352173, "rewards/get_chromagram_reward": 0.6107727229595185, "rewards/get_chromagram_reward_std": 0.11069920882582665, "rewards/get_intelligibility_reward": -5.333705711364746, "rewards/get_intelligibility_reward_std": 11.025648307800292, "rewards/get_target_len_reward": -0.017786071356385946, "rewards/get_target_len_reward_std": 0.054831051267683506, "step": 3520 }, { "advantages": -3.355244807323743e-07, "advantages_std": 1.6068529963493348, "clip_ratio": 0.0, "completion_length": 90.33928756713867, "epoch": 2.655639097744361, "grad_norm": 6.34375, "kl": 0.2986632138490677, "learning_rate": 3.672932330827068e-06, "loss": 0.0347, "num_tokens": 108262351.0, "reward": -1.345194971561432, "reward_std": 6.133174467086792, "rewards/get_chromagram_reward": 0.6212732017040252, "rewards/get_chromagram_reward_std": 0.11827879324555397, "rewards/get_intelligibility_reward": -4.638137435913086, "rewards/get_intelligibility_reward_std": 9.790009784698487, "rewards/get_target_len_reward": -0.018720502220094203, "rewards/get_target_len_reward_std": 0.058218426443636416, "step": 3530 }, { "advantages": 4.344930335520303e-07, "advantages_std": 1.6811386108398438, "clip_ratio": 0.0, "completion_length": 85.16071548461915, "epoch": 2.663157894736842, "grad_norm": 9.75, "kl": 0.8134607136249542, "learning_rate": 3.6691729323308274e-06, "loss": 0.0837, "num_tokens": 108563844.0, "reward": -0.9894768297672272, "reward_std": 6.26170506477356, "rewards/get_chromagram_reward": 0.6209167063236236, "rewards/get_chromagram_reward_std": 0.118373341858387, "rewards/get_intelligibility_reward": -3.568402390182018, "rewards/get_intelligibility_reward_std": 10.210508632659913, "rewards/get_target_len_reward": -0.020944639947265385, "rewards/get_target_len_reward_std": 0.055095212161540986, "step": 3540 }, { "advantages": 3.1739472916569866e-07, "advantages_std": 1.5644019007682801, "clip_ratio": 0.0, "completion_length": 89.62500076293945, "epoch": 2.6706766917293234, "grad_norm": 9.625, "kl": 0.3473496943712234, "learning_rate": 3.665413533834587e-06, "loss": 0.0381, "num_tokens": 108877733.0, "reward": -1.0514128148555755, "reward_std": 6.714984369277954, "rewards/get_chromagram_reward": 0.6352467834949493, "rewards/get_chromagram_reward_std": 0.10565011724829673, "rewards/get_intelligibility_reward": -3.770277237892151, "rewards/get_intelligibility_reward_std": 11.006027889251708, "rewards/get_target_len_reward": -0.019207827840000392, "rewards/get_target_len_reward_std": 0.051670771278440955, "step": 3550 }, { "advantages": 7.400909538546329e-08, "advantages_std": 1.5003403663635253, "clip_ratio": 0.0, "completion_length": 86.48154830932617, "epoch": 2.6781954887218045, "grad_norm": 6.9375, "kl": 0.3223287731409073, "learning_rate": 3.6616541353383462e-06, "loss": 0.0364, "num_tokens": 109183208.0, "reward": -1.3079241871833802, "reward_std": 6.220765161514282, "rewards/get_chromagram_reward": 0.6262296378612519, "rewards/get_chromagram_reward_std": 0.11949319913983344, "rewards/get_intelligibility_reward": -4.529001545906067, "rewards/get_intelligibility_reward_std": 9.840829277038575, "rewards/get_target_len_reward": -0.021000441908836365, "rewards/get_target_len_reward_std": 0.059491405822336675, "step": 3560 }, { "advantages": -2.870957139577968e-07, "advantages_std": 1.5407105803489685, "clip_ratio": 0.0, "completion_length": 89.83333435058594, "epoch": 2.685714285714286, "grad_norm": 5.53125, "kl": 0.28521771281957625, "learning_rate": 3.657894736842106e-06, "loss": 0.0328, "num_tokens": 109497724.0, "reward": -1.5617619916796683, "reward_std": 7.22755651473999, "rewards/get_chromagram_reward": 0.6161579072475434, "rewards/get_chromagram_reward_std": 0.11087241023778915, "rewards/get_intelligibility_reward": -5.28342290520668, "rewards/get_intelligibility_reward_std": 11.369152069091797, "rewards/get_target_len_reward": -0.018020586017519234, "rewards/get_target_len_reward_std": 0.05469904895871878, "step": 3570 }, { "advantages": -5.288670777758853e-07, "advantages_std": 1.5159010410308837, "clip_ratio": 0.0, "completion_length": 86.61845397949219, "epoch": 2.693233082706767, "grad_norm": 284.0, "kl": 0.4494665414094925, "learning_rate": 3.654135338345865e-06, "loss": 0.0504, "num_tokens": 109804278.0, "reward": -1.0567859530448913, "reward_std": 6.539412784576416, "rewards/get_chromagram_reward": 0.6181407809257508, "rewards/get_chromagram_reward_std": 0.11926394701004028, "rewards/get_intelligibility_reward": -3.7641510725021363, "rewards/get_intelligibility_reward_std": 10.725274467468262, "rewards/get_target_len_reward": -0.024347435776144268, "rewards/get_target_len_reward_std": 0.07297438457608223, "step": 3580 }, { "advantages": -5.935629019404587e-08, "advantages_std": 1.6441392660140992, "clip_ratio": 0.0, "completion_length": 85.87619094848633, "epoch": 2.700751879699248, "grad_norm": 7.5, "kl": 0.3371360570192337, "learning_rate": 3.6503759398496243e-06, "loss": 0.0402, "num_tokens": 110108086.0, "reward": -1.3475211262702942, "reward_std": 7.116556644439697, "rewards/get_chromagram_reward": 0.6257963180541992, "rewards/get_chromagram_reward_std": 0.11940487399697304, "rewards/get_intelligibility_reward": -4.644616198539734, "rewards/get_intelligibility_reward_std": 11.614391803741455, "rewards/get_target_len_reward": -0.02374332509934902, "rewards/get_target_len_reward_std": 0.0675284055992961, "step": 3590 }, { "advantages": -4.221995897779607e-07, "advantages_std": 1.4993983149528503, "clip_ratio": 0.0, "completion_length": 86.1422622680664, "epoch": 2.708270676691729, "grad_norm": 8.3125, "kl": 0.3693845167756081, "learning_rate": 3.646616541353384e-06, "loss": 0.0412, "num_tokens": 110412250.0, "reward": -1.4997180208563805, "reward_std": 6.539092111587524, "rewards/get_chromagram_reward": 0.6187716603279114, "rewards/get_chromagram_reward_std": 0.11276387199759483, "rewards/get_intelligibility_reward": -5.098487496376038, "rewards/get_intelligibility_reward_std": 10.326584005355835, "rewards/get_target_len_reward": -0.01943796221166849, "rewards/get_target_len_reward_std": 0.060646931640803814, "step": 3600 }, { "advantages": 2.351900015185038e-07, "advantages_std": 1.6777186632156371, "clip_ratio": 0.0, "completion_length": 87.4303596496582, "epoch": 2.7157894736842105, "grad_norm": 6.21875, "kl": 0.3518179655075073, "learning_rate": 3.642857142857143e-06, "loss": 0.0386, "num_tokens": 110720554.0, "reward": -1.5681921809911727, "reward_std": 6.521061992645263, "rewards/get_chromagram_reward": 0.6199930787086487, "rewards/get_chromagram_reward_std": 0.10925538167357444, "rewards/get_intelligibility_reward": -5.302760636806488, "rewards/get_intelligibility_reward_std": 10.143068408966064, "rewards/get_target_len_reward": -0.02180876871570945, "rewards/get_target_len_reward_std": 0.05977758429944515, "step": 3610 }, { "advantages": -1.136213683139431e-07, "advantages_std": 1.52393981218338, "clip_ratio": 0.0, "completion_length": 88.76190490722657, "epoch": 2.7233082706766916, "grad_norm": 6.96875, "kl": 0.315978978574276, "learning_rate": 3.639097744360903e-06, "loss": 0.0328, "num_tokens": 111032110.0, "reward": -1.4126427441835403, "reward_std": 6.398370981216431, "rewards/get_chromagram_reward": 0.6212054550647735, "rewards/get_chromagram_reward_std": 0.11859307289123536, "rewards/get_intelligibility_reward": -4.838269853591919, "rewards/get_intelligibility_reward_std": 10.18034839630127, "rewards/get_target_len_reward": -0.020863546431064604, "rewards/get_target_len_reward_std": 0.046022705547511575, "step": 3620 }, { "advantages": -5.416572275152021e-07, "advantages_std": 1.5852022886276245, "clip_ratio": 0.0, "completion_length": 89.3077407836914, "epoch": 2.730827067669173, "grad_norm": 6.09375, "kl": 0.29936513900756834, "learning_rate": 3.6353383458646616e-06, "loss": 0.0333, "num_tokens": 111345776.0, "reward": -1.4010808348655701, "reward_std": 6.9346442222595215, "rewards/get_chromagram_reward": 0.6170649528503418, "rewards/get_chromagram_reward_std": 0.10936603471636772, "rewards/get_intelligibility_reward": -4.802578103542328, "rewards/get_intelligibility_reward_std": 11.13853578567505, "rewards/get_target_len_reward": -0.017729100491851568, "rewards/get_target_len_reward_std": 0.0609966017305851, "step": 3630 }, { "advantages": 1.932183759656425e-07, "advantages_std": 1.5486809968948365, "clip_ratio": 0.0, "completion_length": 86.09583435058593, "epoch": 2.738345864661654, "grad_norm": 5.90625, "kl": 0.312508887052536, "learning_rate": 3.6315789473684217e-06, "loss": 0.0356, "num_tokens": 111650023.0, "reward": -1.6857449889183045, "reward_std": 6.547386407852173, "rewards/get_chromagram_reward": 0.6232929110527039, "rewards/get_chromagram_reward_std": 0.10652627125382423, "rewards/get_intelligibility_reward": -5.662791061401367, "rewards/get_intelligibility_reward_std": 10.1871337890625, "rewards/get_target_len_reward": -0.017736470606178046, "rewards/get_target_len_reward_std": 0.053376144357025625, "step": 3640 }, { "advantages": -4.919867109265397e-07, "advantages_std": 1.5394857764244079, "clip_ratio": 0.0, "completion_length": 89.41131057739258, "epoch": 2.745864661654135, "grad_norm": 14.1875, "kl": 27.45135252028704, "learning_rate": 3.6278195488721805e-06, "loss": 2.7469, "num_tokens": 111963669.0, "reward": -1.0812234073877334, "reward_std": 6.496358489990234, "rewards/get_chromagram_reward": 0.6205441057682037, "rewards/get_chromagram_reward_std": 0.10971427038311958, "rewards/get_intelligibility_reward": -3.8447015404701235, "rewards/get_intelligibility_reward_std": 10.631364631652833, "rewards/get_target_len_reward": -0.019512721337378025, "rewards/get_target_len_reward_std": 0.04978047218173742, "step": 3650 }, { "advantages": -3.029902586604294e-08, "advantages_std": 1.5976695895195008, "clip_ratio": 0.0, "completion_length": 87.70892944335938, "epoch": 2.7533834586466166, "grad_norm": 5.75, "kl": 0.36582956910133363, "learning_rate": 3.6240601503759406e-06, "loss": 0.0395, "num_tokens": 112272815.0, "reward": -1.3024214208126068, "reward_std": 6.48867712020874, "rewards/get_chromagram_reward": 0.6302835762500762, "rewards/get_chromagram_reward_std": 0.11696906760334969, "rewards/get_intelligibility_reward": -4.515799993276596, "rewards/get_intelligibility_reward_std": 10.368803787231446, "rewards/get_target_len_reward": -0.021747537422925234, "rewards/get_target_len_reward_std": 0.0546572208404541, "step": 3660 }, { "advantages": -1.8154581198359666e-07, "advantages_std": 1.5597105026245117, "clip_ratio": 0.0, "completion_length": 82.84464569091797, "epoch": 2.7609022556390976, "grad_norm": 13.75, "kl": 0.32838622480630875, "learning_rate": 3.6203007518796994e-06, "loss": 0.0338, "num_tokens": 112568512.0, "reward": -1.7773055016994477, "reward_std": 6.957116889953613, "rewards/get_chromagram_reward": 0.6281178295612335, "rewards/get_chromagram_reward_std": 0.11852559596300125, "rewards/get_intelligibility_reward": -5.9411616563797, "rewards/get_intelligibility_reward_std": 10.83169240951538, "rewards/get_target_len_reward": -0.018872402142733335, "rewards/get_target_len_reward_std": 0.04996256493031979, "step": 3670 }, { "advantages": -3.4918390241500673e-07, "advantages_std": 1.4727519631385804, "clip_ratio": 0.0, "completion_length": 86.79047775268555, "epoch": 2.768421052631579, "grad_norm": 8.4375, "kl": 0.5991848617792129, "learning_rate": 3.6165413533834586e-06, "loss": 0.0628, "num_tokens": 112875367.0, "reward": -1.557611984014511, "reward_std": 6.61163101196289, "rewards/get_chromagram_reward": 0.6248084425926208, "rewards/get_chromagram_reward_std": 0.10072682946920394, "rewards/get_intelligibility_reward": -5.278701066970825, "rewards/get_intelligibility_reward_std": 10.406602478027343, "rewards/get_target_len_reward": -0.018943112157285214, "rewards/get_target_len_reward_std": 0.05231909994035959, "step": 3680 }, { "advantages": -3.3626953666043846e-07, "advantages_std": 1.6818018198013305, "clip_ratio": 0.0, "completion_length": 86.50059661865234, "epoch": 2.77593984962406, "grad_norm": 14.6875, "kl": 0.6916846543550491, "learning_rate": 3.6127819548872182e-06, "loss": 0.0717, "num_tokens": 113181255.0, "reward": -1.0904676795005799, "reward_std": 6.335058259963989, "rewards/get_chromagram_reward": 0.6144516110420227, "rewards/get_chromagram_reward_std": 0.10726640596985818, "rewards/get_intelligibility_reward": -3.869361972808838, "rewards/get_intelligibility_reward_std": 10.41758222579956, "rewards/get_target_len_reward": -0.016492511704564096, "rewards/get_target_len_reward_std": 0.0513612063601613, "step": 3690 }, { "advantages": -9.735427113355399e-08, "advantages_std": 1.5762090682983398, "clip_ratio": 0.0, "completion_length": 86.51607284545898, "epoch": 2.783458646616541, "grad_norm": 7.0, "kl": 0.3062734708189964, "learning_rate": 3.6090225563909775e-06, "loss": 0.03, "num_tokens": 113487230.0, "reward": -1.5626688718795776, "reward_std": 6.707300853729248, "rewards/get_chromagram_reward": 0.6272768974304199, "rewards/get_chromagram_reward_std": 0.10489500313997269, "rewards/get_intelligibility_reward": -5.295509243011475, "rewards/get_intelligibility_reward_std": 10.637312984466552, "rewards/get_target_len_reward": -0.019774046447128057, "rewards/get_target_len_reward_std": 0.049518337845802306, "step": 3700 }, { "advantages": -1.7508864047499628e-07, "advantages_std": 1.4822612404823303, "clip_ratio": 0.0, "completion_length": 85.85535888671875, "epoch": 2.7909774436090227, "grad_norm": 18.625, "kl": 0.40415765792131425, "learning_rate": 3.605263157894737e-06, "loss": 0.0434, "num_tokens": 113791003.0, "reward": -1.4261482059955597, "reward_std": 7.004040002822876, "rewards/get_chromagram_reward": 0.6247886598110199, "rewards/get_chromagram_reward_std": 0.12303193882107735, "rewards/get_intelligibility_reward": -4.874522185325622, "rewards/get_intelligibility_reward_std": 11.273698616027833, "rewards/get_target_len_reward": -0.028710635006427766, "rewards/get_target_len_reward_std": 0.09238636270165443, "step": 3710 }, { "advantages": 6.842116633265505e-07, "advantages_std": 1.5441161036491393, "clip_ratio": 0.0, "completion_length": 89.55595397949219, "epoch": 2.7984962406015037, "grad_norm": 6.0625, "kl": 0.37307691723108294, "learning_rate": 3.6015037593984963e-06, "loss": 0.0426, "num_tokens": 114105102.0, "reward": -1.4877331912517548, "reward_std": 6.525564289093017, "rewards/get_chromagram_reward": 0.6080294728279114, "rewards/get_chromagram_reward_std": 0.11591408997774125, "rewards/get_intelligibility_reward": -5.049138689041138, "rewards/get_intelligibility_reward_std": 10.337861633300781, "rewards/get_target_len_reward": -0.02209009351208806, "rewards/get_target_len_reward_std": 0.06789065115153789, "step": 3720 }, { "advantages": -6.424884020361787e-07, "advantages_std": 1.5563165068626403, "clip_ratio": 0.0, "completion_length": 87.59642944335937, "epoch": 2.806015037593985, "grad_norm": 16.125, "kl": 0.34810103923082353, "learning_rate": 3.597744360902256e-06, "loss": 0.0368, "num_tokens": 114414329.0, "reward": -1.322847494482994, "reward_std": 6.680602407455444, "rewards/get_chromagram_reward": 0.6276551485061646, "rewards/get_chromagram_reward_std": 0.10801803767681122, "rewards/get_intelligibility_reward": -4.576816880702973, "rewards/get_intelligibility_reward_std": 10.717460823059081, "rewards/get_target_len_reward": -0.019380524475127458, "rewards/get_target_len_reward_std": 0.0489049194380641, "step": 3730 }, { "advantages": 5.6972107458364e-07, "advantages_std": 1.5420011162757874, "clip_ratio": 0.0, "completion_length": 85.07916717529297, "epoch": 2.813533834586466, "grad_norm": 6.15625, "kl": 0.3048412173986435, "learning_rate": 3.593984962406015e-06, "loss": 0.0339, "num_tokens": 114716213.0, "reward": -1.4711872577667235, "reward_std": 6.391526699066162, "rewards/get_chromagram_reward": 0.6427627861499786, "rewards/get_chromagram_reward_std": 0.106219232827425, "rewards/get_intelligibility_reward": -5.0332791090011595, "rewards/get_intelligibility_reward_std": 10.067411518096923, "rewards/get_target_len_reward": -0.023045250680297613, "rewards/get_target_len_reward_std": 0.06652447283267975, "step": 3740 }, { "advantages": -7.521361560236528e-07, "advantages_std": 1.563930594921112, "clip_ratio": 0.0, "completion_length": 84.58631134033203, "epoch": 2.8210526315789473, "grad_norm": 132.0, "kl": 0.3405220597982407, "learning_rate": 3.590225563909775e-06, "loss": 0.0427, "num_tokens": 115016687.0, "reward": -1.5656429648399353, "reward_std": 6.2206337451934814, "rewards/get_chromagram_reward": 0.6178694903850556, "rewards/get_chromagram_reward_std": 0.12440430745482445, "rewards/get_intelligibility_reward": -5.284491777420044, "rewards/get_intelligibility_reward_std": 9.689728498458862, "rewards/get_target_len_reward": -0.030306239239871503, "rewards/get_target_len_reward_std": 0.0898954387754202, "step": 3750 }, { "advantages": -2.9926500104693333e-07, "advantages_std": 1.6133185148239135, "clip_ratio": 0.0, "completion_length": 88.39285812377929, "epoch": 2.8285714285714287, "grad_norm": 129.0, "kl": 0.3441634550690651, "learning_rate": 3.586466165413534e-06, "loss": 0.0407, "num_tokens": 115326967.0, "reward": -1.3336735486984252, "reward_std": 6.554736709594726, "rewards/get_chromagram_reward": 0.6175411701202392, "rewards/get_chromagram_reward_std": 0.10903427228331566, "rewards/get_intelligibility_reward": -4.596753120422363, "rewards/get_intelligibility_reward_std": 10.53911051750183, "rewards/get_target_len_reward": -0.021808473207056523, "rewards/get_target_len_reward_std": 0.07356351651251317, "step": 3760 }, { "advantages": -1.4578303080270417e-07, "advantages_std": 1.6139382362365722, "clip_ratio": 0.0, "completion_length": 88.05833511352539, "epoch": 2.8360902255639098, "grad_norm": 5.65625, "kl": 0.35888722240924836, "learning_rate": 3.5827067669172937e-06, "loss": 0.0351, "num_tokens": 115636871.0, "reward": -1.2391679644584657, "reward_std": 6.162685680389404, "rewards/get_chromagram_reward": 0.6107660055160522, "rewards/get_chromagram_reward_std": 0.1182018756866455, "rewards/get_intelligibility_reward": -4.3092587232589725, "rewards/get_intelligibility_reward_std": 9.722102928161622, "rewards/get_target_len_reward": -0.01901089083403349, "rewards/get_target_len_reward_std": 0.04649979993700981, "step": 3770 }, { "advantages": 2.9330453656939424e-07, "advantages_std": 1.7181447625160218, "clip_ratio": 0.0, "completion_length": 87.07857284545898, "epoch": 2.8436090225563913, "grad_norm": 23.625, "kl": 0.31283538192510607, "learning_rate": 3.578947368421053e-06, "loss": 0.0374, "num_tokens": 115943108.0, "reward": -1.5981365263462066, "reward_std": 6.841731357574463, "rewards/get_chromagram_reward": 0.6245369374752044, "rewards/get_chromagram_reward_std": 0.11415834277868271, "rewards/get_intelligibility_reward": -5.397093820571899, "rewards/get_intelligibility_reward_std": 10.789442348480225, "rewards/get_target_len_reward": -0.021852330211549996, "rewards/get_target_len_reward_std": 0.06459587197750807, "step": 3780 }, { "advantages": 2.242624816517491e-07, "advantages_std": 1.5829466223716735, "clip_ratio": 0.0, "completion_length": 86.8458351135254, "epoch": 2.8511278195488723, "grad_norm": 7.8125, "kl": 0.3109581500291824, "learning_rate": 3.575187969924812e-06, "loss": 0.0357, "num_tokens": 116248830.0, "reward": -1.6930224657058717, "reward_std": 7.195408773422241, "rewards/get_chromagram_reward": 0.6122824370861053, "rewards/get_chromagram_reward_std": 0.11232773438096047, "rewards/get_intelligibility_reward": -5.672005653381348, "rewards/get_intelligibility_reward_std": 11.43618803024292, "rewards/get_target_len_reward": -0.01934403767809272, "rewards/get_target_len_reward_std": 0.0535100094974041, "step": 3790 }, { "advantages": -7.351239901254303e-08, "advantages_std": 1.6079047083854676, "clip_ratio": 0.0, "completion_length": 88.32797698974609, "epoch": 2.8586466165413533, "grad_norm": 7.40625, "kl": 0.29484367817640306, "learning_rate": 3.5714285714285718e-06, "loss": 0.0367, "num_tokens": 116558589.0, "reward": -2.1900202333927155, "reward_std": 7.080200862884522, "rewards/get_chromagram_reward": 0.6280510127544403, "rewards/get_chromagram_reward_std": 0.12109795212745667, "rewards/get_intelligibility_reward": -7.167077040672302, "rewards/get_intelligibility_reward_std": 10.389463520050048, "rewards/get_target_len_reward": -0.031034017261117697, "rewards/get_target_len_reward_std": 0.105182571336627, "step": 3800 }, { "advantages": -4.465381451268513e-07, "advantages_std": 1.5559608340263367, "clip_ratio": 0.0, "completion_length": 88.14107284545898, "epoch": 2.8661654135338344, "grad_norm": 6.71875, "kl": 0.3654972165822983, "learning_rate": 3.567669172932331e-06, "loss": 0.037, "num_tokens": 116868580.0, "reward": -1.5869385808706284, "reward_std": 6.509708309173584, "rewards/get_chromagram_reward": 0.6265568256378173, "rewards/get_chromagram_reward_std": 0.11053410097956658, "rewards/get_intelligibility_reward": -5.366451478004455, "rewards/get_intelligibility_reward_std": 10.103280210494995, "rewards/get_target_len_reward": -0.020920742489397525, "rewards/get_target_len_reward_std": 0.052569540590047835, "step": 3810 }, { "advantages": -2.2649765583082626e-07, "advantages_std": 1.6033626556396485, "clip_ratio": 0.0, "completion_length": 86.40774002075196, "epoch": 2.873684210526316, "grad_norm": 6.6875, "kl": 0.34228152930736544, "learning_rate": 3.5639097744360906e-06, "loss": 0.0368, "num_tokens": 117173648.0, "reward": -1.5541153252124786, "reward_std": 6.84669189453125, "rewards/get_chromagram_reward": 0.6283958792686463, "rewards/get_chromagram_reward_std": 0.11310142204165459, "rewards/get_intelligibility_reward": -5.272970819473267, "rewards/get_intelligibility_reward_std": 10.797915077209472, "rewards/get_target_len_reward": -0.01777073973789811, "rewards/get_target_len_reward_std": 0.04794793035835028, "step": 3820 }, { "advantages": 2.4760762897813036e-07, "advantages_std": 1.576991081237793, "clip_ratio": 0.0, "completion_length": 88.41488189697266, "epoch": 2.881203007518797, "grad_norm": 12.8125, "kl": 0.3005476787686348, "learning_rate": 3.56015037593985e-06, "loss": 0.0305, "num_tokens": 117485079.0, "reward": -1.2031879782676698, "reward_std": 6.401948547363281, "rewards/get_chromagram_reward": 0.6222064554691314, "rewards/get_chromagram_reward_std": 0.11551795303821563, "rewards/get_intelligibility_reward": -4.2140885353088375, "rewards/get_intelligibility_reward_std": 10.40561122894287, "rewards/get_target_len_reward": -0.017681639175862074, "rewards/get_target_len_reward_std": 0.043648559413850305, "step": 3830 }, { "advantages": -4.94718563004426e-07, "advantages_std": 1.6091697573661805, "clip_ratio": 0.0, "completion_length": 87.98154983520507, "epoch": 2.8887218045112784, "grad_norm": 7.125, "kl": 0.4117413073778152, "learning_rate": 3.5563909774436095e-06, "loss": 0.0405, "num_tokens": 117794729.0, "reward": -1.359292358160019, "reward_std": 6.418743133544922, "rewards/get_chromagram_reward": 0.6351268887519836, "rewards/get_chromagram_reward_std": 0.11723964139819146, "rewards/get_intelligibility_reward": -4.6920299410820006, "rewards/get_intelligibility_reward_std": 10.142711639404297, "rewards/get_target_len_reward": -0.02097375998273492, "rewards/get_target_len_reward_std": 0.05770694185048342, "step": 3840 }, { "advantages": -3.7675103357059927e-07, "advantages_std": 1.6376892924308777, "clip_ratio": 0.0, "completion_length": 90.702978515625, "epoch": 2.8962406015037594, "grad_norm": 59.0, "kl": 0.3368965730071068, "learning_rate": 3.5526315789473687e-06, "loss": 0.0362, "num_tokens": 118111543.0, "reward": -1.4667087614536285, "reward_std": 6.767246961593628, "rewards/get_chromagram_reward": 0.6119817018508911, "rewards/get_chromagram_reward_std": 0.11939271688461303, "rewards/get_intelligibility_reward": -4.9941377401351925, "rewards/get_intelligibility_reward_std": 10.768574285507203, "rewards/get_target_len_reward": -0.017970026470720768, "rewards/get_target_len_reward_std": 0.04830687399953604, "step": 3850 }, { "advantages": 3.47942140166424e-07, "advantages_std": 1.5706040978431701, "clip_ratio": 0.0, "completion_length": 84.92738189697266, "epoch": 2.9037593984962404, "grad_norm": 39.0, "kl": 0.36917597651481626, "learning_rate": 3.5488721804511284e-06, "loss": 0.0396, "num_tokens": 118412535.0, "reward": -1.5638644456863404, "reward_std": 6.324653100967407, "rewards/get_chromagram_reward": 0.6199922919273376, "rewards/get_chromagram_reward_std": 0.11465816274285316, "rewards/get_intelligibility_reward": -5.289899444580078, "rewards/get_intelligibility_reward_std": 9.817295694351197, "rewards/get_target_len_reward": -0.021685893088579176, "rewards/get_target_len_reward_std": 0.06315642818808556, "step": 3860 }, { "advantages": 5.0415576424711614e-08, "advantages_std": 1.6583212971687318, "clip_ratio": 0.0, "completion_length": 83.9226203918457, "epoch": 2.911278195488722, "grad_norm": 13.25, "kl": 0.3216040194034576, "learning_rate": 3.5451127819548876e-06, "loss": 0.0366, "num_tokens": 118710697.0, "reward": -1.5152422875165938, "reward_std": 6.521786212921143, "rewards/get_chromagram_reward": 0.6143280088901519, "rewards/get_chromagram_reward_std": 0.11242129802703857, "rewards/get_intelligibility_reward": -5.136507201194763, "rewards/get_intelligibility_reward_std": 10.324334335327148, "rewards/get_target_len_reward": -0.023547363001853228, "rewards/get_target_len_reward_std": 0.06723555289208889, "step": 3870 }, { "advantages": 2.756714764018398e-07, "advantages_std": 1.6054322957992553, "clip_ratio": 0.0, "completion_length": 84.46131134033203, "epoch": 2.918796992481203, "grad_norm": 7.125, "kl": 1.9177187487483025, "learning_rate": 3.541353383458647e-06, "loss": 0.195, "num_tokens": 119010444.0, "reward": -1.537421664595604, "reward_std": 6.536387872695923, "rewards/get_chromagram_reward": 0.6246936738491058, "rewards/get_chromagram_reward_std": 0.120548328012228, "rewards/get_intelligibility_reward": -5.211082994937897, "rewards/get_intelligibility_reward_std": 10.240724563598633, "rewards/get_target_len_reward": -0.025875354651361704, "rewards/get_target_len_reward_std": 0.07914061769843102, "step": 3880 }, { "advantages": -1.1523565177640193e-07, "advantages_std": 1.6115208506584167, "clip_ratio": 0.0, "completion_length": 83.78690567016602, "epoch": 2.9263157894736844, "grad_norm": 15.625, "kl": 0.4440599873661995, "learning_rate": 3.5375939849624065e-06, "loss": 0.0492, "num_tokens": 119308399.0, "reward": -1.7521640941500665, "reward_std": 6.507619380950928, "rewards/get_chromagram_reward": 0.6227650046348572, "rewards/get_chromagram_reward_std": 0.10821129679679871, "rewards/get_intelligibility_reward": -5.861425828933716, "rewards/get_intelligibility_reward_std": 9.89284119606018, "rewards/get_target_len_reward": -0.017831097729504107, "rewards/get_target_len_reward_std": 0.051672331802546975, "step": 3890 }, { "advantages": -1.3858080052386868e-07, "advantages_std": 1.5852330923080444, "clip_ratio": 0.0, "completion_length": 83.91012115478516, "epoch": 2.9338345864661655, "grad_norm": 8.75, "kl": 0.40669423937797544, "learning_rate": 3.5338345864661657e-06, "loss": 0.0424, "num_tokens": 119607390.0, "reward": -1.5483486637473107, "reward_std": 6.5325416088104244, "rewards/get_chromagram_reward": 0.6435725927352905, "rewards/get_chromagram_reward_std": 0.11586638018488885, "rewards/get_intelligibility_reward": -5.266064453125, "rewards/get_intelligibility_reward_std": 10.214018297195434, "rewards/get_target_len_reward": -0.022553973458707334, "rewards/get_target_len_reward_std": 0.05295693334192038, "step": 3900 }, { "advantages": 1.5149518617363357e-08, "advantages_std": 1.4693358659744262, "clip_ratio": 0.0, "completion_length": 83.45654907226563, "epoch": 2.9413533834586465, "grad_norm": 6.5, "kl": 0.33210055381059644, "learning_rate": 3.5300751879699253e-06, "loss": 0.0382, "num_tokens": 119905127.0, "reward": -1.5625251412391663, "reward_std": 6.870270299911499, "rewards/get_chromagram_reward": 0.6261168956756592, "rewards/get_chromagram_reward_std": 0.12031473070383072, "rewards/get_intelligibility_reward": -5.286927700042725, "rewards/get_intelligibility_reward_std": 10.942294502258301, "rewards/get_target_len_reward": -0.02676438381895423, "rewards/get_target_len_reward_std": 0.07906926460564137, "step": 3910 }, { "advantages": 4.013379450995558e-07, "advantages_std": 1.5395816683769226, "clip_ratio": 0.0, "completion_length": 82.18333587646484, "epoch": 2.948872180451128, "grad_norm": 49.25, "kl": 0.341029454767704, "learning_rate": 3.5263157894736846e-06, "loss": 0.0413, "num_tokens": 120199128.0, "reward": -1.3827453568577766, "reward_std": 6.446432161331177, "rewards/get_chromagram_reward": 0.641999465227127, "rewards/get_chromagram_reward_std": 0.12880906984210014, "rewards/get_intelligibility_reward": -4.763112473487854, "rewards/get_intelligibility_reward_std": 10.24520902633667, "rewards/get_target_len_reward": -0.02712297812104225, "rewards/get_target_len_reward_std": 0.06887171734124423, "step": 3920 }, { "advantages": -1.703699510358092e-07, "advantages_std": 1.6273478150367737, "clip_ratio": 0.0, "completion_length": 88.26131057739258, "epoch": 2.956390977443609, "grad_norm": 6.46875, "kl": 0.6627166286110878, "learning_rate": 3.522556390977444e-06, "loss": 0.0655, "num_tokens": 120509668.0, "reward": -1.0766555294394493, "reward_std": 6.545140647888184, "rewards/get_chromagram_reward": 0.6167593955993652, "rewards/get_chromagram_reward_std": 0.10943646654486656, "rewards/get_intelligibility_reward": -3.826186215877533, "rewards/get_intelligibility_reward_std": 10.702708339691162, "rewards/get_target_len_reward": -0.020539425686001776, "rewards/get_target_len_reward_std": 0.05624462254345417, "step": 3930 }, { "advantages": -2.739330188461508e-07, "advantages_std": 1.5973445296287536, "clip_ratio": 0.0, "completion_length": 88.50952606201172, "epoch": 2.9639097744360905, "grad_norm": 6.0, "kl": 0.49120003134012225, "learning_rate": 3.5187969924812034e-06, "loss": 0.0537, "num_tokens": 120820132.0, "reward": -1.2331419989466668, "reward_std": 6.877620697021484, "rewards/get_chromagram_reward": 0.6266380608081817, "rewards/get_chromagram_reward_std": 0.11246325150132179, "rewards/get_intelligibility_reward": -4.305523836612702, "rewards/get_intelligibility_reward_std": 11.225216484069824, "rewards/get_target_len_reward": -0.020539984665811063, "rewards/get_target_len_reward_std": 0.05534586645662785, "step": 3940 }, { "advantages": 3.228584937176038e-07, "advantages_std": 1.466474747657776, "clip_ratio": 0.0, "completion_length": 90.61488189697266, "epoch": 2.9714285714285715, "grad_norm": 4.96875, "kl": 0.28244465589523315, "learning_rate": 3.515037593984963e-06, "loss": 0.0314, "num_tokens": 121137356.0, "reward": -1.2253394410014153, "reward_std": 6.3654192924499515, "rewards/get_chromagram_reward": 0.6074323713779449, "rewards/get_chromagram_reward_std": 0.1043780043721199, "rewards/get_intelligibility_reward": -4.267912495136261, "rewards/get_intelligibility_reward_std": 10.203703880310059, "rewards/get_target_len_reward": -0.015537928231060505, "rewards/get_target_len_reward_std": 0.054243368841707704, "step": 3950 }, { "advantages": 3.1106175413242454e-07, "advantages_std": 1.6410012602806092, "clip_ratio": 0.0, "completion_length": 87.02619171142578, "epoch": 2.9789473684210526, "grad_norm": 50.25, "kl": 0.3348252400755882, "learning_rate": 3.511278195488722e-06, "loss": 0.0372, "num_tokens": 121444121.0, "reward": -1.5893162369728089, "reward_std": 6.552996873855591, "rewards/get_chromagram_reward": 0.6217017948627472, "rewards/get_chromagram_reward_std": 0.12044238150119782, "rewards/get_intelligibility_reward": -5.368297362327576, "rewards/get_intelligibility_reward_std": 10.290545845031739, "rewards/get_target_len_reward": -0.021352790016680955, "rewards/get_target_len_reward_std": 0.05885081067681312, "step": 3960 }, { "advantages": -1.2516975207432778e-07, "advantages_std": 1.56015487909317, "clip_ratio": 0.0, "completion_length": 83.93392944335938, "epoch": 2.9864661654135336, "grad_norm": 8.875, "kl": 0.49049554467201234, "learning_rate": 3.507518796992482e-06, "loss": 0.0536, "num_tokens": 121743241.0, "reward": -1.6406149506568908, "reward_std": 6.627180910110473, "rewards/get_chromagram_reward": 0.6197909057140351, "rewards/get_chromagram_reward_std": 0.1163574256002903, "rewards/get_intelligibility_reward": -5.520972895622253, "rewards/get_intelligibility_reward_std": 10.391157054901123, "rewards/get_target_len_reward": -0.020662406273186208, "rewards/get_target_len_reward_std": 0.05791729502379894, "step": 3970 }, { "advantages": -1.0728836059570313e-07, "advantages_std": 1.7315122485160828, "clip_ratio": 0.0, "completion_length": 87.04404830932617, "epoch": 2.993984962406015, "grad_norm": 5.9375, "kl": 0.33801840990781784, "learning_rate": 3.5037593984962407e-06, "loss": 0.0378, "num_tokens": 122050238.0, "reward": -1.2605494730174542, "reward_std": 6.6820306301116945, "rewards/get_chromagram_reward": 0.6320303499698638, "rewards/get_chromagram_reward_std": 0.106711595505476, "rewards/get_intelligibility_reward": -4.391807705163956, "rewards/get_intelligibility_reward_std": 10.786450862884521, "rewards/get_target_len_reward": -0.021870699431747197, "rewards/get_target_len_reward_std": 0.06427764222025871, "step": 3980 }, { "advantages": -3.352760788999376e-08, "advantages_std": 1.570639932155609, "clip_ratio": 0.0, "completion_length": 86.67654876708984, "epoch": 3.0022556390977444, "grad_norm": 6.625, "kl": 0.3771525263786316, "learning_rate": 3.5e-06, "loss": 0.0436, "num_tokens": 122356745.0, "reward": -1.5819456607103348, "reward_std": 6.953858041763306, "rewards/get_chromagram_reward": 0.6200494110584259, "rewards/get_chromagram_reward_std": 0.10788874998688698, "rewards/get_intelligibility_reward": -5.346407115459442, "rewards/get_intelligibility_reward_std": 11.080866146087647, "rewards/get_target_len_reward": -0.01947901090607047, "rewards/get_target_len_reward_std": 0.06660321317613124, "step": 3990 }, { "advantages": -7.202227827463049e-08, "advantages_std": 1.568999421596527, "clip_ratio": 0.0, "completion_length": 84.76785888671876, "epoch": 3.0097744360902254, "grad_norm": 5.65625, "kl": 0.28501827269792557, "learning_rate": 3.4962406015037596e-06, "loss": 0.0331, "num_tokens": 122657611.0, "reward": -1.4566645920276642, "reward_std": 6.35735330581665, "rewards/get_chromagram_reward": 0.6238480865955353, "rewards/get_chromagram_reward_std": 0.11462956219911576, "rewards/get_intelligibility_reward": -4.972574901580811, "rewards/get_intelligibility_reward_std": 10.054164218902589, "rewards/get_target_len_reward": -0.021266722306609153, "rewards/get_target_len_reward_std": 0.06082735937088728, "step": 4000 }, { "advantages": 2.60521970929517e-07, "advantages_std": 1.5789404511451721, "clip_ratio": 0.0, "completion_length": 87.2101203918457, "epoch": 3.017293233082707, "grad_norm": 4.84375, "kl": 0.29096812158823016, "learning_rate": 3.492481203007519e-06, "loss": 0.0343, "num_tokens": 122965654.0, "reward": -1.1574354212731124, "reward_std": 6.544857168197632, "rewards/get_chromagram_reward": 0.6343218684196472, "rewards/get_chromagram_reward_std": 0.12248421981930732, "rewards/get_intelligibility_reward": -4.086401665210724, "rewards/get_intelligibility_reward_std": 10.68290309906006, "rewards/get_target_len_reward": -0.020226311590522527, "rewards/get_target_len_reward_std": 0.060124521143734455, "step": 4010 }, { "advantages": 6.233653380149917e-08, "advantages_std": 1.6112825989723205, "clip_ratio": 0.0, "completion_length": 90.42916870117188, "epoch": 3.024812030075188, "grad_norm": 6.40625, "kl": 0.5243023946881294, "learning_rate": 3.4887218045112785e-06, "loss": 0.0519, "num_tokens": 123282490.0, "reward": -1.1066797733306886, "reward_std": 6.21809573173523, "rewards/get_chromagram_reward": 0.6188024818897248, "rewards/get_chromagram_reward_std": 0.10658950209617615, "rewards/get_intelligibility_reward": -3.921287989616394, "rewards/get_intelligibility_reward_std": 10.159177494049072, "rewards/get_target_len_reward": -0.017553656082600354, "rewards/get_target_len_reward_std": 0.04201601464301348, "step": 4020 }, { "advantages": -1.4801820782395226e-07, "advantages_std": 1.5155102372169496, "clip_ratio": 0.0, "completion_length": 83.6482162475586, "epoch": 3.032330827067669, "grad_norm": 7.03125, "kl": 0.2800201430916786, "learning_rate": 3.4849624060150377e-06, "loss": 0.0295, "num_tokens": 123579919.0, "reward": -1.9094644904136657, "reward_std": 6.683042049407959, "rewards/get_chromagram_reward": 0.6124842643737793, "rewards/get_chromagram_reward_std": 0.10707958713173867, "rewards/get_intelligibility_reward": -6.324036312103272, "rewards/get_intelligibility_reward_std": 10.17835292816162, "rewards/get_target_len_reward": -0.016841100715100765, "rewards/get_target_len_reward_std": 0.05463197343051433, "step": 4030 }, { "advantages": -3.23355206965914e-07, "advantages_std": 1.685700011253357, "clip_ratio": 0.0, "completion_length": 87.7982162475586, "epoch": 3.0398496240601505, "grad_norm": 7.03125, "kl": 0.33719453066587446, "learning_rate": 3.4812030075187973e-06, "loss": 0.0335, "num_tokens": 123889945.0, "reward": -1.158366894721985, "reward_std": 6.468680953979492, "rewards/get_chromagram_reward": 0.6212324619293212, "rewards/get_chromagram_reward_std": 0.10868031159043312, "rewards/get_intelligibility_reward": -4.077363419532776, "rewards/get_intelligibility_reward_std": 10.567723560333253, "rewards/get_target_len_reward": -0.018969547282904387, "rewards/get_target_len_reward_std": 0.055462539196014404, "step": 4040 }, { "advantages": -3.690520955501597e-07, "advantages_std": 1.6781371116638184, "clip_ratio": 0.0, "completion_length": 87.94702606201172, "epoch": 3.0473684210526315, "grad_norm": 12.4375, "kl": 0.35713528394699096, "learning_rate": 3.4774436090225565e-06, "loss": 0.0418, "num_tokens": 124199361.0, "reward": -1.5527551651000977, "reward_std": 6.463404417037964, "rewards/get_chromagram_reward": 0.6307406187057495, "rewards/get_chromagram_reward_std": 0.12030550241470336, "rewards/get_intelligibility_reward": -5.258705353736877, "rewards/get_intelligibility_reward_std": 10.130701637268066, "rewards/get_target_len_reward": -0.03030052110552788, "rewards/get_target_len_reward_std": 0.09487388208508492, "step": 4050 }, { "advantages": -8.524705986445724e-07, "advantages_std": 1.6060773849487304, "clip_ratio": 0.0, "completion_length": 85.1053581237793, "epoch": 3.054887218045113, "grad_norm": 240.0, "kl": 0.36646874248981476, "learning_rate": 3.473684210526316e-06, "loss": 0.0379, "num_tokens": 124500506.0, "reward": -1.5579203933477401, "reward_std": 6.218460988998413, "rewards/get_chromagram_reward": 0.6300382137298584, "rewards/get_chromagram_reward_std": 0.11434244513511657, "rewards/get_intelligibility_reward": -5.2850764155387875, "rewards/get_intelligibility_reward_std": 9.650420475006104, "rewards/get_target_len_reward": -0.018722762074321508, "rewards/get_target_len_reward_std": 0.060862186923623086, "step": 4060 }, { "advantages": 3.8469832190912713e-07, "advantages_std": 1.6515644788742065, "clip_ratio": 0.0, "completion_length": 87.67440567016601, "epoch": 3.062406015037594, "grad_norm": 7.625, "kl": 1.3306469723582268, "learning_rate": 3.4699248120300754e-06, "loss": 0.1346, "num_tokens": 124809554.0, "reward": -1.245470690727234, "reward_std": 6.5784827709198, "rewards/get_chromagram_reward": 0.617160576581955, "rewards/get_chromagram_reward_std": 0.12427505478262901, "rewards/get_intelligibility_reward": -4.331724762916565, "rewards/get_intelligibility_reward_std": 10.69231686592102, "rewards/get_target_len_reward": -0.02184757627546787, "rewards/get_target_len_reward_std": 0.058031280897557734, "step": 4070 }, { "advantages": 6.439785323664182e-07, "advantages_std": 1.5893252611160278, "clip_ratio": 0.0, "completion_length": 85.86964416503906, "epoch": 3.069924812030075, "grad_norm": 7.1875, "kl": 0.33594403713941573, "learning_rate": 3.4661654135338346e-06, "loss": 0.0411, "num_tokens": 125113679.0, "reward": -1.4050700664520264, "reward_std": 6.404324150085449, "rewards/get_chromagram_reward": 0.6259469866752625, "rewards/get_chromagram_reward_std": 0.12025773078203202, "rewards/get_intelligibility_reward": -4.814673900604248, "rewards/get_intelligibility_reward_std": 10.243736839294433, "rewards/get_target_len_reward": -0.026482987217605114, "rewards/get_target_len_reward_std": 0.09377836473286152, "step": 4080 }, { "advantages": -9.785095542724775e-08, "advantages_std": 1.579916250705719, "clip_ratio": 0.0, "completion_length": 88.43095474243164, "epoch": 3.0774436090225565, "grad_norm": 10.875, "kl": 0.34741307944059374, "learning_rate": 3.4624060150375943e-06, "loss": 0.0371, "num_tokens": 125425440.0, "reward": -1.3672380074858665, "reward_std": 6.848063945770264, "rewards/get_chromagram_reward": 0.6082988262176514, "rewards/get_chromagram_reward_std": 0.11984210386872292, "rewards/get_intelligibility_reward": -4.689711439609527, "rewards/get_intelligibility_reward_std": 11.027862167358398, "rewards/get_target_len_reward": -0.020301106479018928, "rewards/get_target_len_reward_std": 0.06079982779920101, "step": 4090 }, { "advantages": 4.721184776457221e-07, "advantages_std": 1.558591866493225, "clip_ratio": 0.0, "completion_length": 88.94464416503907, "epoch": 3.0849624060150376, "grad_norm": 5.875, "kl": 0.2803475186228752, "learning_rate": 3.4586466165413535e-06, "loss": 0.0303, "num_tokens": 125737918.0, "reward": -1.2318539798259736, "reward_std": 6.5522034645080565, "rewards/get_chromagram_reward": 0.6231365621089935, "rewards/get_chromagram_reward_std": 0.11571791395545006, "rewards/get_intelligibility_reward": -4.298397623747587, "rewards/get_intelligibility_reward_std": 10.569544792175293, "rewards/get_target_len_reward": -0.020300750527530907, "rewards/get_target_len_reward_std": 0.04793478585779667, "step": 4100 }, { "advantages": -1.5075007979703515e-07, "advantages_std": 1.5395202040672302, "clip_ratio": 0.0, "completion_length": 87.48690643310547, "epoch": 3.0924812030075186, "grad_norm": 5.28125, "kl": 1.836045852303505, "learning_rate": 3.454887218045113e-06, "loss": 0.1841, "num_tokens": 126045904.0, "reward": -1.3981635391712188, "reward_std": 6.713280916213989, "rewards/get_chromagram_reward": 0.6141719341278076, "rewards/get_chromagram_reward_std": 0.11686758324503899, "rewards/get_intelligibility_reward": -4.790717744827271, "rewards/get_intelligibility_reward_std": 10.80199375152588, "rewards/get_target_len_reward": -0.017944508977234364, "rewards/get_target_len_reward_std": 0.04508624579757452, "step": 4110 }, { "advantages": 5.513429854886454e-08, "advantages_std": 1.6173288822174072, "clip_ratio": 0.0, "completion_length": 87.71904983520508, "epoch": 3.1, "grad_norm": 5.4375, "kl": 0.2904201149940491, "learning_rate": 3.4511278195488724e-06, "loss": 0.0313, "num_tokens": 126355656.0, "reward": -1.0718179211020469, "reward_std": 6.851891374588012, "rewards/get_chromagram_reward": 0.6362843096256257, "rewards/get_chromagram_reward_std": 0.11849569082260132, "rewards/get_intelligibility_reward": -3.8306349754333495, "rewards/get_intelligibility_reward_std": 11.316781330108643, "rewards/get_target_len_reward": -0.021102873608469963, "rewards/get_target_len_reward_std": 0.05865535549819469, "step": 4120 }, { "advantages": -2.980231315063975e-08, "advantages_std": 1.531466042995453, "clip_ratio": 0.0, "completion_length": 89.23035888671875, "epoch": 3.107518796992481, "grad_norm": 6.6875, "kl": 0.34578198492527007, "learning_rate": 3.447368421052632e-06, "loss": 0.0366, "num_tokens": 126668631.0, "reward": -1.4846302151679993, "reward_std": 6.385497617721557, "rewards/get_chromagram_reward": 0.6100890100002289, "rewards/get_chromagram_reward_std": 0.11665020361542702, "rewards/get_intelligibility_reward": -5.043470954895019, "rewards/get_intelligibility_reward_std": 10.072954416275024, "rewards/get_target_len_reward": -0.020508491061627866, "rewards/get_target_len_reward_std": 0.05244751274585724, "step": 4130 }, { "advantages": 3.315510070933669e-08, "advantages_std": 1.5963869452476502, "clip_ratio": 0.0, "completion_length": 85.0803596496582, "epoch": 3.1150375939849626, "grad_norm": 9.6875, "kl": 0.3159720331430435, "learning_rate": 3.4436090225563912e-06, "loss": 0.0325, "num_tokens": 126970315.0, "reward": -1.7319731652736663, "reward_std": 7.204164934158325, "rewards/get_chromagram_reward": 0.6253066539764405, "rewards/get_chromagram_reward_std": 0.11562438681721687, "rewards/get_intelligibility_reward": -5.801904332637787, "rewards/get_intelligibility_reward_std": 11.298967266082764, "rewards/get_target_len_reward": -0.01932156188413501, "rewards/get_target_len_reward_std": 0.05420879852026701, "step": 4140 }, { "advantages": 6.544093583471522e-07, "advantages_std": 1.646132493019104, "clip_ratio": 0.0, "completion_length": 88.97619247436523, "epoch": 3.1225563909774436, "grad_norm": 6.9375, "kl": 0.3055782064795494, "learning_rate": 3.439849624060151e-06, "loss": 0.0408, "num_tokens": 127282894.0, "reward": -1.6036958336830138, "reward_std": 6.741269922256469, "rewards/get_chromagram_reward": 0.6201092064380646, "rewards/get_chromagram_reward_std": 0.11851846948266029, "rewards/get_intelligibility_reward": -5.403568816184998, "rewards/get_intelligibility_reward_std": 10.67822847366333, "rewards/get_target_len_reward": -0.027627493347972633, "rewards/get_target_len_reward_std": 0.08369314391165972, "step": 4150 }, { "advantages": 1.5820066820992906e-07, "advantages_std": 1.673720395565033, "clip_ratio": 0.0, "completion_length": 83.95952682495117, "epoch": 3.1300751879699247, "grad_norm": 712.0, "kl": 0.35793513655662534, "learning_rate": 3.43609022556391e-06, "loss": 0.0385, "num_tokens": 127581882.0, "reward": -1.3141912584193052, "reward_std": 6.152334928512573, "rewards/get_chromagram_reward": 0.6244514942169189, "rewards/get_chromagram_reward_std": 0.13061213493347168, "rewards/get_intelligibility_reward": -4.543868839740753, "rewards/get_intelligibility_reward_std": 9.792318058013915, "rewards/get_target_len_reward": -0.02315631527453661, "rewards/get_target_len_reward_std": 0.06220987867563963, "step": 4160 }, { "advantages": 4.592041378259637e-07, "advantages_std": 1.6728333115577698, "clip_ratio": 0.0, "completion_length": 88.5232162475586, "epoch": 3.137593984962406, "grad_norm": 6.71875, "kl": 0.29836671203374865, "learning_rate": 3.4323308270676693e-06, "loss": 0.0321, "num_tokens": 127893670.0, "reward": -1.011973148584366, "reward_std": 6.23221607208252, "rewards/get_chromagram_reward": 0.6238932073116302, "rewards/get_chromagram_reward_std": 0.10449873432517051, "rewards/get_intelligibility_reward": -3.6372927367687224, "rewards/get_intelligibility_reward_std": 10.114261817932128, "rewards/get_target_len_reward": -0.022519584652036427, "rewards/get_target_len_reward_std": 0.06431333236396312, "step": 4170 }, { "advantages": -4.6566130720293584e-07, "advantages_std": 1.4346014618873597, "clip_ratio": 0.0, "completion_length": 82.23154907226562, "epoch": 3.145112781954887, "grad_norm": 10.1875, "kl": 0.3649785041809082, "learning_rate": 3.428571428571429e-06, "loss": 0.0445, "num_tokens": 128187560.0, "reward": -1.5847628176212312, "reward_std": 6.996919250488281, "rewards/get_chromagram_reward": 0.6297712743282318, "rewards/get_chromagram_reward_std": 0.12266267240047454, "rewards/get_intelligibility_reward": -5.356306481361389, "rewards/get_intelligibility_reward_std": 11.103976488113403, "rewards/get_target_len_reward": -0.0277529826387763, "rewards/get_target_len_reward_std": 0.08649206086993218, "step": 4180 }, { "advantages": 1.5919408538067615e-07, "advantages_std": 1.5202927470207215, "clip_ratio": 0.0, "completion_length": 86.965478515625, "epoch": 3.1526315789473682, "grad_norm": 12.75, "kl": 0.3561545431613922, "learning_rate": 3.424812030075188e-06, "loss": 0.0376, "num_tokens": 128494265.0, "reward": -1.5775128185749054, "reward_std": 7.364692258834839, "rewards/get_chromagram_reward": 0.6272209942340851, "rewards/get_chromagram_reward_std": 0.11569681465625763, "rewards/get_intelligibility_reward": -5.338159966468811, "rewards/get_intelligibility_reward_std": 11.840453338623046, "rewards/get_target_len_reward": -0.021599231753498316, "rewards/get_target_len_reward_std": 0.06570550277829171, "step": 4190 }, { "advantages": 1.7397105858130147e-07, "advantages_std": 1.581853848695755, "clip_ratio": 0.0, "completion_length": 83.82678680419922, "epoch": 3.1601503759398497, "grad_norm": 6.59375, "kl": 0.48840090334415437, "learning_rate": 3.421052631578948e-06, "loss": 0.0553, "num_tokens": 128792206.0, "reward": -1.8744078114628793, "reward_std": 6.874015951156617, "rewards/get_chromagram_reward": 0.6129431843757629, "rewards/get_chromagram_reward_std": 0.11338778585195541, "rewards/get_intelligibility_reward": -6.217310810089112, "rewards/get_intelligibility_reward_std": 10.503016376495362, "rewards/get_target_len_reward": -0.01885540150105953, "rewards/get_target_len_reward_std": 0.0585413821041584, "step": 4200 }, { "advantages": 2.3220976776983094e-07, "advantages_std": 1.6209957242012023, "clip_ratio": 0.0, "completion_length": 88.77619247436523, "epoch": 3.1676691729323307, "grad_norm": 6.0, "kl": 0.302373868227005, "learning_rate": 3.417293233082707e-06, "loss": 0.0313, "num_tokens": 129104105.0, "reward": -1.3020709201693534, "reward_std": 6.620100355148315, "rewards/get_chromagram_reward": 0.6347114503383636, "rewards/get_chromagram_reward_std": 0.11503036171197892, "rewards/get_intelligibility_reward": -4.517418801784515, "rewards/get_intelligibility_reward_std": 10.546687459945678, "rewards/get_target_len_reward": -0.023505217209458352, "rewards/get_target_len_reward_std": 0.06372208669781684, "step": 4210 }, { "advantages": 1.820425197252007e-07, "advantages_std": 1.699629533290863, "clip_ratio": 0.0, "completion_length": 87.24583587646484, "epoch": 3.1751879699248122, "grad_norm": 7.4375, "kl": 0.3359279319643974, "learning_rate": 3.4135338345864667e-06, "loss": 0.0409, "num_tokens": 129412696.0, "reward": -1.1203251257538795, "reward_std": 6.46322226524353, "rewards/get_chromagram_reward": 0.6190076887607574, "rewards/get_chromagram_reward_std": 0.11276645958423615, "rewards/get_intelligibility_reward": -3.958110880851746, "rewards/get_intelligibility_reward_std": 10.538503217697144, "rewards/get_target_len_reward": -0.021872010454535483, "rewards/get_target_len_reward_std": 0.06405184045433998, "step": 4220 }, { "advantages": -7.053217050412286e-08, "advantages_std": 1.6343294858932496, "clip_ratio": 0.0, "completion_length": 84.58809661865234, "epoch": 3.1827067669172933, "grad_norm": 6.78125, "kl": 0.27931652069091795, "learning_rate": 3.409774436090226e-06, "loss": 0.0302, "num_tokens": 129713005.0, "reward": -1.5770192325115204, "reward_std": 6.417839813232422, "rewards/get_chromagram_reward": 0.6163597881793976, "rewards/get_chromagram_reward_std": 0.11609133034944534, "rewards/get_intelligibility_reward": -5.326514279842376, "rewards/get_intelligibility_reward_std": 9.951054191589355, "rewards/get_target_len_reward": -0.020902913995087147, "rewards/get_target_len_reward_std": 0.052859509550035, "step": 4230 }, { "advantages": 8.195635814445268e-09, "advantages_std": 1.5448844194412232, "clip_ratio": 0.0, "completion_length": 85.81488189697265, "epoch": 3.1902255639097743, "grad_norm": 69.5, "kl": 0.3631991773843765, "learning_rate": 3.4060150375939856e-06, "loss": 0.0406, "num_tokens": 130016638.0, "reward": -1.707671296596527, "reward_std": 6.869535779953003, "rewards/get_chromagram_reward": 0.6243698179721833, "rewards/get_chromagram_reward_std": 0.10792898535728454, "rewards/get_intelligibility_reward": -5.727483916282654, "rewards/get_intelligibility_reward_std": 10.7578125, "rewards/get_target_len_reward": -0.019899446703493594, "rewards/get_target_len_reward_std": 0.05657290127128363, "step": 4240 }, { "advantages": -5.8983766848541565e-08, "advantages_std": 1.5598833680152893, "clip_ratio": 0.0, "completion_length": 89.1851203918457, "epoch": 3.197744360902256, "grad_norm": 6.0, "kl": 0.3095327615737915, "learning_rate": 3.4022556390977448e-06, "loss": 0.0325, "num_tokens": 130330190.0, "reward": -1.2964784324169158, "reward_std": 6.4273745059967045, "rewards/get_chromagram_reward": 0.6348487615585328, "rewards/get_chromagram_reward_std": 0.10957509577274323, "rewards/get_intelligibility_reward": -4.506310987472534, "rewards/get_intelligibility_reward_std": 10.336654472351075, "rewards/get_target_len_reward": -0.0179728452116251, "rewards/get_target_len_reward_std": 0.04697036426514387, "step": 4250 }, { "advantages": 4.770855213109826e-07, "advantages_std": 1.5714811325073241, "clip_ratio": 0.0, "completion_length": 88.26726455688477, "epoch": 3.205263157894737, "grad_norm": 25.5, "kl": 0.3470078229904175, "learning_rate": 3.3984962406015044e-06, "loss": 0.0395, "num_tokens": 130639846.0, "reward": -1.596861571073532, "reward_std": 7.196740436553955, "rewards/get_chromagram_reward": 0.6274778127670289, "rewards/get_chromagram_reward_std": 0.10798213258385658, "rewards/get_intelligibility_reward": -5.391012513637543, "rewards/get_intelligibility_reward_std": 11.393933773040771, "rewards/get_target_len_reward": -0.027049866039305925, "rewards/get_target_len_reward_std": 0.08225924111902713, "step": 4260 }, { "advantages": -8.766849290964274e-08, "advantages_std": 1.6335813045501708, "clip_ratio": 0.0, "completion_length": 87.04404907226562, "epoch": 3.212781954887218, "grad_norm": 24.5, "kl": 0.37787252515554426, "learning_rate": 3.3947368421052636e-06, "loss": 0.0501, "num_tokens": 130947324.0, "reward": -1.5003271281719208, "reward_std": 6.7568199157714846, "rewards/get_chromagram_reward": 0.6264678537845612, "rewards/get_chromagram_reward_std": 0.11311981976032257, "rewards/get_intelligibility_reward": -5.102498412132263, "rewards/get_intelligibility_reward_std": 10.762168216705323, "rewards/get_target_len_reward": -0.02495050337165594, "rewards/get_target_len_reward_std": 0.07984323929995299, "step": 4270 }, { "advantages": 4.2983642032368154e-07, "advantages_std": 1.4764443516731263, "clip_ratio": 0.0, "completion_length": 87.52797775268554, "epoch": 3.2203007518796993, "grad_norm": 7.9375, "kl": 0.27519893944263457, "learning_rate": 3.3909774436090224e-06, "loss": 0.0331, "num_tokens": 131256343.0, "reward": -1.131436224281788, "reward_std": 6.341731071472168, "rewards/get_chromagram_reward": 0.6292557060718537, "rewards/get_chromagram_reward_std": 0.11208050698041916, "rewards/get_intelligibility_reward": -4.000416457653046, "rewards/get_intelligibility_reward_std": 10.307479953765869, "rewards/get_target_len_reward": -0.023147699516266586, "rewards/get_target_len_reward_std": 0.05790396872907877, "step": 4280 }, { "advantages": 1.567105432087601e-07, "advantages_std": 1.7350687742233277, "clip_ratio": 0.0, "completion_length": 86.61488265991211, "epoch": 3.2278195488721804, "grad_norm": 27.125, "kl": 0.33322153240442276, "learning_rate": 3.387218045112782e-06, "loss": 0.0367, "num_tokens": 131562398.0, "reward": -1.4957343488931656, "reward_std": 6.397630310058593, "rewards/get_chromagram_reward": 0.6309226214885711, "rewards/get_chromagram_reward_std": 0.11377585753798485, "rewards/get_intelligibility_reward": -5.09162460565567, "rewards/get_intelligibility_reward_std": 10.076897811889648, "rewards/get_target_len_reward": -0.02650076886638999, "rewards/get_target_len_reward_std": 0.06703396700322628, "step": 4290 }, { "advantages": -1.9446015784296832e-07, "advantages_std": 1.5864139795303345, "clip_ratio": 0.0, "completion_length": 90.55357284545899, "epoch": 3.235338345864662, "grad_norm": 9.3125, "kl": 2.30724019408226, "learning_rate": 3.3834586466165413e-06, "loss": 0.2299, "num_tokens": 131879855.0, "reward": -1.2683696322143079, "reward_std": 7.173541069030762, "rewards/get_chromagram_reward": 0.6216107368469238, "rewards/get_chromagram_reward_std": 0.10671053901314735, "rewards/get_intelligibility_reward": -4.412684118747711, "rewards/get_intelligibility_reward_std": 11.600953006744385, "rewards/get_target_len_reward": -0.014035291131585836, "rewards/get_target_len_reward_std": 0.03394932132214308, "step": 4300 }, { "advantages": 9.869535868567425e-07, "advantages_std": 1.5972688794136047, "clip_ratio": 0.0, "completion_length": 87.88631134033203, "epoch": 3.242857142857143, "grad_norm": 13.3125, "kl": 0.3564337491989136, "learning_rate": 3.379699248120301e-06, "loss": 0.0376, "num_tokens": 132188535.0, "reward": -1.5792127377353609, "reward_std": 6.730674457550049, "rewards/get_chromagram_reward": 0.6139270603656769, "rewards/get_chromagram_reward_std": 0.12355838790535927, "rewards/get_intelligibility_reward": -5.331458967924118, "rewards/get_intelligibility_reward_std": 10.577913284301758, "rewards/get_target_len_reward": -0.020106138475239278, "rewards/get_target_len_reward_std": 0.0641857735812664, "step": 4310 }, { "advantages": 2.992649868360786e-07, "advantages_std": 1.4977822065353394, "clip_ratio": 0.0, "completion_length": 83.48511962890625, "epoch": 3.250375939849624, "grad_norm": 6.6875, "kl": 0.3735776156187057, "learning_rate": 3.37593984962406e-06, "loss": 0.0468, "num_tokens": 132486001.0, "reward": -1.7604040503501892, "reward_std": 6.668347644805908, "rewards/get_chromagram_reward": 0.6218710958957672, "rewards/get_chromagram_reward_std": 0.10823953151702881, "rewards/get_intelligibility_reward": -5.87978732585907, "rewards/get_intelligibility_reward_std": 10.34724760055542, "rewards/get_target_len_reward": -0.02329575140029192, "rewards/get_target_len_reward_std": 0.07086060345172882, "step": 4320 }, { "advantages": 6.556511209510063e-08, "advantages_std": 1.5302933931350708, "clip_ratio": 0.0, "completion_length": 83.34404830932617, "epoch": 3.2578947368421054, "grad_norm": 688.0, "kl": 0.4539714246988297, "learning_rate": 3.37218045112782e-06, "loss": 0.051, "num_tokens": 132782535.0, "reward": -1.3723966896533966, "reward_std": 6.291196537017822, "rewards/get_chromagram_reward": 0.6126464605331421, "rewards/get_chromagram_reward_std": 0.11417317017912865, "rewards/get_intelligibility_reward": -4.7077836990356445, "rewards/get_intelligibility_reward_std": 10.042604541778564, "rewards/get_target_len_reward": -0.022052537463605405, "rewards/get_target_len_reward_std": 0.06837241873145103, "step": 4330 }, { "advantages": 6.258488127741657e-08, "advantages_std": 1.5377010941505431, "clip_ratio": 0.0, "completion_length": 87.44643096923828, "epoch": 3.2654135338345864, "grad_norm": 5.03125, "kl": 0.31609337627887724, "learning_rate": 3.368421052631579e-06, "loss": 0.0319, "num_tokens": 133090793.0, "reward": -1.359285932779312, "reward_std": 6.470762872695923, "rewards/get_chromagram_reward": 0.6181576669216156, "rewards/get_chromagram_reward_std": 0.113900126516819, "rewards/get_intelligibility_reward": -4.6754331350326535, "rewards/get_intelligibility_reward_std": 10.384093761444092, "rewards/get_target_len_reward": -0.020582077372819186, "rewards/get_target_len_reward_std": 0.05547735020518303, "step": 4340 }, { "advantages": -6.258487887933483e-08, "advantages_std": 1.5956597447395324, "clip_ratio": 0.0, "completion_length": 88.53869247436523, "epoch": 3.272932330827068, "grad_norm": 12.3125, "kl": 0.3077188953757286, "learning_rate": 3.3646616541353387e-06, "loss": 0.035, "num_tokens": 133402151.0, "reward": -1.2024756371974945, "reward_std": 6.525686979293823, "rewards/get_chromagram_reward": 0.6273522794246673, "rewards/get_chromagram_reward_std": 0.11311362758278846, "rewards/get_intelligibility_reward": -4.21550475358963, "rewards/get_intelligibility_reward_std": 10.441200542449952, "rewards/get_target_len_reward": -0.01927430145442486, "rewards/get_target_len_reward_std": 0.05576913226395845, "step": 4350 }, { "advantages": 1.668930101228483e-07, "advantages_std": 1.5654626369476319, "clip_ratio": 0.0, "completion_length": 90.2880973815918, "epoch": 3.280451127819549, "grad_norm": 72.5, "kl": 0.30325102657079694, "learning_rate": 3.360902255639098e-06, "loss": 0.0348, "num_tokens": 133718235.0, "reward": -1.231583520770073, "reward_std": 6.6298370361328125, "rewards/get_chromagram_reward": 0.6353707134723663, "rewards/get_chromagram_reward_std": 0.10845707952976227, "rewards/get_intelligibility_reward": -4.312479996681214, "rewards/get_intelligibility_reward_std": 10.759493350982666, "rewards/get_target_len_reward": -0.01764109553769231, "rewards/get_target_len_reward_std": 0.050665826164186, "step": 4360 }, { "advantages": 4.169841787415862e-07, "advantages_std": 1.686136043071747, "clip_ratio": 0.0, "completion_length": 86.95416870117188, "epoch": 3.28796992481203, "grad_norm": 12.0625, "kl": 0.30330993682146073, "learning_rate": 3.357142857142857e-06, "loss": 0.0316, "num_tokens": 134024293.0, "reward": -1.4639351397752762, "reward_std": 6.9123969078063965, "rewards/get_chromagram_reward": 0.6185981154441833, "rewards/get_chromagram_reward_std": 0.11106212437152863, "rewards/get_intelligibility_reward": -4.990768309682608, "rewards/get_intelligibility_reward_std": 11.024269771575927, "rewards/get_target_len_reward": -0.019634839612990618, "rewards/get_target_len_reward_std": 0.05602564513683319, "step": 4370 }, { "advantages": -2.493461053632018e-07, "advantages_std": 1.4997942090034484, "clip_ratio": 0.0, "completion_length": 85.8958351135254, "epoch": 3.2954887218045115, "grad_norm": 6.75, "kl": 0.37191055417060853, "learning_rate": 3.3533834586466168e-06, "loss": 0.0448, "num_tokens": 134327979.0, "reward": -1.4511731714010239, "reward_std": 6.788499164581299, "rewards/get_chromagram_reward": 0.6098757028579712, "rewards/get_chromagram_reward_std": 0.1217208631336689, "rewards/get_intelligibility_reward": -4.94060070514679, "rewards/get_intelligibility_reward_std": 10.785972690582275, "rewards/get_target_len_reward": -0.0227941183373332, "rewards/get_target_len_reward_std": 0.07210518475621938, "step": 4380 }, { "advantages": -1.7806888479299233e-07, "advantages_std": 1.7041251063346863, "clip_ratio": 0.0, "completion_length": 83.92202529907226, "epoch": 3.3030075187969925, "grad_norm": 8.125, "kl": 0.36032059490680696, "learning_rate": 3.349624060150376e-06, "loss": 0.0346, "num_tokens": 134626620.0, "reward": -1.2390080988407135, "reward_std": 6.203968286514282, "rewards/get_chromagram_reward": 0.6310724079608917, "rewards/get_chromagram_reward_std": 0.11540384292602539, "rewards/get_intelligibility_reward": -4.326459050178528, "rewards/get_intelligibility_reward_std": 9.993881130218506, "rewards/get_target_len_reward": -0.021637386176735163, "rewards/get_target_len_reward_std": 0.05318964570760727, "step": 4390 }, { "advantages": -1.288950535638378e-07, "advantages_std": 1.6415579080581666, "clip_ratio": 0.0, "completion_length": 85.83631134033203, "epoch": 3.3105263157894735, "grad_norm": 35.75, "kl": 0.3649152874946594, "learning_rate": 3.3458646616541356e-06, "loss": 0.0388, "num_tokens": 134930223.0, "reward": -1.3480420507490636, "reward_std": 6.561384868621826, "rewards/get_chromagram_reward": 0.627709686756134, "rewards/get_chromagram_reward_std": 0.10990332737565041, "rewards/get_intelligibility_reward": -4.6510482132434845, "rewards/get_intelligibility_reward_std": 10.510717582702636, "rewards/get_target_len_reward": -0.020787397399544716, "rewards/get_target_len_reward_std": 0.05646887123584747, "step": 4400 }, { "advantages": 6.740292057827446e-07, "advantages_std": 1.5014996767044066, "clip_ratio": 0.0, "completion_length": 86.23452453613281, "epoch": 3.318045112781955, "grad_norm": 6.0, "kl": 0.3484359845519066, "learning_rate": 3.342105263157895e-06, "loss": 0.0386, "num_tokens": 135235842.0, "reward": -1.4315605893731118, "reward_std": 6.73180627822876, "rewards/get_chromagram_reward": 0.6199473381042481, "rewards/get_chromagram_reward_std": 0.11735682263970375, "rewards/get_intelligibility_reward": -4.8925375759601595, "rewards/get_intelligibility_reward_std": 10.742376804351807, "rewards/get_target_len_reward": -0.02209125757217407, "rewards/get_target_len_reward_std": 0.05951045509427786, "step": 4410 }, { "advantages": 2.495944428915209e-07, "advantages_std": 1.6141934156417848, "clip_ratio": 0.0, "completion_length": 87.96666870117187, "epoch": 3.325563909774436, "grad_norm": 6.1875, "kl": 0.29121205657720567, "learning_rate": 3.3383458646616545e-06, "loss": 0.0353, "num_tokens": 135545071.0, "reward": -1.6051747798919678, "reward_std": 6.904460906982422, "rewards/get_chromagram_reward": 0.6305911779403687, "rewards/get_chromagram_reward_std": 0.11587974280118943, "rewards/get_intelligibility_reward": -5.420707416534424, "rewards/get_intelligibility_reward_std": 10.881964015960694, "rewards/get_target_len_reward": -0.02540780254639685, "rewards/get_target_len_reward_std": 0.08899888359010219, "step": 4420 }, { "advantages": 1.688798274557257e-08, "advantages_std": 1.6381218433380127, "clip_ratio": 0.0, "completion_length": 86.28392944335937, "epoch": 3.333082706766917, "grad_norm": 11.625, "kl": 0.32549781948328016, "learning_rate": 3.3345864661654137e-06, "loss": 0.0356, "num_tokens": 135850066.0, "reward": -1.3877425879240035, "reward_std": 6.77799243927002, "rewards/get_chromagram_reward": 0.6214170634746552, "rewards/get_chromagram_reward_std": 0.11292667016386986, "rewards/get_intelligibility_reward": -4.76411754488945, "rewards/get_intelligibility_reward_std": 10.808771133422852, "rewards/get_target_len_reward": -0.02052699653431773, "rewards/get_target_len_reward_std": 0.060480015352368355, "step": 4430 }, { "advantages": -5.215406439162962e-07, "advantages_std": 1.612470018863678, "clip_ratio": 0.0, "completion_length": 89.95238342285157, "epoch": 3.3406015037593986, "grad_norm": 6.46875, "kl": 0.40137475579977033, "learning_rate": 3.3308270676691734e-06, "loss": 0.0445, "num_tokens": 136164492.0, "reward": -1.291347751021385, "reward_std": 6.56026291847229, "rewards/get_chromagram_reward": 0.6152363359928131, "rewards/get_chromagram_reward_std": 0.1162784643471241, "rewards/get_intelligibility_reward": -4.46769335269928, "rewards/get_intelligibility_reward_std": 10.557899475097656, "rewards/get_target_len_reward": -0.021585862338542938, "rewards/get_target_len_reward_std": 0.06894133090972901, "step": 4440 }, { "advantages": -3.2906732094772906e-07, "advantages_std": 1.5803971409797668, "clip_ratio": 0.0, "completion_length": 84.28928604125977, "epoch": 3.3481203007518796, "grad_norm": 5.75, "kl": 0.3210767716169357, "learning_rate": 3.3270676691729326e-06, "loss": 0.0362, "num_tokens": 136463505.0, "reward": -1.60392969250679, "reward_std": 6.731313037872314, "rewards/get_chromagram_reward": 0.6151593327522278, "rewards/get_chromagram_reward_std": 0.1124209813773632, "rewards/get_intelligibility_reward": -5.408218407630921, "rewards/get_intelligibility_reward_std": 10.616331481933594, "rewards/get_target_len_reward": -0.01872968636453152, "rewards/get_target_len_reward_std": 0.059635018557310106, "step": 4450 }, { "advantages": -1.527369065001949e-07, "advantages_std": 1.6485271215438844, "clip_ratio": 0.0, "completion_length": 86.61131134033204, "epoch": 3.355639097744361, "grad_norm": 10.4375, "kl": 0.304203300178051, "learning_rate": 3.3233082706766922e-06, "loss": 0.0305, "num_tokens": 136769357.0, "reward": -1.5627503097057343, "reward_std": 6.813011837005615, "rewards/get_chromagram_reward": 0.6316563546657562, "rewards/get_chromagram_reward_std": 0.12207503393292427, "rewards/get_intelligibility_reward": -5.299207505583763, "rewards/get_intelligibility_reward_std": 10.698434257507325, "rewards/get_target_len_reward": -0.020699765533208847, "rewards/get_target_len_reward_std": 0.04991193488240242, "step": 4460 }, { "advantages": 4.86274569766465e-07, "advantages_std": 1.6251121640205384, "clip_ratio": 0.0, "completion_length": 89.69643096923828, "epoch": 3.363157894736842, "grad_norm": 4.84375, "kl": 0.42891152799129484, "learning_rate": 3.3195488721804515e-06, "loss": 0.0485, "num_tokens": 137084220.0, "reward": -1.1922965973615647, "reward_std": 6.639740610122681, "rewards/get_chromagram_reward": 0.6291784584522248, "rewards/get_chromagram_reward_std": 0.10962832942605019, "rewards/get_intelligibility_reward": -4.18378599062562, "rewards/get_intelligibility_reward_std": 10.700201082229615, "rewards/get_target_len_reward": -0.022282037045806648, "rewards/get_target_len_reward_std": 0.06122244410216808, "step": 4470 }, { "advantages": 2.7716159820556643e-07, "advantages_std": 1.5915523767471313, "clip_ratio": 0.0, "completion_length": 84.36666793823242, "epoch": 3.370676691729323, "grad_norm": 6.03125, "kl": 0.390315043926239, "learning_rate": 3.3157894736842107e-06, "loss": 0.0408, "num_tokens": 137384058.0, "reward": -1.3131475508213044, "reward_std": 6.7762237071990965, "rewards/get_chromagram_reward": 0.6207613468170166, "rewards/get_chromagram_reward_std": 0.11534344181418418, "rewards/get_intelligibility_reward": -4.539241921901703, "rewards/get_intelligibility_reward_std": 10.954913902282716, "rewards/get_target_len_reward": -0.02096187099814415, "rewards/get_target_len_reward_std": 0.061504085268825295, "step": 4480 }, { "advantages": 3.4123660181961667e-07, "advantages_std": 1.507073664665222, "clip_ratio": 0.0, "completion_length": 88.43333358764649, "epoch": 3.3781954887218046, "grad_norm": 6.53125, "kl": 0.32767518907785415, "learning_rate": 3.3120300751879703e-06, "loss": 0.0375, "num_tokens": 137694904.0, "reward": -1.521827945113182, "reward_std": 6.90161714553833, "rewards/get_chromagram_reward": 0.6179491519927979, "rewards/get_chromagram_reward_std": 0.10849400088191033, "rewards/get_intelligibility_reward": -5.163233387470245, "rewards/get_intelligibility_reward_std": 10.99599552154541, "rewards/get_target_len_reward": -0.02019930398091674, "rewards/get_target_len_reward_std": 0.06717992164194583, "step": 4490 }, { "advantages": -1.70990843173513e-07, "advantages_std": 1.6654401540756225, "clip_ratio": 0.0, "completion_length": 84.30773849487305, "epoch": 3.3857142857142857, "grad_norm": 6.125, "kl": 0.35676948428153993, "learning_rate": 3.3082706766917295e-06, "loss": 0.0372, "num_tokens": 137994260.0, "reward": -1.6494194865226746, "reward_std": 6.620905637741089, "rewards/get_chromagram_reward": 0.6207099735736847, "rewards/get_chromagram_reward_std": 0.11385154500603675, "rewards/get_intelligibility_reward": -5.549079060554504, "rewards/get_intelligibility_reward_std": 10.367657470703126, "rewards/get_target_len_reward": -0.019889032002538443, "rewards/get_target_len_reward_std": 0.054109343141317365, "step": 4500 }, { "advantages": 8.779268085845615e-08, "advantages_std": 1.5318343758583068, "clip_ratio": 0.0, "completion_length": 88.48333511352538, "epoch": 3.3909774436090228, "grad_norm": 10.6875, "kl": 0.2969478860497475, "learning_rate": 3.304511278195489e-06, "loss": 0.0394, "num_tokens": 310857.0, "reward": -1.3281305372714995, "reward_std": 6.489425706863403, "rewards/get_chromagram_reward": 0.6274168491363525, "rewards/get_chromagram_reward_std": 0.1028469517827034, "rewards/get_intelligibility_reward": -4.59235405921936, "rewards/get_intelligibility_reward_std": 10.438762092590332, "rewards/get_target_len_reward": -0.019454195350408553, "rewards/get_target_len_reward_std": 0.06737818010151386, "step": 4510 }, { "advantages": -2.575417425987325e-07, "advantages_std": 1.537089204788208, "clip_ratio": 0.0, "completion_length": 87.34881057739258, "epoch": 3.398496240601504, "grad_norm": 10.125, "kl": 0.29680820405483244, "learning_rate": 3.3007518796992484e-06, "loss": 0.0333, "num_tokens": 618257.0, "reward": -1.3931241035461426, "reward_std": 6.6218328952789305, "rewards/get_chromagram_reward": 0.6188321650028229, "rewards/get_chromagram_reward_std": 0.1196521833539009, "rewards/get_intelligibility_reward": -4.778675246238708, "rewards/get_intelligibility_reward_std": 10.661188697814941, "rewards/get_target_len_reward": -0.019528946094214916, "rewards/get_target_len_reward_std": 0.0580808324739337, "step": 4520 }, { "advantages": 6.085882752415728e-07, "advantages_std": 1.6382742047309875, "clip_ratio": 0.0, "completion_length": 87.75, "epoch": 3.406015037593985, "grad_norm": 74.5, "kl": 0.37056227773427963, "learning_rate": 3.296992481203008e-06, "loss": 0.0363, "num_tokens": 927145.0, "reward": -1.0709830440580845, "reward_std": 6.485338163375855, "rewards/get_chromagram_reward": 0.6164808750152588, "rewards/get_chromagram_reward_std": 0.10840248018503189, "rewards/get_intelligibility_reward": -3.810542845726013, "rewards/get_intelligibility_reward_std": 10.537764549255371, "rewards/get_target_len_reward": -0.01888696802780032, "rewards/get_target_len_reward_std": 0.0492866700515151, "step": 4530 }, { "advantages": 2.6449561971730875e-07, "advantages_std": 1.5188130497932435, "clip_ratio": 0.0, "completion_length": 84.0553581237793, "epoch": 3.4135338345864663, "grad_norm": 5.0625, "kl": 0.38036876618862153, "learning_rate": 3.2932330827067673e-06, "loss": 0.0437, "num_tokens": 1224484.0, "reward": -2.0424502193927765, "reward_std": 7.005794954299927, "rewards/get_chromagram_reward": 0.6144976735115051, "rewards/get_chromagram_reward_std": 0.11669495552778245, "rewards/get_intelligibility_reward": -6.720308995246887, "rewards/get_intelligibility_reward_std": 10.61027421951294, "rewards/get_target_len_reward": -0.021538918651640416, "rewards/get_target_len_reward_std": 0.07094106562435627, "step": 4540 }, { "advantages": -5.481143944052747e-07, "advantages_std": 1.5452426671981812, "clip_ratio": 0.0, "completion_length": 85.57797775268554, "epoch": 3.4210526315789473, "grad_norm": 18.125, "kl": 0.4040832698345184, "learning_rate": 3.289473684210527e-06, "loss": 0.0411, "num_tokens": 1527343.0, "reward": -1.387267404794693, "reward_std": 6.645217752456665, "rewards/get_chromagram_reward": 0.6343863129615783, "rewards/get_chromagram_reward_std": 0.1153879277408123, "rewards/get_intelligibility_reward": -4.774351906776428, "rewards/get_intelligibility_reward_std": 10.63620548248291, "rewards/get_target_len_reward": -0.021836266200989485, "rewards/get_target_len_reward_std": 0.05838818326592445, "step": 4550 }, { "advantages": -3.427267358802055e-07, "advantages_std": 1.5205845952033996, "clip_ratio": 0.0, "completion_length": 87.99464492797851, "epoch": 3.4285714285714284, "grad_norm": 6.46875, "kl": 0.332698717713356, "learning_rate": 3.285714285714286e-06, "loss": 0.034, "num_tokens": 1837449.0, "reward": -1.2367383658885955, "reward_std": 6.677933168411255, "rewards/get_chromagram_reward": 0.6336679756641388, "rewards/get_chromagram_reward_std": 0.11803798377513885, "rewards/get_intelligibility_reward": -4.325644779205322, "rewards/get_intelligibility_reward_std": 10.876698303222657, "rewards/get_target_len_reward": -0.018238031212240456, "rewards/get_target_len_reward_std": 0.04330310449004173, "step": 4560 }, { "advantages": -3.489355350438927e-07, "advantages_std": 1.6003393173217773, "clip_ratio": 0.0, "completion_length": 90.34761962890624, "epoch": 3.43609022556391, "grad_norm": 7.8125, "kl": 0.32162316888570786, "learning_rate": 3.281954887218045e-06, "loss": 0.0356, "num_tokens": 2153465.0, "reward": -1.3049261048436165, "reward_std": 6.861018323898316, "rewards/get_chromagram_reward": 0.6241649210453033, "rewards/get_chromagram_reward_std": 0.1164263904094696, "rewards/get_intelligibility_reward": -4.515636777877807, "rewards/get_intelligibility_reward_std": 11.109533786773682, "rewards/get_target_len_reward": -0.023306295182555913, "rewards/get_target_len_reward_std": 0.06607088632881641, "step": 4570 }, { "advantages": -1.3560057441353023e-07, "advantages_std": 1.559881293773651, "clip_ratio": 0.0, "completion_length": 83.56488342285157, "epoch": 3.443609022556391, "grad_norm": 6.0625, "kl": 0.35016684532165526, "learning_rate": 3.278195488721805e-06, "loss": 0.042, "num_tokens": 2449893.0, "reward": -1.8763112545013427, "reward_std": 7.130662345886231, "rewards/get_chromagram_reward": 0.6139516234397888, "rewards/get_chromagram_reward_std": 0.11391275078058243, "rewards/get_intelligibility_reward": -6.219006633758545, "rewards/get_intelligibility_reward_std": 10.96840171813965, "rewards/get_target_len_reward": -0.023878414928913117, "rewards/get_target_len_reward_std": 0.07354874908924103, "step": 4580 }, { "advantages": 2.5009116271235144e-07, "advantages_std": 1.6059733986854554, "clip_ratio": 0.0, "completion_length": 84.68631134033203, "epoch": 3.451127819548872, "grad_norm": 6.4375, "kl": 0.33188803791999816, "learning_rate": 3.274436090225564e-06, "loss": 0.035, "num_tokens": 2750204.0, "reward": -1.4741009950637818, "reward_std": 6.520594120025635, "rewards/get_chromagram_reward": 0.6148521661758423, "rewards/get_chromagram_reward_std": 0.12287932783365249, "rewards/get_intelligibility_reward": -5.016161251068115, "rewards/get_intelligibility_reward_std": 10.359986591339112, "rewards/get_target_len_reward": -0.02099353475496173, "rewards/get_target_len_reward_std": 0.05685290042310953, "step": 4590 }, { "advantages": -4.27166703786952e-08, "advantages_std": 1.4332894206047058, "clip_ratio": 0.0, "completion_length": 87.39881057739258, "epoch": 3.4586466165413534, "grad_norm": 7.875, "kl": 0.31006584167480467, "learning_rate": 3.270676691729324e-06, "loss": 0.0335, "num_tokens": 3058288.0, "reward": -1.3671068586409092, "reward_std": 6.128223896026611, "rewards/get_chromagram_reward": 0.6180779874324799, "rewards/get_chromagram_reward_std": 0.11426214426755905, "rewards/get_intelligibility_reward": -4.699258416891098, "rewards/get_intelligibility_reward_std": 9.655228281021119, "rewards/get_target_len_reward": -0.0201399652287364, "rewards/get_target_len_reward_std": 0.0592557929456234, "step": 4600 }, { "advantages": 1.0222197062148553e-06, "advantages_std": 1.5769393920898438, "clip_ratio": 0.0, "completion_length": 85.55357284545899, "epoch": 3.4661654135338344, "grad_norm": 17.875, "kl": 0.3076698824763298, "learning_rate": 3.2669172932330827e-06, "loss": 0.0353, "num_tokens": 3361429.0, "reward": -1.5159668922424316, "reward_std": 6.47130823135376, "rewards/get_chromagram_reward": 0.6088860273361206, "rewards/get_chromagram_reward_std": 0.12104258313775063, "rewards/get_intelligibility_reward": -5.12894773632288, "rewards/get_intelligibility_reward_std": 10.181937217712402, "rewards/get_target_len_reward": -0.02783880215138197, "rewards/get_target_len_reward_std": 0.10612631607800722, "step": 4610 }, { "advantages": 5.679826216464789e-07, "advantages_std": 1.5524175405502318, "clip_ratio": 0.0, "completion_length": 85.43214340209961, "epoch": 3.473684210526316, "grad_norm": 5.3125, "kl": 1.2340461641550065, "learning_rate": 3.2631578947368423e-06, "loss": 0.1223, "num_tokens": 3665014.0, "reward": -1.3473031282424928, "reward_std": 6.949818515777588, "rewards/get_chromagram_reward": 0.6227212309837341, "rewards/get_chromagram_reward_std": 0.12013640999794006, "rewards/get_intelligibility_reward": -4.6440167903900145, "rewards/get_intelligibility_reward_std": 11.21874017715454, "rewards/get_target_len_reward": -0.02061356231570244, "rewards/get_target_len_reward_std": 0.04689461421221495, "step": 4620 }, { "advantages": 5.125999696709016e-07, "advantages_std": 1.6045400023460388, "clip_ratio": 0.0, "completion_length": 87.08928756713867, "epoch": 3.481203007518797, "grad_norm": 278.0, "kl": 231.49522580206394, "learning_rate": 3.2593984962406015e-06, "loss": 23.1596, "num_tokens": 3972578.0, "reward": -1.8033069729804994, "reward_std": 7.021324205398559, "rewards/get_chromagram_reward": 0.6099605858325958, "rewards/get_chromagram_reward_std": 0.11153682023286819, "rewards/get_intelligibility_reward": -5.99813141822815, "rewards/get_intelligibility_reward_std": 10.870650100708009, "rewards/get_target_len_reward": -0.02174974959343672, "rewards/get_target_len_reward_std": 0.060309494659304616, "step": 4630 }, { "advantages": -6.929039315650698e-08, "advantages_std": 1.7106932163238526, "clip_ratio": 0.0, "completion_length": 88.45535812377929, "epoch": 3.488721804511278, "grad_norm": 388.0, "kl": 0.50369683355093, "learning_rate": 3.255639097744361e-06, "loss": 0.0563, "num_tokens": 4283808.0, "reward": -1.3488947361707688, "reward_std": 7.233368158340454, "rewards/get_chromagram_reward": 0.6195383369922638, "rewards/get_chromagram_reward_std": 0.11464283838868142, "rewards/get_intelligibility_reward": -4.639280533790588, "rewards/get_intelligibility_reward_std": 11.70107069015503, "rewards/get_target_len_reward": -0.026941781863570213, "rewards/get_target_len_reward_std": 0.07510890010744334, "step": 4640 }, { "advantages": 2.623846254934392e-07, "advantages_std": 1.5433639526367187, "clip_ratio": 0.0, "completion_length": 86.15416793823242, "epoch": 3.4962406015037595, "grad_norm": 17.5, "kl": 0.3447202920913696, "learning_rate": 3.2518796992481204e-06, "loss": 0.0393, "num_tokens": 4587949.0, "reward": -1.67192000746727, "reward_std": 6.874111652374268, "rewards/get_chromagram_reward": 0.6280596375465393, "rewards/get_chromagram_reward_std": 0.11481318324804306, "rewards/get_intelligibility_reward": -5.623130202293396, "rewards/get_intelligibility_reward_std": 10.843562889099122, "rewards/get_target_len_reward": -0.02068912973627448, "rewards/get_target_len_reward_std": 0.06118360720574856, "step": 4650 }, { "advantages": 2.0650526977306072e-07, "advantages_std": 1.5221888184547425, "clip_ratio": 0.0, "completion_length": 83.7500015258789, "epoch": 3.5037593984962405, "grad_norm": 6.875, "kl": 0.3721516489982605, "learning_rate": 3.24812030075188e-06, "loss": 0.0423, "num_tokens": 4886558.0, "reward": -1.9768889904022218, "reward_std": 7.324736595153809, "rewards/get_chromagram_reward": 0.6346181631088257, "rewards/get_chromagram_reward_std": 0.12539106458425522, "rewards/get_intelligibility_reward": -6.539831948280335, "rewards/get_intelligibility_reward_std": 11.161996984481812, "rewards/get_target_len_reward": -0.025452758464962245, "rewards/get_target_len_reward_std": 0.06582551747560501, "step": 4660 }, { "advantages": 6.457169860141221e-07, "advantages_std": 1.672259545326233, "clip_ratio": 0.0, "completion_length": 84.77797775268554, "epoch": 3.511278195488722, "grad_norm": 8.625, "kl": 0.46219568848609927, "learning_rate": 3.2443609022556393e-06, "loss": 0.05, "num_tokens": 5187815.0, "reward": -1.132588255405426, "reward_std": 6.148283672332764, "rewards/get_chromagram_reward": 0.6355343520641327, "rewards/get_chromagram_reward_std": 0.11606954038143158, "rewards/get_intelligibility_reward": -4.009374761581421, "rewards/get_intelligibility_reward_std": 9.920799160003662, "rewards/get_target_len_reward": -0.02392408112064004, "rewards/get_target_len_reward_std": 0.07128265760838985, "step": 4670 }, { "advantages": -6.233652989351413e-08, "advantages_std": 1.5050897002220154, "clip_ratio": 0.0, "completion_length": 85.82321624755859, "epoch": 3.518796992481203, "grad_norm": 5.28125, "kl": 0.32279101610183714, "learning_rate": 3.2406015037593985e-06, "loss": 0.037, "num_tokens": 5491254.0, "reward": -1.5685214262455702, "reward_std": 6.899728059768677, "rewards/get_chromagram_reward": 0.6327625930309295, "rewards/get_chromagram_reward_std": 0.11731386631727218, "rewards/get_intelligibility_reward": -5.3153788626194, "rewards/get_intelligibility_reward_std": 10.879010200500488, "rewards/get_target_len_reward": -0.022947657201439143, "rewards/get_target_len_reward_std": 0.06573955528438091, "step": 4680 }, { "advantages": 5.985300219890632e-08, "advantages_std": 1.7275002241134643, "clip_ratio": 0.0, "completion_length": 87.0125015258789, "epoch": 3.526315789473684, "grad_norm": 6.78125, "kl": 0.2822705447673798, "learning_rate": 3.236842105263158e-06, "loss": 0.0312, "num_tokens": 5798147.0, "reward": -1.4026454925537108, "reward_std": 6.647600078582764, "rewards/get_chromagram_reward": 0.6193685412406922, "rewards/get_chromagram_reward_std": 0.10237304717302323, "rewards/get_intelligibility_reward": -4.810842823982239, "rewards/get_intelligibility_reward_std": 10.632196998596191, "rewards/get_target_len_reward": -0.01646197042427957, "rewards/get_target_len_reward_std": 0.062437703087925914, "step": 4690 }, { "advantages": 3.8569172522429085e-07, "advantages_std": 1.3912248253822326, "clip_ratio": 0.0, "completion_length": 87.73393020629882, "epoch": 3.5338345864661656, "grad_norm": 4.40625, "kl": 0.30773247331380843, "learning_rate": 3.2330827067669174e-06, "loss": 0.0341, "num_tokens": 6106896.0, "reward": -1.9746524155139924, "reward_std": 7.20500168800354, "rewards/get_chromagram_reward": 0.6334104359149932, "rewards/get_chromagram_reward_std": 0.12139937430620193, "rewards/get_intelligibility_reward": -6.537299847602844, "rewards/get_intelligibility_reward_std": 11.083994102478027, "rewards/get_target_len_reward": -0.02006738306954503, "rewards/get_target_len_reward_std": 0.05874664410948753, "step": 4700 }, { "advantages": -2.4798018021243707e-07, "advantages_std": 1.584977638721466, "clip_ratio": 0.0, "completion_length": 85.10952529907226, "epoch": 3.5413533834586466, "grad_norm": 5.8125, "kl": 0.3366622805595398, "learning_rate": 3.229323308270677e-06, "loss": 0.0388, "num_tokens": 6409107.0, "reward": -1.544246843457222, "reward_std": 6.5916718482971195, "rewards/get_chromagram_reward": 0.6293317794799804, "rewards/get_chromagram_reward_std": 0.12301539331674576, "rewards/get_intelligibility_reward": -5.237197121977806, "rewards/get_intelligibility_reward_std": 10.314287614822387, "rewards/get_target_len_reward": -0.024875110294669867, "rewards/get_target_len_reward_std": 0.07248621061444283, "step": 4710 }, { "advantages": -9.549162074407037e-08, "advantages_std": 1.5304245591163634, "clip_ratio": 0.0, "completion_length": 87.42024002075195, "epoch": 3.548872180451128, "grad_norm": 7.09375, "kl": 0.3081626623868942, "learning_rate": 3.2255639097744362e-06, "loss": 0.0329, "num_tokens": 6716518.0, "reward": -2.0039563357830046, "reward_std": 7.060414791107178, "rewards/get_chromagram_reward": 0.6098299086093902, "rewards/get_chromagram_reward_std": 0.11160081923007965, "rewards/get_intelligibility_reward": -6.60257580280304, "rewards/get_intelligibility_reward_std": 10.738980102539063, "rewards/get_target_len_reward": -0.019122610334306955, "rewards/get_target_len_reward_std": 0.05139910690486431, "step": 4720 }, { "advantages": 2.8014182902325046e-07, "advantages_std": 1.6335660099983216, "clip_ratio": 0.0, "completion_length": 85.40833435058593, "epoch": 3.556390977443609, "grad_norm": 7.375, "kl": 0.27823727279901506, "learning_rate": 3.221804511278196e-06, "loss": 0.0361, "num_tokens": 7018209.0, "reward": -1.754353404045105, "reward_std": 6.8036487102508545, "rewards/get_chromagram_reward": 0.6300295114517211, "rewards/get_chromagram_reward_std": 0.10109626650810241, "rewards/get_intelligibility_reward": -5.869866466522216, "rewards/get_intelligibility_reward_std": 10.537944126129151, "rewards/get_target_len_reward": -0.0232229333370924, "rewards/get_target_len_reward_std": 0.07139034196734428, "step": 4730 }, { "advantages": -4.5696896577851476e-07, "advantages_std": 1.5473376512527466, "clip_ratio": 0.0, "completion_length": 85.30416793823242, "epoch": 3.56390977443609, "grad_norm": 6.09375, "kl": 0.4926650047302246, "learning_rate": 3.218045112781955e-06, "loss": 0.0546, "num_tokens": 7320787.0, "reward": -1.7560069799423217, "reward_std": 6.866075706481934, "rewards/get_chromagram_reward": 0.6117322325706482, "rewards/get_chromagram_reward_std": 0.12210858911275864, "rewards/get_intelligibility_reward": -5.8582494258880615, "rewards/get_intelligibility_reward_std": 10.652660131454468, "rewards/get_target_len_reward": -0.021503351628780365, "rewards/get_target_len_reward_std": 0.06012616865336895, "step": 4740 }, { "advantages": 3.178914056434223e-08, "advantages_std": 1.5654615759849548, "clip_ratio": 0.0, "completion_length": 86.27678604125977, "epoch": 3.571428571428571, "grad_norm": 92.0, "kl": 0.32612827718257903, "learning_rate": 3.2142857142857147e-06, "loss": 0.0354, "num_tokens": 7625945.0, "reward": -1.7317584097385406, "reward_std": 6.7727696895599365, "rewards/get_chromagram_reward": 0.6288199841976165, "rewards/get_chromagram_reward_std": 0.12393123582005501, "rewards/get_intelligibility_reward": -5.801967740058899, "rewards/get_intelligibility_reward_std": 10.51182508468628, "rewards/get_target_len_reward": -0.022127049788832665, "rewards/get_target_len_reward_std": 0.06373270452022553, "step": 4750 }, { "advantages": 1.9458433229146976e-07, "advantages_std": 1.6080638766288757, "clip_ratio": 0.0, "completion_length": 84.5077392578125, "epoch": 3.5789473684210527, "grad_norm": 4.71875, "kl": 0.6539877519011498, "learning_rate": 3.210526315789474e-06, "loss": 0.071, "num_tokens": 7925601.0, "reward": -1.5671055257320403, "reward_std": 6.523413801193238, "rewards/get_chromagram_reward": 0.6197600662708282, "rewards/get_chromagram_reward_std": 0.11611171290278435, "rewards/get_intelligibility_reward": -5.29871027469635, "rewards/get_intelligibility_reward_std": 10.249914932250977, "rewards/get_target_len_reward": -0.022366233076900242, "rewards/get_target_len_reward_std": 0.06861714329570531, "step": 4760 }, { "advantages": 2.4686258957018483e-07, "advantages_std": 1.670961356163025, "clip_ratio": 0.0, "completion_length": 88.0184539794922, "epoch": 3.5864661654135337, "grad_norm": 9.5, "kl": 4.304474097490311, "learning_rate": 3.206766917293233e-06, "loss": 0.4369, "num_tokens": 8234782.0, "reward": -1.4802373588085174, "reward_std": 6.493495321273803, "rewards/get_chromagram_reward": 0.6224713683128357, "rewards/get_chromagram_reward_std": 0.10268469974398613, "rewards/get_intelligibility_reward": -5.039081716537476, "rewards/get_intelligibility_reward_std": 10.251253795623779, "rewards/get_target_len_reward": -0.024101494625210763, "rewards/get_target_len_reward_std": 0.0734918974339962, "step": 4770 }, { "advantages": -3.136694559202624e-07, "advantages_std": 1.4593484938144683, "clip_ratio": 0.0, "completion_length": 89.94404907226563, "epoch": 3.593984962406015, "grad_norm": 9.3125, "kl": 0.34921103417873384, "learning_rate": 3.203007518796993e-06, "loss": 0.0377, "num_tokens": 8549105.0, "reward": -1.6996458053588868, "reward_std": 6.481504344940186, "rewards/get_chromagram_reward": 0.6326122224330902, "rewards/get_chromagram_reward_std": 0.12093279138207436, "rewards/get_intelligibility_reward": -5.710180354118347, "rewards/get_intelligibility_reward_std": 10.087297391891479, "rewards/get_target_len_reward": -0.021368958707898855, "rewards/get_target_len_reward_std": 0.052948375791311265, "step": 4780 }, { "advantages": -7.274250350519651e-07, "advantages_std": 1.6031969785690308, "clip_ratio": 0.0, "completion_length": 84.48095321655273, "epoch": 3.601503759398496, "grad_norm": 6.59375, "kl": 0.33725603520870207, "learning_rate": 3.199248120300752e-06, "loss": 0.0396, "num_tokens": 8848920.0, "reward": -1.6145397573709488, "reward_std": 6.478304386138916, "rewards/get_chromagram_reward": 0.6312460958957672, "rewards/get_chromagram_reward_std": 0.1125837966799736, "rewards/get_intelligibility_reward": -5.45187383890152, "rewards/get_intelligibility_reward_std": 10.112965631484986, "rewards/get_target_len_reward": -0.02299130242317915, "rewards/get_target_len_reward_std": 0.06794755682349204, "step": 4790 }, { "advantages": -1.6589960027957318e-07, "advantages_std": 1.6106690645217896, "clip_ratio": 0.0, "completion_length": 87.36369247436524, "epoch": 3.6090225563909772, "grad_norm": 6.09375, "kl": 0.3092425674200058, "learning_rate": 3.1954887218045117e-06, "loss": 0.0315, "num_tokens": 9156658.0, "reward": -1.5559602946043014, "reward_std": 6.706591558456421, "rewards/get_chromagram_reward": 0.6154250383377076, "rewards/get_chromagram_reward_std": 0.1058032289147377, "rewards/get_intelligibility_reward": -5.264953482151031, "rewards/get_intelligibility_reward_std": 10.521115493774413, "rewards/get_target_len_reward": -0.018352086283266546, "rewards/get_target_len_reward_std": 0.051684724539518355, "step": 4800 }, { "advantages": 5.599111375431676e-07, "advantages_std": 1.5521262526512145, "clip_ratio": 0.0, "completion_length": 88.90714492797852, "epoch": 3.6165413533834587, "grad_norm": 7.25, "kl": 0.9247394904494286, "learning_rate": 3.191729323308271e-06, "loss": 0.0967, "num_tokens": 9468894.0, "reward": -1.198024618625641, "reward_std": 6.5147710800170895, "rewards/get_chromagram_reward": 0.6325487613677978, "rewards/get_chromagram_reward_std": 0.11576045975089073, "rewards/get_intelligibility_reward": -4.203026843070984, "rewards/get_intelligibility_reward_std": 10.598825645446777, "rewards/get_target_len_reward": -0.02359545128419995, "rewards/get_target_len_reward_std": 0.0703369751572609, "step": 4810 }, { "advantages": 1.8129747232364933e-07, "advantages_std": 1.662482452392578, "clip_ratio": 0.0, "completion_length": 86.67857284545899, "epoch": 3.6240601503759398, "grad_norm": 8.875, "kl": 0.3507300466299057, "learning_rate": 3.1879699248120305e-06, "loss": 0.0386, "num_tokens": 9774695.0, "reward": -1.4802094399929047, "reward_std": 6.700056171417236, "rewards/get_chromagram_reward": 0.6103806674480439, "rewards/get_chromagram_reward_std": 0.11778130531311035, "rewards/get_intelligibility_reward": -5.0308449268341064, "rewards/get_intelligibility_reward_std": 10.702712249755859, "rewards/get_target_len_reward": -0.02016364596784115, "rewards/get_target_len_reward_std": 0.059031769074499606, "step": 4820 }, { "advantages": -4.316369796697472e-07, "advantages_std": 1.531548523902893, "clip_ratio": 0.0, "completion_length": 87.11250228881836, "epoch": 3.6315789473684212, "grad_norm": 6.40625, "kl": 0.3791714206337929, "learning_rate": 3.1842105263157898e-06, "loss": 0.0439, "num_tokens": 10081945.0, "reward": -1.2015679739415646, "reward_std": 6.660047101974487, "rewards/get_chromagram_reward": 0.6140450894832611, "rewards/get_chromagram_reward_std": 0.11099514588713646, "rewards/get_intelligibility_reward": -4.195827615261078, "rewards/get_intelligibility_reward_std": 10.852205181121827, "rewards/get_target_len_reward": -0.022921310737729073, "rewards/get_target_len_reward_std": 0.07505319323390722, "step": 4830 }, { "advantages": -6.854534433387016e-08, "advantages_std": 1.5445081114768981, "clip_ratio": 0.0, "completion_length": 84.11845397949219, "epoch": 3.6390977443609023, "grad_norm": 5.25, "kl": 0.33220981657505033, "learning_rate": 3.1804511278195494e-06, "loss": 0.0388, "num_tokens": 10381469.0, "reward": -1.2563096657395363, "reward_std": 6.40204119682312, "rewards/get_chromagram_reward": 0.6238720178604126, "rewards/get_chromagram_reward_std": 0.10978959575295448, "rewards/get_intelligibility_reward": -4.369728851318359, "rewards/get_intelligibility_reward_std": 10.306414556503295, "rewards/get_target_len_reward": -0.023071921616792678, "rewards/get_target_len_reward_std": 0.06400219611823559, "step": 4840 }, { "advantages": -5.635122782621238e-07, "advantages_std": 1.5880194425582885, "clip_ratio": 0.0, "completion_length": 87.5827407836914, "epoch": 3.6466165413533833, "grad_norm": 8.5, "kl": 2.8647284686565397, "learning_rate": 3.1766917293233086e-06, "loss": 0.2937, "num_tokens": 10690013.0, "reward": -1.343219232559204, "reward_std": 6.797761154174805, "rewards/get_chromagram_reward": 0.6102810621261596, "rewards/get_chromagram_reward_std": 0.11765508279204369, "rewards/get_intelligibility_reward": -4.6079377889633175, "rewards/get_intelligibility_reward_std": 11.04647216796875, "rewards/get_target_len_reward": -0.032000647950917484, "rewards/get_target_len_reward_std": 0.09445926304906607, "step": 4850 }, { "advantages": 2.635022156027844e-07, "advantages_std": 1.715597116947174, "clip_ratio": 0.0, "completion_length": 86.18869171142578, "epoch": 3.654135338345865, "grad_norm": 9.875, "kl": 0.2896317094564438, "learning_rate": 3.1729323308270683e-06, "loss": 0.0364, "num_tokens": 10995680.0, "reward": -1.201186391711235, "reward_std": 6.774267244338989, "rewards/get_chromagram_reward": 0.6166905164718628, "rewards/get_chromagram_reward_std": 0.1286042921245098, "rewards/get_intelligibility_reward": -4.195585256814956, "rewards/get_intelligibility_reward_std": 11.000476360321045, "rewards/get_target_len_reward": -0.024664169922471047, "rewards/get_target_len_reward_std": 0.0782824408262968, "step": 4860 }, { "advantages": -3.4297507873759513e-07, "advantages_std": 1.5256575226783753, "clip_ratio": 0.0, "completion_length": 84.12143096923828, "epoch": 3.661654135338346, "grad_norm": 11.0625, "kl": 0.31614808589220045, "learning_rate": 3.1691729323308275e-06, "loss": 0.0328, "num_tokens": 11295271.0, "reward": -1.4852625608444214, "reward_std": 6.9996030807495115, "rewards/get_chromagram_reward": 0.6112887680530548, "rewards/get_chromagram_reward_std": 0.1129858560860157, "rewards/get_intelligibility_reward": -5.048020737618208, "rewards/get_intelligibility_reward_std": 11.152630519866943, "rewards/get_target_len_reward": -0.019055381417274475, "rewards/get_target_len_reward_std": 0.050889964960515496, "step": 4870 }, { "advantages": -1.9197663547743105e-07, "advantages_std": 1.6212410807609559, "clip_ratio": 0.0, "completion_length": 87.62381134033203, "epoch": 3.6691729323308273, "grad_norm": 6.375, "kl": 0.3076537221670151, "learning_rate": 3.1654135338345863e-06, "loss": 0.0396, "num_tokens": 11604076.0, "reward": -1.5547768741846084, "reward_std": 6.924158525466919, "rewards/get_chromagram_reward": 0.6123470544815064, "rewards/get_chromagram_reward_std": 0.11614794582128525, "rewards/get_intelligibility_reward": -5.257422703504562, "rewards/get_intelligibility_reward_std": 10.931137180328369, "rewards/get_target_len_reward": -0.019254606403410434, "rewards/get_target_len_reward_std": 0.06625755876302719, "step": 4880 }, { "advantages": 3.3279261231200507e-07, "advantages_std": 1.529116427898407, "clip_ratio": 0.0, "completion_length": 86.49702529907226, "epoch": 3.6766917293233083, "grad_norm": 5.78125, "kl": 0.5446156710386276, "learning_rate": 3.1616541353383464e-06, "loss": 0.057, "num_tokens": 11909235.0, "reward": -1.4663063704967498, "reward_std": 6.5054491519927975, "rewards/get_chromagram_reward": 0.6186487138271332, "rewards/get_chromagram_reward_std": 0.11071438938379288, "rewards/get_intelligibility_reward": -4.994838905334473, "rewards/get_intelligibility_reward_std": 10.342679977416992, "rewards/get_target_len_reward": -0.022728720400482415, "rewards/get_target_len_reward_std": 0.06532426942139864, "step": 4890 }, { "advantages": 1.2839835150657564e-07, "advantages_std": 1.4912607192993164, "clip_ratio": 0.0, "completion_length": 87.38214416503907, "epoch": 3.6842105263157894, "grad_norm": 5.75, "kl": 0.32485940903425214, "learning_rate": 3.157894736842105e-06, "loss": 0.0372, "num_tokens": 12217138.0, "reward": -1.553096640110016, "reward_std": 6.913800048828125, "rewards/get_chromagram_reward": 0.6143791019916535, "rewards/get_chromagram_reward_std": 0.11344245597720146, "rewards/get_intelligibility_reward": -5.252873635292053, "rewards/get_intelligibility_reward_std": 11.019554710388183, "rewards/get_target_len_reward": -0.020795133616775274, "rewards/get_target_len_reward_std": 0.06670989170670509, "step": 4900 }, { "advantages": 9.685751578558666e-09, "advantages_std": 1.5933101773262024, "clip_ratio": 0.0, "completion_length": 86.33809661865234, "epoch": 3.6917293233082704, "grad_norm": 8.125, "kl": 0.3074103772640228, "learning_rate": 3.1541353383458652e-06, "loss": 0.0328, "num_tokens": 12522530.0, "reward": -1.1914991319179535, "reward_std": 6.638744497299195, "rewards/get_chromagram_reward": 0.6417280793190002, "rewards/get_chromagram_reward_std": 0.11801392138004303, "rewards/get_intelligibility_reward": -4.192426967620849, "rewards/get_intelligibility_reward_std": 10.866980838775635, "rewards/get_target_len_reward": -0.023798331245779993, "rewards/get_target_len_reward_std": 0.058177833631634715, "step": 4910 }, { "advantages": -1.671413589976467e-07, "advantages_std": 1.5453439235687256, "clip_ratio": 0.0, "completion_length": 84.20357360839844, "epoch": 3.699248120300752, "grad_norm": 6.34375, "kl": 0.32516286969184877, "learning_rate": 3.150375939849624e-06, "loss": 0.0346, "num_tokens": 12821557.0, "reward": -1.3945641126483679, "reward_std": 6.341374254226684, "rewards/get_chromagram_reward": 0.6144876718521118, "rewards/get_chromagram_reward_std": 0.10897763669490815, "rewards/get_intelligibility_reward": -4.780710679292679, "rewards/get_intelligibility_reward_std": 9.99721794128418, "rewards/get_target_len_reward": -0.017468852270394564, "rewards/get_target_len_reward_std": 0.04808750338852406, "step": 4920 }, { "advantages": 2.66482444999383e-07, "advantages_std": 1.658053195476532, "clip_ratio": 0.0, "completion_length": 90.80357208251954, "epoch": 3.706766917293233, "grad_norm": 6.0, "kl": 0.6095966547727585, "learning_rate": 3.146616541353384e-06, "loss": 0.0635, "num_tokens": 13138313.0, "reward": -1.650380975008011, "reward_std": 6.63803768157959, "rewards/get_chromagram_reward": 0.6273995757102966, "rewards/get_chromagram_reward_std": 0.11100057512521744, "rewards/get_intelligibility_reward": -5.559595322608947, "rewards/get_intelligibility_reward_std": 10.292674160003662, "rewards/get_target_len_reward": -0.018946948274970055, "rewards/get_target_len_reward_std": 0.05267423167824745, "step": 4930 }, { "advantages": -4.592041420892201e-07, "advantages_std": 1.539730954170227, "clip_ratio": 0.0, "completion_length": 88.36726379394531, "epoch": 3.7142857142857144, "grad_norm": 11.4375, "kl": 0.3707980513572693, "learning_rate": 3.142857142857143e-06, "loss": 0.042, "num_tokens": 13449400.0, "reward": -1.102984681725502, "reward_std": 6.229949712753296, "rewards/get_chromagram_reward": 0.6193095803260803, "rewards/get_chromagram_reward_std": 0.11251397728919983, "rewards/get_intelligibility_reward": -3.908903980255127, "rewards/get_intelligibility_reward_std": 10.1692476272583, "rewards/get_target_len_reward": -0.01935954224318266, "rewards/get_target_len_reward_std": 0.05940852351486683, "step": 4940 }, { "advantages": 3.3428273980007364e-07, "advantages_std": 1.5412377834320068, "clip_ratio": 0.0, "completion_length": 85.96607284545898, "epoch": 3.7218045112781954, "grad_norm": 5.4375, "kl": 0.36561394929885865, "learning_rate": 3.139097744360903e-06, "loss": 0.04, "num_tokens": 13753869.0, "reward": -1.2172423183918, "reward_std": 6.248193788528442, "rewards/get_chromagram_reward": 0.6351997315883636, "rewards/get_chromagram_reward_std": 0.1150067277252674, "rewards/get_intelligibility_reward": -4.263802683353424, "rewards/get_intelligibility_reward_std": 10.08340663909912, "rewards/get_target_len_reward": -0.02312365211546421, "rewards/get_target_len_reward_std": 0.06662276312708855, "step": 4950 }, { "advantages": -3.019968872308709e-07, "advantages_std": 1.5049192070961, "clip_ratio": 0.0, "completion_length": 85.79285888671875, "epoch": 3.7293233082706765, "grad_norm": 8.5, "kl": 0.34319745302200316, "learning_rate": 3.1353383458646618e-06, "loss": 0.0405, "num_tokens": 14057869.0, "reward": -1.6160476624965667, "reward_std": 6.543230485916138, "rewards/get_chromagram_reward": 0.6266816258430481, "rewards/get_chromagram_reward_std": 0.11776885390281677, "rewards/get_intelligibility_reward": -5.453230166435242, "rewards/get_intelligibility_reward_std": 10.165988731384278, "rewards/get_target_len_reward": -0.021594143752008677, "rewards/get_target_len_reward_std": 0.06191838830709458, "step": 4960 }, { "advantages": -2.3196142358017368e-07, "advantages_std": 1.4668985962867738, "clip_ratio": 0.0, "completion_length": 86.55059661865235, "epoch": 3.736842105263158, "grad_norm": 7.125, "kl": 0.32584773898124697, "learning_rate": 3.131578947368421e-06, "loss": 0.0348, "num_tokens": 14363201.0, "reward": -1.4537909626960754, "reward_std": 6.800926256179809, "rewards/get_chromagram_reward": 0.6184272468090057, "rewards/get_chromagram_reward_std": 0.11034496873617172, "rewards/get_intelligibility_reward": -4.961474227905273, "rewards/get_intelligibility_reward_std": 10.864050674438477, "rewards/get_target_len_reward": -0.018325691297650337, "rewards/get_target_len_reward_std": 0.05187810454517603, "step": 4970 }, { "advantages": -4.0121377935520287e-07, "advantages_std": 1.5062777817249298, "clip_ratio": 0.0, "completion_length": 81.4023826599121, "epoch": 3.744360902255639, "grad_norm": 44.0, "kl": 0.4334402531385422, "learning_rate": 3.1278195488721806e-06, "loss": 0.0473, "num_tokens": 14654849.0, "reward": -1.7004128456115724, "reward_std": 7.087176370620727, "rewards/get_chromagram_reward": 0.6041354537010193, "rewards/get_chromagram_reward_std": 0.1227414608001709, "rewards/get_intelligibility_reward": -5.686217975616455, "rewards/get_intelligibility_reward_std": 11.177903652191162, "rewards/get_target_len_reward": -0.019155793637037278, "rewards/get_target_len_reward_std": 0.054726789519190785, "step": 4980 }, { "advantages": -3.9388738315437877e-07, "advantages_std": 1.6339298367500306, "clip_ratio": 0.0, "completion_length": 92.73988265991211, "epoch": 3.7518796992481205, "grad_norm": 9.25, "kl": 0.3501076936721802, "learning_rate": 3.12406015037594e-06, "loss": 0.038, "num_tokens": 14977890.0, "reward": -1.289211356639862, "reward_std": 6.95147590637207, "rewards/get_chromagram_reward": 0.6183295786380768, "rewards/get_chromagram_reward_std": 0.11973841786384583, "rewards/get_intelligibility_reward": -4.466941666603089, "rewards/get_intelligibility_reward_std": 11.286186218261719, "rewards/get_target_len_reward": -0.01902186619117856, "rewards/get_target_len_reward_std": 0.04339134152978659, "step": 4990 }, { "advantages": -2.359350901315338e-08, "advantages_std": 1.6250181078910828, "clip_ratio": 0.0, "completion_length": 85.16071701049805, "epoch": 3.7593984962406015, "grad_norm": 5.125, "kl": 0.3183282628655434, "learning_rate": 3.1203007518796995e-06, "loss": 0.0353, "num_tokens": 15279955.0, "reward": -1.4234922677278519, "reward_std": 6.963759803771973, "rewards/get_chromagram_reward": 0.6406114637851715, "rewards/get_chromagram_reward_std": 0.11178898885846138, "rewards/get_intelligibility_reward": -4.882576875388622, "rewards/get_intelligibility_reward_std": 11.115171527862548, "rewards/get_target_len_reward": -0.028511168900877237, "rewards/get_target_len_reward_std": 0.08178133703768253, "step": 5000 }, { "advantages": -1.344829901661626e-07, "advantages_std": 1.6288373589515686, "clip_ratio": 0.0, "completion_length": 85.88869171142578, "epoch": 3.7669172932330826, "grad_norm": 5.96875, "kl": 0.3554558753967285, "learning_rate": 3.1165413533834587e-06, "loss": 0.0356, "num_tokens": 303525.0, "reward": -1.5104371786117554, "reward_std": 6.379262018203735, "rewards/get_chromagram_reward": 0.6029898881912231, "rewards/get_chromagram_reward_std": 0.10567670539021493, "rewards/get_intelligibility_reward": -5.115782928466797, "rewards/get_intelligibility_reward_std": 10.068166732788086, "rewards/get_target_len_reward": -0.018518290482461452, "rewards/get_target_len_reward_std": 0.050624676048755646, "step": 5010 }, { "advantages": -9.26852234783837e-07, "advantages_std": 1.5683493494987488, "clip_ratio": 0.0, "completion_length": 89.2023826599121, "epoch": 3.774436090225564, "grad_norm": 240.0, "kl": 0.32017101496458056, "learning_rate": 3.1127819548872184e-06, "loss": 0.0361, "num_tokens": 617139.0, "reward": -1.1960038989782333, "reward_std": 6.3604504585266115, "rewards/get_chromagram_reward": 0.6310937643051148, "rewards/get_chromagram_reward_std": 0.11309906244277954, "rewards/get_intelligibility_reward": -4.197369801998138, "rewards/get_intelligibility_reward_std": 10.345066165924072, "rewards/get_target_len_reward": -0.021735391952097415, "rewards/get_target_len_reward_std": 0.061658013984560965, "step": 5020 }, { "advantages": 6.482005375119115e-07, "advantages_std": 1.5073270559310914, "clip_ratio": 0.0, "completion_length": 84.32083663940429, "epoch": 3.781954887218045, "grad_norm": 6.375, "kl": 0.3702144831418991, "learning_rate": 3.1090225563909776e-06, "loss": 0.0369, "num_tokens": 917088.0, "reward": -1.6622222304344176, "reward_std": 6.832606649398803, "rewards/get_chromagram_reward": 0.6209613680839539, "rewards/get_chromagram_reward_std": 0.11562097668647767, "rewards/get_intelligibility_reward": -5.585749959945678, "rewards/get_intelligibility_reward_std": 10.770749187469482, "rewards/get_target_len_reward": -0.02187794419005513, "rewards/get_target_len_reward_std": 0.05569152720272541, "step": 5030 }, { "advantages": 5.339583211139143e-08, "advantages_std": 1.5658186316490172, "clip_ratio": 0.0, "completion_length": 87.19940643310547, "epoch": 3.7894736842105265, "grad_norm": 5.84375, "kl": 0.29390337616205214, "learning_rate": 3.1052631578947372e-06, "loss": 0.0324, "num_tokens": 1224793.0, "reward": -1.600793306529522, "reward_std": 7.168339109420776, "rewards/get_chromagram_reward": 0.6240476608276367, "rewards/get_chromagram_reward_std": 0.12075399681925773, "rewards/get_intelligibility_reward": -5.402066552639008, "rewards/get_intelligibility_reward_std": 11.336180496215821, "rewards/get_target_len_reward": -0.02436084356158972, "rewards/get_target_len_reward_std": 0.07069507241249084, "step": 5040 }, { "advantages": 3.2360356787553003e-07, "advantages_std": 1.6109248757362367, "clip_ratio": 0.0, "completion_length": 86.20297698974609, "epoch": 3.7969924812030076, "grad_norm": 23.5, "kl": 0.4897716358304024, "learning_rate": 3.1015037593984964e-06, "loss": 0.0565, "num_tokens": 1530154.0, "reward": -1.476647686958313, "reward_std": 6.300374603271484, "rewards/get_chromagram_reward": 0.6303463339805603, "rewards/get_chromagram_reward_std": 0.11567277759313584, "rewards/get_intelligibility_reward": -5.035875868797302, "rewards/get_intelligibility_reward_std": 9.897969913482665, "rewards/get_target_len_reward": -0.024413358047604562, "rewards/get_target_len_reward_std": 0.0697399366647005, "step": 5050 }, { "advantages": 1.1672577286958585e-07, "advantages_std": 1.5832074165344239, "clip_ratio": 0.0, "completion_length": 86.55000228881836, "epoch": 3.8045112781954886, "grad_norm": 9.3125, "kl": 0.38453815281391146, "learning_rate": 3.097744360902256e-06, "loss": 0.0451, "num_tokens": 1835452.0, "reward": -1.5706634759902953, "reward_std": 6.566565227508545, "rewards/get_chromagram_reward": 0.6134556949138641, "rewards/get_chromagram_reward_std": 0.11115473136305809, "rewards/get_intelligibility_reward": -5.305679714679718, "rewards/get_intelligibility_reward_std": 10.304097652435303, "rewards/get_target_len_reward": -0.019766069017350674, "rewards/get_target_len_reward_std": 0.06396161615848542, "step": 5060 }, { "advantages": 2.0898878716479885e-07, "advantages_std": 1.5226559519767762, "clip_ratio": 0.0, "completion_length": 88.90893096923828, "epoch": 3.8120300751879697, "grad_norm": 26.5, "kl": 0.4461306095123291, "learning_rate": 3.0939849624060153e-06, "loss": 0.0489, "num_tokens": 2147803.0, "reward": -1.4417099684476853, "reward_std": 6.737149572372436, "rewards/get_chromagram_reward": 0.6207392811775208, "rewards/get_chromagram_reward_std": 0.10657211765646935, "rewards/get_intelligibility_reward": -4.927727246284485, "rewards/get_intelligibility_reward_std": 10.79146318435669, "rewards/get_target_len_reward": -0.018141804076731206, "rewards/get_target_len_reward_std": 0.0579329727217555, "step": 5070 }, { "advantages": -4.967044731074566e-09, "advantages_std": 1.6526432633399963, "clip_ratio": 0.0, "completion_length": 88.12440567016601, "epoch": 3.819548872180451, "grad_norm": 29.375, "kl": 0.3443562790751457, "learning_rate": 3.0902255639097745e-06, "loss": 0.0343, "num_tokens": 2457928.0, "reward": -1.4987624168395997, "reward_std": 6.716041421890258, "rewards/get_chromagram_reward": 0.6303575217723847, "rewards/get_chromagram_reward_std": 0.12052299976348876, "rewards/get_intelligibility_reward": -5.10470449924469, "rewards/get_intelligibility_reward_std": 10.704877853393555, "rewards/get_target_len_reward": -0.02193996049463749, "rewards/get_target_len_reward_std": 0.049775147996842864, "step": 5080 }, { "advantages": -3.377596939913019e-08, "advantages_std": 1.5974238514900208, "clip_ratio": 0.0, "completion_length": 87.68869171142578, "epoch": 3.827067669172932, "grad_norm": 6.90625, "kl": 0.3182404175400734, "learning_rate": 3.086466165413534e-06, "loss": 0.0336, "num_tokens": 2766111.0, "reward": -1.2910590320825577, "reward_std": 6.435832595825195, "rewards/get_chromagram_reward": 0.6295618176460266, "rewards/get_chromagram_reward_std": 0.1058080993592739, "rewards/get_intelligibility_reward": -4.485034775733948, "rewards/get_intelligibility_reward_std": 10.362379455566407, "rewards/get_target_len_reward": -0.017704028356820344, "rewards/get_target_len_reward_std": 0.052550424635410306, "step": 5090 }, { "advantages": 4.005928971650974e-07, "advantages_std": 1.7196611046791077, "clip_ratio": 0.0, "completion_length": 87.6732162475586, "epoch": 3.8345864661654137, "grad_norm": 6.6875, "kl": 0.3247728988528252, "learning_rate": 3.0827067669172934e-06, "loss": 0.042, "num_tokens": 3074294.0, "reward": -1.3364479541778564, "reward_std": 6.6891755104064945, "rewards/get_chromagram_reward": 0.6277061879634858, "rewards/get_chromagram_reward_std": 0.11151268780231476, "rewards/get_intelligibility_reward": -4.606927335262299, "rewards/get_intelligibility_reward_std": 10.708397817611694, "rewards/get_target_len_reward": -0.03012237846851349, "rewards/get_target_len_reward_std": 0.09449879247695207, "step": 5100 }, { "advantages": -5.45382511063508e-07, "advantages_std": 1.5904954671859741, "clip_ratio": 0.0, "completion_length": 87.38333358764649, "epoch": 3.8421052631578947, "grad_norm": 103.0, "kl": 0.38762595504522324, "learning_rate": 3.078947368421053e-06, "loss": 0.0409, "num_tokens": 3382576.0, "reward": -1.5673535346984864, "reward_std": 7.143572378158569, "rewards/get_chromagram_reward": 0.6240078985691071, "rewards/get_chromagram_reward_std": 0.12458935901522636, "rewards/get_intelligibility_reward": -5.301816511154175, "rewards/get_intelligibility_reward_std": 11.481878900527954, "rewards/get_target_len_reward": -0.024251798167824745, "rewards/get_target_len_reward_std": 0.05677758939564228, "step": 5110 }, { "advantages": -7.698935178268585e-09, "advantages_std": 1.6819909691810608, "clip_ratio": 0.0, "completion_length": 88.9523826599121, "epoch": 3.8496240601503757, "grad_norm": 7.65625, "kl": 0.3676734402775764, "learning_rate": 3.0751879699248123e-06, "loss": 0.0389, "num_tokens": 3695398.0, "reward": -1.2386444240808487, "reward_std": 6.5803868770599365, "rewards/get_chromagram_reward": 0.6292356431484223, "rewards/get_chromagram_reward_std": 0.10431931540369987, "rewards/get_intelligibility_reward": -4.326017516851425, "rewards/get_intelligibility_reward_std": 10.606090307235718, "rewards/get_target_len_reward": -0.019151047244668006, "rewards/get_target_len_reward_std": 0.05265425220131874, "step": 5120 }, { "advantages": 2.2624931172998686e-07, "advantages_std": 1.6426711320877074, "clip_ratio": 0.0, "completion_length": 86.9380973815918, "epoch": 3.857142857142857, "grad_norm": 9.8125, "kl": 0.4054348558187485, "learning_rate": 3.071428571428572e-06, "loss": 0.0429, "num_tokens": 4001074.0, "reward": -1.4268037647008895, "reward_std": 6.356537961959839, "rewards/get_chromagram_reward": 0.6203414976596833, "rewards/get_chromagram_reward_std": 0.1233817383646965, "rewards/get_intelligibility_reward": -4.875811457633972, "rewards/get_intelligibility_reward_std": 10.090334129333495, "rewards/get_target_len_reward": -0.024940951261669397, "rewards/get_target_len_reward_std": 0.07505465373396873, "step": 5130 }, { "advantages": -2.1855025522654614e-08, "advantages_std": 1.6684409856796265, "clip_ratio": 0.0, "completion_length": 87.81845397949219, "epoch": 3.8646616541353382, "grad_norm": 49.5, "kl": 2.80745629966259, "learning_rate": 3.067669172932331e-06, "loss": 0.2834, "num_tokens": 4310100.0, "reward": -1.5709318161010741, "reward_std": 6.355284643173218, "rewards/get_chromagram_reward": 0.6249613583087921, "rewards/get_chromagram_reward_std": 0.11457760408520698, "rewards/get_intelligibility_reward": -5.31957859992981, "rewards/get_intelligibility_reward_std": 9.948035335540771, "rewards/get_target_len_reward": -0.018177997972816228, "rewards/get_target_len_reward_std": 0.04518711529672146, "step": 5140 }, { "advantages": -4.976987952431955e-07, "advantages_std": 1.5554571747779846, "clip_ratio": 0.0, "completion_length": 88.6583351135254, "epoch": 3.8721804511278197, "grad_norm": 13.0625, "kl": 0.36514002084732056, "learning_rate": 3.0639097744360908e-06, "loss": 0.041, "num_tokens": 4621810.0, "reward": -1.5353900849819184, "reward_std": 7.019238233566284, "rewards/get_chromagram_reward": 0.6143237709999084, "rewards/get_chromagram_reward_std": 0.11802843660116195, "rewards/get_intelligibility_reward": -5.199806427955627, "rewards/get_intelligibility_reward_std": 11.24576940536499, "rewards/get_target_len_reward": -0.020687189139425754, "rewards/get_target_len_reward_std": 0.06613438948988914, "step": 5150 }, { "advantages": -1.0952354614346405e-07, "advantages_std": 1.5083892703056336, "clip_ratio": 0.0, "completion_length": 84.21547775268554, "epoch": 3.8796992481203008, "grad_norm": 6.46875, "kl": 0.37466873079538343, "learning_rate": 3.06015037593985e-06, "loss": 0.0473, "num_tokens": 4920387.0, "reward": -1.8744422435760497, "reward_std": 6.897575569152832, "rewards/get_chromagram_reward": 0.6116405963897705, "rewards/get_chromagram_reward_std": 0.11380776911973953, "rewards/get_intelligibility_reward": -6.2126370668411255, "rewards/get_intelligibility_reward_std": 10.630679559707641, "rewards/get_target_len_reward": -0.022330059483647347, "rewards/get_target_len_reward_std": 0.06991768572479487, "step": 5160 }, { "advantages": 5.540748404087026e-07, "advantages_std": 1.548570156097412, "clip_ratio": 0.0, "completion_length": 89.57083587646484, "epoch": 3.887218045112782, "grad_norm": 5.03125, "kl": 0.37331474870443343, "learning_rate": 3.0563909774436092e-06, "loss": 0.0406, "num_tokens": 5234446.0, "reward": -1.1399612367153167, "reward_std": 6.340262699127197, "rewards/get_chromagram_reward": 0.6268186211585999, "rewards/get_chromagram_reward_std": 0.11353924125432968, "rewards/get_intelligibility_reward": -4.029066967964172, "rewards/get_intelligibility_reward_std": 10.31572060585022, "rewards/get_target_len_reward": -0.017635060101747514, "rewards/get_target_len_reward_std": 0.04803536366671324, "step": 5170 }, { "advantages": 1.4801821208720866e-07, "advantages_std": 1.6175037503242493, "clip_ratio": 0.0, "completion_length": 86.76726379394532, "epoch": 3.8947368421052633, "grad_norm": 5.9375, "kl": 0.3426645964384079, "learning_rate": 3.052631578947369e-06, "loss": 0.034, "num_tokens": 5542037.0, "reward": -0.9634460397064686, "reward_std": 6.810406589508057, "rewards/get_chromagram_reward": 0.6277351975440979, "rewards/get_chromagram_reward_std": 0.1226750746369362, "rewards/get_intelligibility_reward": -3.495256319642067, "rewards/get_intelligibility_reward_std": 11.23453130722046, "rewards/get_target_len_reward": -0.022816949151456357, "rewards/get_target_len_reward_std": 0.059058988466858864, "step": 5180 }, { "advantages": 6.829697838384163e-08, "advantages_std": 1.60453599691391, "clip_ratio": 0.0, "completion_length": 85.44404907226563, "epoch": 3.9022556390977443, "grad_norm": 5.6875, "kl": 0.29529436230659484, "learning_rate": 3.048872180451128e-06, "loss": 0.0377, "num_tokens": 5844093.0, "reward": -1.7944436550140381, "reward_std": 7.4176818370819095, "rewards/get_chromagram_reward": 0.6188875913619996, "rewards/get_chromagram_reward_std": 0.11493304148316383, "rewards/get_intelligibility_reward": -5.977695441246032, "rewards/get_intelligibility_reward_std": 11.72701063156128, "rewards/get_target_len_reward": -0.024522352125495674, "rewards/get_target_len_reward_std": 0.07692326549440623, "step": 5190 }, { "advantages": 1.5000502742168465e-07, "advantages_std": 1.6334968209266663, "clip_ratio": 0.0, "completion_length": 87.25774002075195, "epoch": 3.909774436090226, "grad_norm": 7.96875, "kl": 0.317596735060215, "learning_rate": 3.0451127819548877e-06, "loss": 0.0316, "num_tokens": 6152382.0, "reward": -1.4345587491989136, "reward_std": 7.033844709396362, "rewards/get_chromagram_reward": 0.6279944956302643, "rewards/get_chromagram_reward_std": 0.11789287552237511, "rewards/get_intelligibility_reward": -4.9106168985366825, "rewards/get_intelligibility_reward_std": 11.35363712310791, "rewards/get_target_len_reward": -0.021053369250148536, "rewards/get_target_len_reward_std": 0.05207511857151985, "step": 5200 }, { "advantages": 2.2264819250494837e-07, "advantages_std": 1.5829517245292664, "clip_ratio": 0.0, "completion_length": 84.24107284545899, "epoch": 3.917293233082707, "grad_norm": 10.75, "kl": 2.4750936955213545, "learning_rate": 3.0413533834586465e-06, "loss": 0.2519, "num_tokens": 6452238.0, "reward": -1.3005388617515563, "reward_std": 6.142632246017456, "rewards/get_chromagram_reward": 0.6174070298671722, "rewards/get_chromagram_reward_std": 0.115874382853508, "rewards/get_intelligibility_reward": -4.4966700077056885, "rewards/get_intelligibility_reward_std": 9.827916622161865, "rewards/get_target_len_reward": -0.022353346459567547, "rewards/get_target_len_reward_std": 0.06505865342915058, "step": 5210 }, { "advantages": -1.3584891953577482e-07, "advantages_std": 1.6031673908233643, "clip_ratio": 0.0, "completion_length": 85.95059661865234, "epoch": 3.924812030075188, "grad_norm": 6.28125, "kl": 0.3633052855730057, "learning_rate": 3.0375939849624066e-06, "loss": 0.0391, "num_tokens": 6756142.0, "reward": -1.3448066473007203, "reward_std": 6.417456722259521, "rewards/get_chromagram_reward": 0.6151858687400817, "rewards/get_chromagram_reward_std": 0.1155214361846447, "rewards/get_intelligibility_reward": -4.626984453201294, "rewards/get_intelligibility_reward_std": 10.28032922744751, "rewards/get_target_len_reward": -0.022621163725852968, "rewards/get_target_len_reward_std": 0.0578670272603631, "step": 5220 }, { "advantages": -1.2144447225637123e-07, "advantages_std": 1.6513906240463256, "clip_ratio": 0.0, "completion_length": 90.49821624755859, "epoch": 3.932330827067669, "grad_norm": 7.625, "kl": 0.3150393143296242, "learning_rate": 3.0338345864661654e-06, "loss": 0.0356, "num_tokens": 7073060.0, "reward": -1.3395723063498735, "reward_std": 6.531378984451294, "rewards/get_chromagram_reward": 0.6276501834392547, "rewards/get_chromagram_reward_std": 0.11458624824881554, "rewards/get_intelligibility_reward": -4.622202610969543, "rewards/get_intelligibility_reward_std": 10.413419675827026, "rewards/get_target_len_reward": -0.02416415549814701, "rewards/get_target_len_reward_std": 0.06485766638070345, "step": 5230 }, { "advantages": -5.2154058760578435e-08, "advantages_std": 1.5587757110595704, "clip_ratio": 0.0, "completion_length": 90.5952392578125, "epoch": 3.9398496240601504, "grad_norm": 6.6875, "kl": 0.3194952175021172, "learning_rate": 3.0300751879699255e-06, "loss": 0.0384, "num_tokens": 7389197.0, "reward": -1.1797830283641815, "reward_std": 6.843469095230103, "rewards/get_chromagram_reward": 0.6258788108825684, "rewards/get_chromagram_reward_std": 0.11164259016513825, "rewards/get_intelligibility_reward": -4.134021139144897, "rewards/get_intelligibility_reward_std": 11.231058502197266, "rewards/get_target_len_reward": -0.031206544488668442, "rewards/get_target_len_reward_std": 0.08871262595057487, "step": 5240 }, { "advantages": -1.1151036183321139e-07, "advantages_std": 1.6567686200141907, "clip_ratio": 0.0, "completion_length": 89.1255973815918, "epoch": 3.9473684210526314, "grad_norm": 34.0, "kl": 0.3676748931407928, "learning_rate": 3.0263157894736843e-06, "loss": 0.0399, "num_tokens": 7702461.0, "reward": -1.3696911913342773, "reward_std": 6.905460023880005, "rewards/get_chromagram_reward": 0.6132007837295532, "rewards/get_chromagram_reward_std": 0.11636709868907928, "rewards/get_intelligibility_reward": -4.701757583022117, "rewards/get_intelligibility_reward_std": 11.0114595413208, "rewards/get_target_len_reward": -0.020516569539904595, "rewards/get_target_len_reward_std": 0.05645679645240307, "step": 5250 }, { "advantages": 1.514020262050053e-07, "advantages_std": 1.469593095779419, "clip_ratio": 0.0, "completion_length": 88.95714492797852, "epoch": 3.954887218045113, "grad_norm": 10.4375, "kl": 0.32012175023555756, "learning_rate": 3.0225563909774443e-06, "loss": 0.0339, "num_tokens": 8014001.0, "reward": -1.283732157945633, "reward_std": 6.034783267974854, "rewards/get_chromagram_reward": 0.6218043804168701, "rewards/get_chromagram_reward_std": 0.11377415880560875, "rewards/get_intelligibility_reward": -4.453184795379639, "rewards/get_intelligibility_reward_std": 9.587284135818482, "rewards/get_target_len_reward": -0.01981568681076169, "rewards/get_target_len_reward_std": 0.05037029702216387, "step": 5260 }, { "advantages": -5.8636070718876e-07, "advantages_std": 1.4802544116973877, "clip_ratio": 0.0, "completion_length": 88.19524002075195, "epoch": 3.962406015037594, "grad_norm": 8.125, "kl": 0.31927538812160494, "learning_rate": 3.018796992481203e-06, "loss": 0.0385, "num_tokens": 8324684.0, "reward": -1.6413162469863891, "reward_std": 7.5248010635375975, "rewards/get_chromagram_reward": 0.6196599304676056, "rewards/get_chromagram_reward_std": 0.09920540302991868, "rewards/get_intelligibility_reward": -5.523752021789551, "rewards/get_intelligibility_reward_std": 12.014527988433837, "rewards/get_target_len_reward": -0.01985640712082386, "rewards/get_target_len_reward_std": 0.07063727751374245, "step": 5270 }, { "advantages": -4.023314090773056e-08, "advantages_std": 1.621880567073822, "clip_ratio": 0.0, "completion_length": 83.53392944335937, "epoch": 3.969924812030075, "grad_norm": 10.8125, "kl": 0.3786572337150574, "learning_rate": 3.0150375939849623e-06, "loss": 0.0384, "num_tokens": 8623832.0, "reward": -1.1323532313108444, "reward_std": 6.37744836807251, "rewards/get_chromagram_reward": 0.6239664614200592, "rewards/get_chromagram_reward_std": 0.11369733661413192, "rewards/get_intelligibility_reward": -4.0000452876091, "rewards/get_intelligibility_reward_std": 10.421091842651368, "rewards/get_target_len_reward": -0.02098073624074459, "rewards/get_target_len_reward_std": 0.050571346655488014, "step": 5280 }, { "advantages": 6.544092250315714e-08, "advantages_std": 1.570748794078827, "clip_ratio": 0.0, "completion_length": 90.32381134033203, "epoch": 3.9774436090225564, "grad_norm": 29.25, "kl": 0.30713569074869157, "learning_rate": 3.011278195488722e-06, "loss": 0.0364, "num_tokens": 8940216.0, "reward": -1.531138226389885, "reward_std": 6.566509771347046, "rewards/get_chromagram_reward": 0.6204177737236023, "rewards/get_chromagram_reward_std": 0.11796076446771622, "rewards/get_intelligibility_reward": -5.190147817134857, "rewards/get_intelligibility_reward_std": 10.306473445892333, "rewards/get_target_len_reward": -0.023684403765946625, "rewards/get_target_len_reward_std": 0.06714439634233713, "step": 5290 }, { "advantages": 5.191813119864718e-07, "advantages_std": 1.5766889333724976, "clip_ratio": 0.0, "completion_length": 83.86428680419922, "epoch": 3.9849624060150375, "grad_norm": 6.75, "kl": 0.34546895027160646, "learning_rate": 3.007518796992481e-06, "loss": 0.0395, "num_tokens": 9238461.0, "reward": -1.8154918551445007, "reward_std": 6.851765584945679, "rewards/get_chromagram_reward": 0.6145689308643341, "rewards/get_chromagram_reward_std": 0.11415692195296287, "rewards/get_intelligibility_reward": -6.03966007232666, "rewards/get_intelligibility_reward_std": 10.629767608642577, "rewards/get_target_len_reward": -0.02138425037264824, "rewards/get_target_len_reward_std": 0.07321446239948273, "step": 5300 }, { "advantages": -7.209678862807323e-07, "advantages_std": 1.627264392375946, "clip_ratio": 0.0, "completion_length": 90.0910743713379, "epoch": 3.992481203007519, "grad_norm": 5.25, "kl": 0.3158954918384552, "learning_rate": 3.003759398496241e-06, "loss": 0.036, "num_tokens": 9553891.0, "reward": -0.9919865518808365, "reward_std": 6.70737476348877, "rewards/get_chromagram_reward": 0.6357428431510925, "rewards/get_chromagram_reward_std": 0.11332304775714874, "rewards/get_intelligibility_reward": -3.587333357334137, "rewards/get_intelligibility_reward_std": 11.042314529418945, "rewards/get_target_len_reward": -0.02436893656849861, "rewards/get_target_len_reward_std": 0.07317685410380363, "step": 5310 }, { "advantages": 2.3655593013316433e-07, "advantages_std": 1.5000331044197082, "clip_ratio": 0.0, "completion_length": 85.18512115478515, "epoch": 4.000751879699248, "grad_norm": 10.0, "kl": 0.31935170739889146, "learning_rate": 3e-06, "loss": 0.0323, "num_tokens": 9856035.0, "reward": -1.2852010980248452, "reward_std": 6.327541875839233, "rewards/get_chromagram_reward": 0.613725996017456, "rewards/get_chromagram_reward_std": 0.11773486211895942, "rewards/get_intelligibility_reward": -4.447852373123169, "rewards/get_intelligibility_reward_std": 10.121273040771484, "rewards/get_target_len_reward": -0.02147660292685032, "rewards/get_target_len_reward_std": 0.05728430114686489, "step": 5320 }, { "advantages": -1.1920929736675135e-07, "advantages_std": 1.4331650376319884, "clip_ratio": 0.0, "completion_length": 89.57559738159179, "epoch": 4.00827067669173, "grad_norm": 9.3125, "kl": 0.3469295933842659, "learning_rate": 2.9962406015037597e-06, "loss": 0.0379, "num_tokens": 10170468.0, "reward": -0.9236498028039932, "reward_std": 6.302606296539307, "rewards/get_chromagram_reward": 0.6118164241313935, "rewards/get_chromagram_reward_std": 0.1398451879620552, "rewards/get_intelligibility_reward": -3.3608950555324553, "rewards/get_intelligibility_reward_std": 10.378479623794556, "rewards/get_target_len_reward": -0.021870562620460988, "rewards/get_target_len_reward_std": 0.05407197326421738, "step": 5330 }, { "advantages": 2.4115046244332915e-07, "advantages_std": 1.4898594737052917, "clip_ratio": 0.0, "completion_length": 88.45476379394532, "epoch": 4.015789473684211, "grad_norm": 6.5, "kl": 0.3065282255411148, "learning_rate": 2.992481203007519e-06, "loss": 0.0307, "num_tokens": 10481673.0, "reward": -1.1274623550474643, "reward_std": 6.374505424499512, "rewards/get_chromagram_reward": 0.6194785416126252, "rewards/get_chromagram_reward_std": 0.10782580673694611, "rewards/get_intelligibility_reward": -3.984870785474777, "rewards/get_intelligibility_reward_std": 10.420499992370605, "rewards/get_target_len_reward": -0.016994608193635942, "rewards/get_target_len_reward_std": 0.04643943645060063, "step": 5340 }, { "advantages": -6.996095393674296e-07, "advantages_std": 1.5091347455978394, "clip_ratio": 0.0, "completion_length": 87.61726303100586, "epoch": 4.023308270676692, "grad_norm": 6.40625, "kl": 0.318861123919487, "learning_rate": 2.9887218045112786e-06, "loss": 0.0322, "num_tokens": 10790785.0, "reward": -1.4366509914398193, "reward_std": 6.499625730514526, "rewards/get_chromagram_reward": 0.6275740385055542, "rewards/get_chromagram_reward_std": 0.11150631085038185, "rewards/get_intelligibility_reward": -4.918511700630188, "rewards/get_intelligibility_reward_std": 10.360735988616943, "rewards/get_target_len_reward": -0.0190150436013937, "rewards/get_target_len_reward_std": 0.046726927347481254, "step": 5350 }, { "advantages": 2.533197374532392e-07, "advantages_std": 1.444432508945465, "clip_ratio": 0.0, "completion_length": 89.69166946411133, "epoch": 4.030827067669173, "grad_norm": 464.0, "kl": 0.40669417977333067, "learning_rate": 2.984962406015038e-06, "loss": 0.0499, "num_tokens": 11105345.0, "reward": -0.9948464393615722, "reward_std": 6.640193319320678, "rewards/get_chromagram_reward": 0.605463171005249, "rewards/get_chromagram_reward_std": 0.12197316065430641, "rewards/get_intelligibility_reward": -3.5692497849464417, "rewards/get_intelligibility_reward_std": 10.988998126983642, "rewards/get_target_len_reward": -0.020752519182860852, "rewards/get_target_len_reward_std": 0.07180812451988458, "step": 5360 }, { "advantages": -4.1847427603158847e-07, "advantages_std": 1.4834824800491333, "clip_ratio": 0.0, "completion_length": 86.3851203918457, "epoch": 4.038345864661654, "grad_norm": 8.6875, "kl": 0.3193838641047478, "learning_rate": 2.981203007518797e-06, "loss": 0.0337, "num_tokens": 11410833.0, "reward": -1.4203301072120667, "reward_std": 6.485980701446533, "rewards/get_chromagram_reward": 0.6130522310733795, "rewards/get_chromagram_reward_std": 0.1125810906291008, "rewards/get_intelligibility_reward": -4.852126169204712, "rewards/get_intelligibility_reward_std": 10.270601749420166, "rewards/get_target_len_reward": -0.021915959380567073, "rewards/get_target_len_reward_std": 0.07191921528428794, "step": 5370 }, { "advantages": -8.195636169716636e-09, "advantages_std": 1.409821331501007, "clip_ratio": 0.0, "completion_length": 86.14285888671876, "epoch": 4.045864661654135, "grad_norm": 15.5, "kl": 0.4851821750402451, "learning_rate": 2.9774436090225567e-06, "loss": 0.0523, "num_tokens": 11714740.0, "reward": -1.6428377270698546, "reward_std": 6.631846380233765, "rewards/get_chromagram_reward": 0.6138918578624726, "rewards/get_chromagram_reward_std": 0.11235097497701645, "rewards/get_intelligibility_reward": -5.515289831161499, "rewards/get_intelligibility_reward_std": 10.434710884094239, "rewards/get_target_len_reward": -0.02711508497595787, "rewards/get_target_len_reward_std": 0.08662580009549856, "step": 5380 }, { "advantages": -6.115685017960005e-07, "advantages_std": 1.613796353340149, "clip_ratio": 0.0, "completion_length": 88.29404907226562, "epoch": 4.053383458646617, "grad_norm": 5.1875, "kl": 0.290841107070446, "learning_rate": 2.973684210526316e-06, "loss": 0.0296, "num_tokens": 12025384.0, "reward": -1.0370060920715332, "reward_std": 6.411907720565796, "rewards/get_chromagram_reward": 0.6243631541728973, "rewards/get_chromagram_reward_std": 0.1036013200879097, "rewards/get_intelligibility_reward": -3.718669390678406, "rewards/get_intelligibility_reward_std": 10.511144828796386, "rewards/get_target_len_reward": -0.016711823269724847, "rewards/get_target_len_reward_std": 0.0399149265140295, "step": 5390 }, { "advantages": 7.460514893864456e-07, "advantages_std": 1.6174774885177612, "clip_ratio": 0.0, "completion_length": 88.3398826599121, "epoch": 4.060902255639098, "grad_norm": 7.1875, "kl": 0.34107607007026675, "learning_rate": 2.9699248120300755e-06, "loss": 0.0348, "num_tokens": 12336297.0, "reward": -1.747072759270668, "reward_std": 7.106879091262817, "rewards/get_chromagram_reward": 0.6148701965808868, "rewards/get_chromagram_reward_std": 0.11414720416069031, "rewards/get_intelligibility_reward": -5.834241986274719, "rewards/get_intelligibility_reward_std": 11.133132362365723, "rewards/get_target_len_reward": -0.021846203505992888, "rewards/get_target_len_reward_std": 0.056982779502868654, "step": 5400 }, { "advantages": -1.835326429500128e-07, "advantages_std": 1.6123695611953734, "clip_ratio": 0.0, "completion_length": 89.4154769897461, "epoch": 4.068421052631579, "grad_norm": 10.0625, "kl": 0.5835892543196678, "learning_rate": 2.9661654135338348e-06, "loss": 0.0606, "num_tokens": 12650398.0, "reward": -1.3808288365602492, "reward_std": 6.739897966384888, "rewards/get_chromagram_reward": 0.6306729674339294, "rewards/get_chromagram_reward_std": 0.10669080466032028, "rewards/get_intelligibility_reward": -4.7529010534286495, "rewards/get_intelligibility_reward_std": 10.854820346832275, "rewards/get_target_len_reward": -0.02025802955031395, "rewards/get_target_len_reward_std": 0.05213299170136452, "step": 5410 }, { "advantages": 3.25590386296426e-07, "advantages_std": 1.5314580202102661, "clip_ratio": 0.0, "completion_length": 87.20892868041992, "epoch": 4.07593984962406, "grad_norm": 79.0, "kl": 0.5323092341423035, "learning_rate": 2.9624060150375944e-06, "loss": 0.0581, "num_tokens": 12957528.0, "reward": -1.5797663807868958, "reward_std": 6.701803588867188, "rewards/get_chromagram_reward": 0.6151145219802856, "rewards/get_chromagram_reward_std": 0.10405527502298355, "rewards/get_intelligibility_reward": -5.333010649681091, "rewards/get_intelligibility_reward_std": 10.452555561065674, "rewards/get_target_len_reward": -0.021402441896498203, "rewards/get_target_len_reward_std": 0.06293704155832529, "step": 5420 }, { "advantages": -1.4801821208720866e-07, "advantages_std": 1.6071329593658448, "clip_ratio": 0.0, "completion_length": 85.10119171142578, "epoch": 4.083458646616541, "grad_norm": 6.5, "kl": 0.2981695577502251, "learning_rate": 2.9586466165413536e-06, "loss": 0.0306, "num_tokens": 13259052.0, "reward": -1.3751710176467895, "reward_std": 6.559592008590698, "rewards/get_chromagram_reward": 0.6217553555965424, "rewards/get_chromagram_reward_std": 0.11424238607287407, "rewards/get_intelligibility_reward": -4.729965303838253, "rewards/get_intelligibility_reward_std": 10.498331451416016, "rewards/get_target_len_reward": -0.017302784696221353, "rewards/get_target_len_reward_std": 0.049371255189180376, "step": 5430 }, { "advantages": 5.831321175264747e-07, "advantages_std": 1.4913076996803283, "clip_ratio": 0.0, "completion_length": 91.3130973815918, "epoch": 4.090977443609023, "grad_norm": 12352.0, "kl": 1.394778886437416, "learning_rate": 2.9548872180451133e-06, "loss": 0.141, "num_tokens": 13577797.0, "reward": -1.4640146307647228, "reward_std": 7.061498022079467, "rewards/get_chromagram_reward": 0.6307594776153564, "rewards/get_chromagram_reward_std": 0.10615155696868897, "rewards/get_intelligibility_reward": -5.001734495162964, "rewards/get_intelligibility_reward_std": 11.286694431304932, "rewards/get_target_len_reward": -0.021068642288446425, "rewards/get_target_len_reward_std": 0.0496134627610445, "step": 5440 }, { "advantages": 4.137555919214719e-07, "advantages_std": 1.691400933265686, "clip_ratio": 0.0, "completion_length": 89.92916641235351, "epoch": 4.098496240601504, "grad_norm": 6.375, "kl": 0.3118233859539032, "learning_rate": 2.9511278195488725e-06, "loss": 0.036, "num_tokens": 13893208.0, "reward": -1.2735981404781342, "reward_std": 6.645020818710327, "rewards/get_chromagram_reward": 0.6332329690456391, "rewards/get_chromagram_reward_std": 0.11299219503998756, "rewards/get_intelligibility_reward": -4.42809339761734, "rewards/get_intelligibility_reward_std": 10.679800605773925, "rewards/get_target_len_reward": -0.02593356678262353, "rewards/get_target_len_reward_std": 0.07397942505776882, "step": 5450 }, { "advantages": 5.35696766945648e-07, "advantages_std": 1.7403199791908264, "clip_ratio": 0.0, "completion_length": 87.48869247436524, "epoch": 4.106015037593985, "grad_norm": 7.65625, "kl": 0.3418915793299675, "learning_rate": 2.9473684210526317e-06, "loss": 0.0369, "num_tokens": 14201166.0, "reward": -1.415563040971756, "reward_std": 6.7440876960754395, "rewards/get_chromagram_reward": 0.6164253532886506, "rewards/get_chromagram_reward_std": 0.11742549315094948, "rewards/get_intelligibility_reward": -4.8431459903717045, "rewards/get_intelligibility_reward_std": 10.860113525390625, "rewards/get_target_len_reward": -0.019968316610902547, "rewards/get_target_len_reward_std": 0.05547744482755661, "step": 5460 }, { "advantages": -2.1656354647348054e-07, "advantages_std": 1.6330742359161377, "clip_ratio": 0.0, "completion_length": 87.73690719604492, "epoch": 4.113533834586466, "grad_norm": 6.28125, "kl": 0.3976124137639999, "learning_rate": 2.9436090225563914e-06, "loss": 0.0383, "num_tokens": 14510130.0, "reward": -0.9823278225958347, "reward_std": 6.470097780227661, "rewards/get_chromagram_reward": 0.6272465288639069, "rewards/get_chromagram_reward_std": 0.12040900364518166, "rewards/get_intelligibility_reward": -3.553873872756958, "rewards/get_intelligibility_reward_std": 10.69575605392456, "rewards/get_target_len_reward": -0.020355920772999525, "rewards/get_target_len_reward_std": 0.04151589758694172, "step": 5470 }, { "advantages": -5.078812620240569e-07, "advantages_std": 1.4891107320785522, "clip_ratio": 0.0, "completion_length": 90.52381210327148, "epoch": 4.121052631578947, "grad_norm": 12.4375, "kl": 0.33563627153635023, "learning_rate": 2.9398496240601506e-06, "loss": 0.0344, "num_tokens": 14826580.0, "reward": -1.1729493260383606, "reward_std": 6.722590684890747, "rewards/get_chromagram_reward": 0.6255541265010833, "rewards/get_chromagram_reward_std": 0.1094115249812603, "rewards/get_intelligibility_reward": -4.125180602073669, "rewards/get_intelligibility_reward_std": 11.05735092163086, "rewards/get_target_len_reward": -0.01922136452049017, "rewards/get_target_len_reward_std": 0.05560791753232479, "step": 5480 }, { "advantages": 2.928078387753885e-07, "advantages_std": 1.6454651832580567, "clip_ratio": 0.0, "completion_length": 88.87262115478515, "epoch": 4.128571428571428, "grad_norm": 7.28125, "kl": 0.39739508330821993, "learning_rate": 2.9360902255639102e-06, "loss": 0.0451, "num_tokens": 15138643.0, "reward": -1.1072691828012466, "reward_std": 6.511484289169312, "rewards/get_chromagram_reward": 0.6285755276679993, "rewards/get_chromagram_reward_std": 0.11080964356660843, "rewards/get_intelligibility_reward": -3.926626533269882, "rewards/get_intelligibility_reward_std": 10.474292373657226, "rewards/get_target_len_reward": -0.023756447620689868, "rewards/get_target_len_reward_std": 0.06292850002646447, "step": 5490 }, { "advantages": 1.8253922888789021e-07, "advantages_std": 1.596780240535736, "clip_ratio": 0.0, "completion_length": 87.02202529907227, "epoch": 4.13609022556391, "grad_norm": 8.125, "kl": 0.2828808709979057, "learning_rate": 2.9323308270676694e-06, "loss": 0.0319, "num_tokens": 15445656.0, "reward": -1.3161565911024808, "reward_std": 6.487222576141358, "rewards/get_chromagram_reward": 0.6143887758255004, "rewards/get_chromagram_reward_std": 0.1173232764005661, "rewards/get_intelligibility_reward": -4.541189068555832, "rewards/get_intelligibility_reward_std": 10.356179904937743, "rewards/get_target_len_reward": -0.02166922325268388, "rewards/get_target_len_reward_std": 0.06364731937646866, "step": 5500 }, { "advantages": -1.914799199198569e-07, "advantages_std": 1.5466854929924012, "clip_ratio": 0.0, "completion_length": 86.2648826599121, "epoch": 4.143609022556391, "grad_norm": 6.53125, "kl": 0.344246631860733, "learning_rate": 2.928571428571429e-06, "loss": 0.0368, "num_tokens": 15750267.0, "reward": -1.5448312044143677, "reward_std": 6.672153091430664, "rewards/get_chromagram_reward": 0.6186954915523529, "rewards/get_chromagram_reward_std": 0.11632555276155472, "rewards/get_intelligibility_reward": -5.229874622821808, "rewards/get_intelligibility_reward_std": 10.553904008865356, "rewards/get_target_len_reward": -0.02331397421658039, "rewards/get_target_len_reward_std": 0.06315616220235824, "step": 5510 }, { "advantages": 1.713633551503335e-07, "advantages_std": 1.539741826057434, "clip_ratio": 0.0, "completion_length": 87.74524002075195, "epoch": 4.151127819548872, "grad_norm": 9.3125, "kl": 0.3175946220755577, "learning_rate": 2.9248120300751883e-06, "loss": 0.0348, "num_tokens": 16059223.0, "reward": -1.3458143293857574, "reward_std": 6.4293193340301515, "rewards/get_chromagram_reward": 0.6102846920490265, "rewards/get_chromagram_reward_std": 0.11779590845108032, "rewards/get_intelligibility_reward": -4.625285410881043, "rewards/get_intelligibility_reward_std": 10.368893718719482, "rewards/get_target_len_reward": -0.022442126646637917, "rewards/get_target_len_reward_std": 0.06540702283382416, "step": 5520 }, { "advantages": 4.1226550706596755e-08, "advantages_std": 1.5556996703147887, "clip_ratio": 0.0, "completion_length": 85.48750152587891, "epoch": 4.158646616541353, "grad_norm": 8.75, "kl": 0.3429344639182091, "learning_rate": 2.921052631578948e-06, "loss": 0.0391, "num_tokens": 16361772.0, "reward": -1.7375137686729432, "reward_std": 6.66321702003479, "rewards/get_chromagram_reward": 0.6258727490901947, "rewards/get_chromagram_reward_std": 0.11534877270460128, "rewards/get_intelligibility_reward": -5.815235280990601, "rewards/get_intelligibility_reward_std": 10.311593818664551, "rewards/get_target_len_reward": -0.023178601637482643, "rewards/get_target_len_reward_std": 0.060010458901524545, "step": 5530 }, { "advantages": 1.2442469881079888e-07, "advantages_std": 1.5329967498779298, "clip_ratio": 0.0, "completion_length": 86.05119247436524, "epoch": 4.166165413533834, "grad_norm": 6.1875, "kl": 0.3746921971440315, "learning_rate": 2.9172932330827068e-06, "loss": 0.0507, "num_tokens": 16665461.0, "reward": -1.7003209590911865, "reward_std": 6.663384437561035, "rewards/get_chromagram_reward": 0.6338975608348847, "rewards/get_chromagram_reward_std": 0.11849100887775421, "rewards/get_intelligibility_reward": -5.70644314289093, "rewards/get_intelligibility_reward_std": 10.250672149658204, "rewards/get_target_len_reward": -0.02841706983745098, "rewards/get_target_len_reward_std": 0.08157789278775454, "step": 5540 }, { "advantages": 2.5990108838414016e-07, "advantages_std": 1.5833655834197997, "clip_ratio": 0.0, "completion_length": 85.0321434020996, "epoch": 4.173684210526316, "grad_norm": 7.1875, "kl": 0.3090577393770218, "learning_rate": 2.913533834586467e-06, "loss": 0.0338, "num_tokens": 16966534.0, "reward": -1.4059063911437988, "reward_std": 6.507475471496582, "rewards/get_chromagram_reward": 0.61700838804245, "rewards/get_chromagram_reward_std": 0.12314107269048691, "rewards/get_intelligibility_reward": -4.815485262870789, "rewards/get_intelligibility_reward_std": 10.44538254737854, "rewards/get_target_len_reward": -0.019241928216069938, "rewards/get_target_len_reward_std": 0.054523023031651976, "step": 5550 }, { "advantages": 4.5100848637957825e-07, "advantages_std": 1.6410022377967834, "clip_ratio": 0.0, "completion_length": 87.3773811340332, "epoch": 4.181203007518797, "grad_norm": 7.125, "kl": 0.3442951112985611, "learning_rate": 2.9097744360902256e-06, "loss": 0.0377, "num_tokens": 17274196.0, "reward": -1.3003638498485088, "reward_std": 6.568748521804809, "rewards/get_chromagram_reward": 0.6183872222900391, "rewards/get_chromagram_reward_std": 0.11773400530219078, "rewards/get_intelligibility_reward": -4.495910170674324, "rewards/get_intelligibility_reward_std": 10.582419872283936, "rewards/get_target_len_reward": -0.02356829959899187, "rewards/get_target_len_reward_std": 0.06113504599779844, "step": 5560 }, { "advantages": -8.667507920279149e-08, "advantages_std": 1.654595386981964, "clip_ratio": 0.0, "completion_length": 87.95535888671876, "epoch": 4.188721804511278, "grad_norm": 6.125, "kl": 0.2908389538526535, "learning_rate": 2.906015037593985e-06, "loss": 0.0346, "num_tokens": 17583336.0, "reward": -1.6878258869051934, "reward_std": 6.9916908740997314, "rewards/get_chromagram_reward": 0.6317319989204406, "rewards/get_chromagram_reward_std": 0.11053061783313751, "rewards/get_intelligibility_reward": -5.674407863616944, "rewards/get_intelligibility_reward_std": 10.983782196044922, "rewards/get_target_len_reward": -0.020801611058413983, "rewards/get_target_len_reward_std": 0.06081876866519451, "step": 5570 }, { "advantages": 3.578762374445432e-07, "advantages_std": 1.6615803837776184, "clip_ratio": 0.0, "completion_length": 88.32202453613282, "epoch": 4.196240601503759, "grad_norm": 16.375, "kl": 0.44996539801359176, "learning_rate": 2.9022556390977445e-06, "loss": 0.0478, "num_tokens": 17894230.0, "reward": -1.3394523441791535, "reward_std": 6.421341896057129, "rewards/get_chromagram_reward": 0.6383058249950408, "rewards/get_chromagram_reward_std": 0.11125587001442909, "rewards/get_intelligibility_reward": -4.6326796293258665, "rewards/get_intelligibility_reward_std": 10.272837495803833, "rewards/get_target_len_reward": -0.023982838820666076, "rewards/get_target_len_reward_std": 0.05926213786005974, "step": 5580 }, { "advantages": -7.5052185337654e-07, "advantages_std": 1.6332746505737306, "clip_ratio": 0.0, "completion_length": 88.58750076293946, "epoch": 4.20375939849624, "grad_norm": 6.15625, "kl": 0.3342150181531906, "learning_rate": 2.8984962406015037e-06, "loss": 0.0402, "num_tokens": 18206003.0, "reward": -1.4808701485395432, "reward_std": 7.0243466854095455, "rewards/get_chromagram_reward": 0.6189306735992431, "rewards/get_chromagram_reward_std": 0.11732441484928131, "rewards/get_intelligibility_reward": -5.037647825479508, "rewards/get_intelligibility_reward_std": 11.133300590515137, "rewards/get_target_len_reward": -0.02389297802001238, "rewards/get_target_len_reward_std": 0.07138866055756807, "step": 5590 }, { "advantages": 5.985300020938667e-07, "advantages_std": 1.5033831596374512, "clip_ratio": 0.0, "completion_length": 83.35476379394531, "epoch": 4.211278195488722, "grad_norm": 44.5, "kl": 0.37868370711803434, "learning_rate": 2.8947368421052634e-06, "loss": 0.0388, "num_tokens": 18503732.0, "reward": -1.8184113264083863, "reward_std": 6.87455472946167, "rewards/get_chromagram_reward": 0.6338491857051849, "rewards/get_chromagram_reward_std": 0.11686685383319854, "rewards/get_intelligibility_reward": -6.063382339477539, "rewards/get_intelligibility_reward_std": 10.610133171081543, "rewards/get_target_len_reward": -0.025700561329722406, "rewards/get_target_len_reward_std": 0.06520087532699108, "step": 5600 }, { "advantages": -2.515812752790225e-07, "advantages_std": 1.4947779536247254, "clip_ratio": 0.0, "completion_length": 87.99464492797851, "epoch": 4.218796992481203, "grad_norm": 12.0625, "kl": 0.4214991435408592, "learning_rate": 2.8909774436090226e-06, "loss": 0.0469, "num_tokens": 18814136.0, "reward": -1.0186500787734984, "reward_std": 6.064566564559937, "rewards/get_chromagram_reward": 0.6329184353351593, "rewards/get_chromagram_reward_std": 0.10823804661631584, "rewards/get_intelligibility_reward": -3.66917080283165, "rewards/get_intelligibility_reward_std": 9.890348434448242, "rewards/get_target_len_reward": -0.019697726145386694, "rewards/get_target_len_reward_std": 0.05908492133021355, "step": 5610 }, { "advantages": -2.615153995577657e-07, "advantages_std": 1.5261252880096436, "clip_ratio": 0.0, "completion_length": 89.44642944335938, "epoch": 4.226315789473684, "grad_norm": 50.0, "kl": 0.9879576608538627, "learning_rate": 2.8872180451127822e-06, "loss": 0.1026, "num_tokens": 19128465.0, "reward": -1.367058303952217, "reward_std": 6.9173722743988035, "rewards/get_chromagram_reward": 0.6275076985359191, "rewards/get_chromagram_reward_std": 0.10938590541481971, "rewards/get_intelligibility_reward": -4.710054632276297, "rewards/get_intelligibility_reward_std": 11.131224727630615, "rewards/get_target_len_reward": -0.018627769872546197, "rewards/get_target_len_reward_std": 0.0545194873586297, "step": 5620 }, { "advantages": -9.983778284095025e-08, "advantages_std": 1.6729804039001466, "clip_ratio": 0.0, "completion_length": 87.58035736083984, "epoch": 4.233834586466165, "grad_norm": 6.6875, "kl": 3.7320891961455347, "learning_rate": 2.8834586466165414e-06, "loss": 0.3752, "num_tokens": 19437278.0, "reward": -1.323493231832981, "reward_std": 6.493629074096679, "rewards/get_chromagram_reward": 0.6278137683868408, "rewards/get_chromagram_reward_std": 0.12112269923090935, "rewards/get_intelligibility_reward": -4.576982426643371, "rewards/get_intelligibility_reward_std": 10.355177402496338, "rewards/get_target_len_reward": -0.021310653630644084, "rewards/get_target_len_reward_std": 0.05675790887326002, "step": 5630 }, { "advantages": -1.630435328081603e-07, "advantages_std": 1.5453658819198608, "clip_ratio": 0.0, "completion_length": 87.91547927856445, "epoch": 4.241353383458646, "grad_norm": 9.75, "kl": 0.38267752528190613, "learning_rate": 2.879699248120301e-06, "loss": 0.0406, "num_tokens": 19747203.0, "reward": -1.4079192280769348, "reward_std": 6.9246522903442385, "rewards/get_chromagram_reward": 0.6170772731304168, "rewards/get_chromagram_reward_std": 0.11740869060158729, "rewards/get_intelligibility_reward": -4.8179065704345705, "rewards/get_intelligibility_reward_std": 11.177426147460938, "rewards/get_target_len_reward": -0.022928063943982124, "rewards/get_target_len_reward_std": 0.061421534046530724, "step": 5640 }, { "advantages": 4.1338305045712787e-07, "advantages_std": 1.6221726179122924, "clip_ratio": 0.0, "completion_length": 85.67976303100586, "epoch": 4.248872180451128, "grad_norm": 6.9375, "kl": 5.435917441546917, "learning_rate": 2.8759398496240603e-06, "loss": 0.5473, "num_tokens": 20050422.0, "reward": -2.090642374753952, "reward_std": 7.292528247833252, "rewards/get_chromagram_reward": 0.6181860446929932, "rewards/get_chromagram_reward_std": 0.11201700642704963, "rewards/get_intelligibility_reward": -6.86620078086853, "rewards/get_intelligibility_reward_std": 11.104904079437256, "rewards/get_target_len_reward": -0.02391198929399252, "rewards/get_target_len_reward_std": 0.07691430859267712, "step": 5650 }, { "advantages": 1.1871259175677551e-07, "advantages_std": 1.589626944065094, "clip_ratio": 0.0, "completion_length": 85.58928680419922, "epoch": 4.256390977443609, "grad_norm": 82.5, "kl": 0.360989385843277, "learning_rate": 2.8721804511278195e-06, "loss": 0.0408, "num_tokens": 20353824.0, "reward": -1.523736972361803, "reward_std": 6.698141288757324, "rewards/get_chromagram_reward": 0.611247593164444, "rewards/get_chromagram_reward_std": 0.1271521754562855, "rewards/get_intelligibility_reward": -5.157619923353195, "rewards/get_intelligibility_reward_std": 10.54830846786499, "rewards/get_target_len_reward": -0.024838272668421268, "rewards/get_target_len_reward_std": 0.0677162567153573, "step": 5660 }, { "advantages": -4.0257972386825715e-07, "advantages_std": 1.6105037808418274, "clip_ratio": 0.0, "completion_length": 90.30833435058594, "epoch": 4.26390977443609, "grad_norm": 6.75, "kl": 0.3523553296923637, "learning_rate": 2.868421052631579e-06, "loss": 0.0402, "num_tokens": 20669973.0, "reward": -1.3020567789673805, "reward_std": 6.694693613052368, "rewards/get_chromagram_reward": 0.6207749962806701, "rewards/get_chromagram_reward_std": 0.11944424584507943, "rewards/get_intelligibility_reward": -4.507850003242493, "rewards/get_intelligibility_reward_std": 10.755848407745361, "rewards/get_target_len_reward": -0.01909503461793065, "rewards/get_target_len_reward_std": 0.05830676984041929, "step": 5670 }, { "advantages": 3.429750753625171e-07, "advantages_std": 1.5029768705368043, "clip_ratio": 0.0, "completion_length": 81.91309661865235, "epoch": 4.271428571428571, "grad_norm": 6.875, "kl": 0.32668228149414064, "learning_rate": 2.8646616541353384e-06, "loss": 0.0355, "num_tokens": 20963273.0, "reward": -1.5594689965248107, "reward_std": 6.404059028625488, "rewards/get_chromagram_reward": 0.6218547642230987, "rewards/get_chromagram_reward_std": 0.1123454861342907, "rewards/get_intelligibility_reward": -5.275913119316101, "rewards/get_intelligibility_reward_std": 10.039891481399536, "rewards/get_target_len_reward": -0.024348314758390187, "rewards/get_target_len_reward_std": 0.07023859769105911, "step": 5680 }, { "advantages": 3.874301484074749e-08, "advantages_std": 1.4815411686897277, "clip_ratio": 0.0, "completion_length": 88.20654907226563, "epoch": 4.278947368421052, "grad_norm": 8.75, "kl": 0.3460480824112892, "learning_rate": 2.860902255639098e-06, "loss": 0.0406, "num_tokens": 21273796.0, "reward": -1.302916806936264, "reward_std": 7.124443292617798, "rewards/get_chromagram_reward": 0.6314963459968567, "rewards/get_chromagram_reward_std": 0.1167108178138733, "rewards/get_intelligibility_reward": -4.514073705673217, "rewards/get_intelligibility_reward_std": 11.613936042785644, "rewards/get_target_len_reward": -0.026172821037471296, "rewards/get_target_len_reward_std": 0.08674208335578441, "step": 5690 }, { "advantages": 4.0456654346598955e-07, "advantages_std": 1.5474624276161193, "clip_ratio": 0.0, "completion_length": 87.81726379394532, "epoch": 4.286466165413533, "grad_norm": 10.6875, "kl": 0.30418709963560103, "learning_rate": 2.8571428571428573e-06, "loss": 0.0319, "num_tokens": 21582695.0, "reward": -1.9298257410526276, "reward_std": 6.67116961479187, "rewards/get_chromagram_reward": 0.6151466488838195, "rewards/get_chromagram_reward_std": 0.1229398138821125, "rewards/get_intelligibility_reward": -6.38664014339447, "rewards/get_intelligibility_reward_std": 10.024635791778564, "rewards/get_target_len_reward": -0.017983187455683947, "rewards/get_target_len_reward_std": 0.046716343890875575, "step": 5700 }, { "advantages": 6.929040683445464e-08, "advantages_std": 1.5471153259277344, "clip_ratio": 0.0, "completion_length": 84.2125015258789, "epoch": 4.293984962406015, "grad_norm": 22.0, "kl": 0.31379797756671907, "learning_rate": 2.853383458646617e-06, "loss": 0.0397, "num_tokens": 21881815.0, "reward": -1.4730405285954475, "reward_std": 6.675574111938476, "rewards/get_chromagram_reward": 0.6127762138843537, "rewards/get_chromagram_reward_std": 0.11312752440571786, "rewards/get_intelligibility_reward": -5.006561887264252, "rewards/get_intelligibility_reward_std": 10.592585468292237, "rewards/get_target_len_reward": -0.02533548539504409, "rewards/get_target_len_reward_std": 0.08211482018232345, "step": 5710 }, { "advantages": 2.2575259350787746e-07, "advantages_std": 1.615368866920471, "clip_ratio": 0.0, "completion_length": 86.06250228881837, "epoch": 4.301503759398496, "grad_norm": 9.8125, "kl": 0.36106809973716736, "learning_rate": 2.849624060150376e-06, "loss": 0.0412, "num_tokens": 22185867.0, "reward": -1.664530771970749, "reward_std": 6.669352960586548, "rewards/get_chromagram_reward": 0.6247674703598023, "rewards/get_chromagram_reward_std": 0.10432815700769424, "rewards/get_intelligibility_reward": -5.594386541843415, "rewards/get_intelligibility_reward_std": 10.220803928375243, "rewards/get_target_len_reward": -0.023972975183278324, "rewards/get_target_len_reward_std": 0.06604787111282348, "step": 5720 }, { "advantages": -2.2351742465787083e-07, "advantages_std": 1.6799475312232972, "clip_ratio": 0.0, "completion_length": 86.37381057739258, "epoch": 4.309022556390977, "grad_norm": 6.125, "kl": 0.3318557575345039, "learning_rate": 2.8458646616541358e-06, "loss": 0.0321, "num_tokens": 22491298.0, "reward": -1.5887473464012145, "reward_std": 6.799813938140869, "rewards/get_chromagram_reward": 0.6177287518978118, "rewards/get_chromagram_reward_std": 0.12023614346981049, "rewards/get_intelligibility_reward": -5.365889692306519, "rewards/get_intelligibility_reward_std": 10.740260219573974, "rewards/get_target_len_reward": -0.018080861307680607, "rewards/get_target_len_reward_std": 0.04215618222951889, "step": 5730 }, { "advantages": -2.6077032799776134e-07, "advantages_std": 1.5404383897781373, "clip_ratio": 0.0, "completion_length": 86.54404983520507, "epoch": 4.3165413533834585, "grad_norm": 9.4375, "kl": 0.30639515817165375, "learning_rate": 2.842105263157895e-06, "loss": 0.0359, "num_tokens": 22796677.0, "reward": -1.5064129531383514, "reward_std": 6.709846925735474, "rewards/get_chromagram_reward": 0.625343632698059, "rewards/get_chromagram_reward_std": 0.11143359690904617, "rewards/get_intelligibility_reward": -5.122057247161865, "rewards/get_intelligibility_reward_std": 10.567590045928956, "rewards/get_target_len_reward": -0.022525263484567404, "rewards/get_target_len_reward_std": 0.07170127313584089, "step": 5740 }, { "advantages": -7.40587732428466e-07, "advantages_std": 1.668913996219635, "clip_ratio": 0.0, "completion_length": 84.5898826599121, "epoch": 4.3240601503759395, "grad_norm": 23.25, "kl": 0.5153587549924851, "learning_rate": 2.8383458646616546e-06, "loss": 0.061, "num_tokens": 23096548.0, "reward": -2.0747710138559343, "reward_std": 7.238304138183594, "rewards/get_chromagram_reward": 0.6240490555763245, "rewards/get_chromagram_reward_std": 0.11386263146996498, "rewards/get_intelligibility_reward": -6.8224663734436035, "rewards/get_intelligibility_reward_std": 10.980531883239745, "rewards/get_target_len_reward": -0.025895378738641738, "rewards/get_target_len_reward_std": 0.078858133405447, "step": 5750 }, { "advantages": 8.49366280419872e-08, "advantages_std": 1.5249725341796876, "clip_ratio": 0.0, "completion_length": 85.34047927856446, "epoch": 4.331578947368421, "grad_norm": 7.0, "kl": 0.33452349007129667, "learning_rate": 2.834586466165414e-06, "loss": 0.042, "num_tokens": 23398591.0, "reward": -1.6896941900253295, "reward_std": 6.341463661193847, "rewards/get_chromagram_reward": 0.6132433891296387, "rewards/get_chromagram_reward_std": 0.11532968133687974, "rewards/get_intelligibility_reward": -5.659934663772583, "rewards/get_intelligibility_reward_std": 9.797483968734742, "rewards/get_target_len_reward": -0.022390842065215112, "rewards/get_target_len_reward_std": 0.07444148659706115, "step": 5760 }, { "advantages": -9.158005980225425e-08, "advantages_std": 1.5952982187271119, "clip_ratio": 0.0, "completion_length": 84.70654983520508, "epoch": 4.3390977443609025, "grad_norm": 10.4375, "kl": 0.4010193169116974, "learning_rate": 2.830827067669173e-06, "loss": 0.0402, "num_tokens": 23699966.0, "reward": -1.2389403194189073, "reward_std": 6.306393384933472, "rewards/get_chromagram_reward": 0.6315956294536591, "rewards/get_chromagram_reward_std": 0.12252237275242805, "rewards/get_intelligibility_reward": -4.322550284862518, "rewards/get_intelligibility_reward_std": 10.139668178558349, "rewards/get_target_len_reward": -0.02586617963388562, "rewards/get_target_len_reward_std": 0.060991795361042024, "step": 5770 }, { "advantages": 2.7641655195864703e-07, "advantages_std": 1.6074973464012146, "clip_ratio": 0.0, "completion_length": 86.55714492797851, "epoch": 4.3466165413533835, "grad_norm": 6.53125, "kl": 0.34225198477506635, "learning_rate": 2.8270676691729327e-06, "loss": 0.0425, "num_tokens": 24005654.0, "reward": -1.5039369583129882, "reward_std": 6.346541547775269, "rewards/get_chromagram_reward": 0.6210005640983581, "rewards/get_chromagram_reward_std": 0.11820514425635338, "rewards/get_intelligibility_reward": -5.111461114883423, "rewards/get_intelligibility_reward_std": 9.993875980377197, "rewards/get_target_len_reward": -0.021349991485476495, "rewards/get_target_len_reward_std": 0.06595882065594197, "step": 5780 }, { "advantages": -2.7815503500505656e-07, "advantages_std": 1.572441029548645, "clip_ratio": 0.0, "completion_length": 84.38393020629883, "epoch": 4.3541353383458645, "grad_norm": 12.5625, "kl": 0.35131756067276, "learning_rate": 2.823308270676692e-06, "loss": 0.0393, "num_tokens": 24305185.0, "reward": -1.598071312904358, "reward_std": 6.891125774383545, "rewards/get_chromagram_reward": 0.6182050764560699, "rewards/get_chromagram_reward_std": 0.12791308984160424, "rewards/get_intelligibility_reward": -5.390382766723633, "rewards/get_intelligibility_reward_std": 10.94007167816162, "rewards/get_target_len_reward": -0.022035928349941968, "rewards/get_target_len_reward_std": 0.05774991624057293, "step": 5790 }, { "advantages": 6.737808433854297e-07, "advantages_std": 1.6271725296974182, "clip_ratio": 0.0, "completion_length": 86.93452529907226, "epoch": 4.361654135338346, "grad_norm": 6.8125, "kl": 0.5817287877202034, "learning_rate": 2.8195488721804516e-06, "loss": 0.0607, "num_tokens": 24612401.0, "reward": -1.4664264440536499, "reward_std": 6.983836317062378, "rewards/get_chromagram_reward": 0.6144366025924682, "rewards/get_chromagram_reward_std": 0.1035338170826435, "rewards/get_intelligibility_reward": -4.996148389577866, "rewards/get_intelligibility_reward_std": 10.968209648132325, "rewards/get_target_len_reward": -0.017567448690533637, "rewards/get_target_len_reward_std": 0.05456386059522629, "step": 5800 }, { "advantages": 4.3933590703204574e-07, "advantages_std": 1.5751075744628906, "clip_ratio": 0.0, "completion_length": 84.38928756713867, "epoch": 4.369172932330827, "grad_norm": 5.46875, "kl": 0.3394057586789131, "learning_rate": 2.815789473684211e-06, "loss": 0.0349, "num_tokens": 24912174.0, "reward": -1.1729264855384827, "reward_std": 6.212002897262574, "rewards/get_chromagram_reward": 0.6229874014854431, "rewards/get_chromagram_reward_std": 0.11585783213376999, "rewards/get_intelligibility_reward": -4.122948503494262, "rewards/get_intelligibility_reward_std": 10.047537708282471, "rewards/get_target_len_reward": -0.018818165455013514, "rewards/get_target_len_reward_std": 0.056252822279930115, "step": 5810 }, { "advantages": 5.04652664323757e-07, "advantages_std": 1.6236942529678344, "clip_ratio": 0.0, "completion_length": 89.79702529907226, "epoch": 4.3766917293233085, "grad_norm": 5.21875, "kl": 0.30837641209363936, "learning_rate": 2.8120300751879705e-06, "loss": 0.0327, "num_tokens": 25225844.0, "reward": -1.4446959674358368, "reward_std": 6.88113808631897, "rewards/get_chromagram_reward": 0.616160649061203, "rewards/get_chromagram_reward_std": 0.11032316386699677, "rewards/get_intelligibility_reward": -4.931406950950622, "rewards/get_intelligibility_reward_std": 10.971225118637085, "rewards/get_target_len_reward": -0.018841464072465897, "rewards/get_target_len_reward_std": 0.05351784508675337, "step": 5820 }, { "advantages": -1.0083119761361559e-07, "advantages_std": 1.6048307299613953, "clip_ratio": 0.0, "completion_length": 88.46666870117187, "epoch": 4.38421052631579, "grad_norm": 11.75, "kl": 5.939772760868072, "learning_rate": 2.8082706766917297e-06, "loss": 0.6032, "num_tokens": 25536047.0, "reward": -1.379792395234108, "reward_std": 6.840978622436523, "rewards/get_chromagram_reward": 0.6219044208526612, "rewards/get_chromagram_reward_std": 0.1071255199611187, "rewards/get_intelligibility_reward": -4.737642979621887, "rewards/get_intelligibility_reward_std": 11.058589458465576, "rewards/get_target_len_reward": -0.023638481460511684, "rewards/get_target_len_reward_std": 0.08313055895268917, "step": 5830 }, { "advantages": 8.421639904554467e-07, "advantages_std": 1.7479196310043335, "clip_ratio": 0.0, "completion_length": 89.10416946411132, "epoch": 4.391729323308271, "grad_norm": 5.875, "kl": 0.3234842225909233, "learning_rate": 2.8045112781954893e-06, "loss": 0.0352, "num_tokens": 25849416.0, "reward": -1.7320613265037537, "reward_std": 6.80491828918457, "rewards/get_chromagram_reward": 0.6146539449691772, "rewards/get_chromagram_reward_std": 0.11938665956258773, "rewards/get_intelligibility_reward": -5.792370629310608, "rewards/get_intelligibility_reward_std": 10.65215892791748, "rewards/get_target_len_reward": -0.01846689051017165, "rewards/get_target_len_reward_std": 0.04976162984967232, "step": 5840 }, { "advantages": -6.41991583449908e-08, "advantages_std": 1.493267583847046, "clip_ratio": 0.0, "completion_length": 86.42857284545899, "epoch": 4.399248120300752, "grad_norm": 11.3125, "kl": 0.39334663897752764, "learning_rate": 2.8007518796992485e-06, "loss": 0.0486, "num_tokens": 26154685.0, "reward": -1.2574773922562599, "reward_std": 7.319183826446533, "rewards/get_chromagram_reward": 0.6342748403549194, "rewards/get_chromagram_reward_std": 0.11602752506732941, "rewards/get_intelligibility_reward": -4.3805874206125734, "rewards/get_intelligibility_reward_std": 11.956838607788086, "rewards/get_target_len_reward": -0.026119467988610267, "rewards/get_target_len_reward_std": 0.0734117228537798, "step": 5850 }, { "advantages": -6.218751522624189e-07, "advantages_std": 1.4529019594192505, "clip_ratio": 0.0, "completion_length": 87.32619247436523, "epoch": 4.406766917293233, "grad_norm": 12.625, "kl": 0.3028931975364685, "learning_rate": 2.7969924812030073e-06, "loss": 0.037, "num_tokens": 26463154.0, "reward": -1.484662154316902, "reward_std": 6.946614217758179, "rewards/get_chromagram_reward": 0.6176621794700623, "rewards/get_chromagram_reward_std": 0.11052877753973007, "rewards/get_intelligibility_reward": -5.0487511396408085, "rewards/get_intelligibility_reward_std": 11.156039571762085, "rewards/get_target_len_reward": -0.022897268738597633, "rewards/get_target_len_reward_std": 0.07214595098048449, "step": 5860 }, { "advantages": -2.3742517116787098e-07, "advantages_std": 1.6307622075080872, "clip_ratio": 0.0, "completion_length": 84.73988265991211, "epoch": 4.414285714285715, "grad_norm": 17.0, "kl": 0.3400808498263359, "learning_rate": 2.793233082706767e-06, "loss": 0.0393, "num_tokens": 26763952.0, "reward": -1.5526393622159957, "reward_std": 7.049071836471557, "rewards/get_chromagram_reward": 0.6250545501708984, "rewards/get_chromagram_reward_std": 0.11878361329436302, "rewards/get_intelligibility_reward": -5.262066769599914, "rewards/get_intelligibility_reward_std": 11.234350681304932, "rewards/get_target_len_reward": -0.020905664563179015, "rewards/get_target_len_reward_std": 0.061207803711295125, "step": 5870 }, { "advantages": 1.2715656794171082e-07, "advantages_std": 1.5928070425987244, "clip_ratio": 0.0, "completion_length": 86.91369247436523, "epoch": 4.421804511278196, "grad_norm": 6.875, "kl": 0.3369604110717773, "learning_rate": 2.789473684210526e-06, "loss": 0.0325, "num_tokens": 27070512.0, "reward": -1.6088357031345368, "reward_std": 6.812682485580444, "rewards/get_chromagram_reward": 0.6178681910037994, "rewards/get_chromagram_reward_std": 0.11353974491357803, "rewards/get_intelligibility_reward": -5.426232194900512, "rewards/get_intelligibility_reward_std": 10.729567432403565, "rewards/get_target_len_reward": -0.018142933025956154, "rewards/get_target_len_reward_std": 0.04331641979515553, "step": 5880 }, { "advantages": 3.601114144657913e-08, "advantages_std": 1.674160885810852, "clip_ratio": 0.0, "completion_length": 88.12619323730469, "epoch": 4.429323308270677, "grad_norm": 6.21875, "kl": 0.39946324825286866, "learning_rate": 2.785714285714286e-06, "loss": 0.0444, "num_tokens": 27380269.0, "reward": -1.5472537845373153, "reward_std": 6.7644494533538815, "rewards/get_chromagram_reward": 0.6225440621376037, "rewards/get_chromagram_reward_std": 0.11376380547881126, "rewards/get_intelligibility_reward": -5.241733276844025, "rewards/get_intelligibility_reward_std": 10.670757675170899, "rewards/get_target_len_reward": -0.022571913711726666, "rewards/get_target_len_reward_std": 0.06656058058142662, "step": 5890 }, { "advantages": -1.8551947178480077e-07, "advantages_std": 1.478583037853241, "clip_ratio": 0.0, "completion_length": 88.22381057739258, "epoch": 4.436842105263158, "grad_norm": 3104.0, "kl": 0.5128657639026641, "learning_rate": 2.781954887218045e-06, "loss": 0.0545, "num_tokens": 27690235.0, "reward": -1.3492045670747757, "reward_std": 6.4950531959533695, "rewards/get_chromagram_reward": 0.640613716840744, "rewards/get_chromagram_reward_std": 0.10715582817792893, "rewards/get_intelligibility_reward": -4.665818774700165, "rewards/get_intelligibility_reward_std": 10.406336688995362, "rewards/get_target_len_reward": -0.02240826766937971, "rewards/get_target_len_reward_std": 0.05692440681159496, "step": 5900 }, { "advantages": -8.891026226365284e-08, "advantages_std": 1.6906216025352478, "clip_ratio": 0.0, "completion_length": 84.81190719604493, "epoch": 4.444360902255639, "grad_norm": 6.875, "kl": 0.37526106983423235, "learning_rate": 2.7781954887218047e-06, "loss": 0.037, "num_tokens": 27991745.0, "reward": -1.7090884655714036, "reward_std": 6.80400128364563, "rewards/get_chromagram_reward": 0.6135359227657318, "rewards/get_chromagram_reward_std": 0.11744362115859985, "rewards/get_intelligibility_reward": -5.7205850839614865, "rewards/get_intelligibility_reward_std": 10.553275012969971, "rewards/get_target_len_reward": -0.020216171443462373, "rewards/get_target_len_reward_std": 0.051091530919075014, "step": 5910 }, { "advantages": 3.427265564681647e-08, "advantages_std": 1.5322723031044005, "clip_ratio": 0.0, "completion_length": 87.55000152587891, "epoch": 4.451879699248121, "grad_norm": 182.0, "kl": 0.3043200358748436, "learning_rate": 2.774436090225564e-06, "loss": 0.0305, "num_tokens": 28300332.0, "reward": -1.1945252060890197, "reward_std": 6.519233894348145, "rewards/get_chromagram_reward": 0.6296118319034576, "rewards/get_chromagram_reward_std": 0.11107028499245644, "rewards/get_intelligibility_reward": -4.194262075424194, "rewards/get_intelligibility_reward_std": 10.339669322967529, "rewards/get_target_len_reward": -0.01892517600208521, "rewards/get_target_len_reward_std": 0.05237778052687645, "step": 5920 }, { "advantages": -3.881752730805488e-07, "advantages_std": 1.521967101097107, "clip_ratio": 0.0, "completion_length": 86.82916870117188, "epoch": 4.459398496240602, "grad_norm": 58.75, "kl": 0.26121333986520767, "learning_rate": 2.7706766917293236e-06, "loss": 0.0365, "num_tokens": 28607212.0, "reward": -1.2213778406381608, "reward_std": 6.433756160736084, "rewards/get_chromagram_reward": 0.6160942673683166, "rewards/get_chromagram_reward_std": 0.11823522076010703, "rewards/get_intelligibility_reward": -4.2590295061469075, "rewards/get_intelligibility_reward_std": 10.392317485809325, "rewards/get_target_len_reward": -0.021197985392063856, "rewards/get_target_len_reward_std": 0.07294883448630571, "step": 5930 }, { "advantages": 1.671413798476351e-07, "advantages_std": 1.613970112800598, "clip_ratio": 0.0, "completion_length": 87.19940643310547, "epoch": 4.466917293233083, "grad_norm": 19.5, "kl": 0.2971110358834267, "learning_rate": 2.766917293233083e-06, "loss": 0.037, "num_tokens": 28914533.0, "reward": -1.7813652843236922, "reward_std": 7.045272636413574, "rewards/get_chromagram_reward": 0.6196362137794494, "rewards/get_chromagram_reward_std": 0.112163445353508, "rewards/get_intelligibility_reward": -5.94050749540329, "rewards/get_intelligibility_reward_std": 11.014069366455079, "rewards/get_target_len_reward": -0.02322419285774231, "rewards/get_target_len_reward_std": 0.07357236295938492, "step": 5940 }, { "advantages": -4.954636096954346e-08, "advantages_std": 1.5950510263442994, "clip_ratio": 0.0, "completion_length": 86.2148826599121, "epoch": 4.474436090225564, "grad_norm": 12.9375, "kl": 0.33923769295215606, "learning_rate": 2.7631578947368424e-06, "loss": 0.0401, "num_tokens": 29219031.0, "reward": -1.6365281403064729, "reward_std": 6.434076976776123, "rewards/get_chromagram_reward": 0.6088695049285888, "rewards/get_chromagram_reward_std": 0.12716799676418306, "rewards/get_intelligibility_reward": -5.496372726559639, "rewards/get_intelligibility_reward_std": 9.85527925491333, "rewards/get_target_len_reward": -0.02208081311546266, "rewards/get_target_len_reward_std": 0.05841316077858209, "step": 5950 }, { "advantages": -9.988745290456791e-07, "advantages_std": 1.6721760392189027, "clip_ratio": 0.0, "completion_length": 87.43869171142578, "epoch": 4.481954887218045, "grad_norm": 205.0, "kl": 0.35931061804294584, "learning_rate": 2.7593984962406017e-06, "loss": 0.0415, "num_tokens": 29526634.0, "reward": -1.6175777792930603, "reward_std": 7.441208934783935, "rewards/get_chromagram_reward": 0.6047444045543671, "rewards/get_chromagram_reward_std": 0.11898068115115165, "rewards/get_intelligibility_reward": -5.432984733581543, "rewards/get_intelligibility_reward_std": 11.966805171966552, "rewards/get_target_len_reward": -0.02449268251657486, "rewards/get_target_len_reward_std": 0.07319775484502315, "step": 5960 }, { "advantages": 4.3138864107561404e-07, "advantages_std": 1.5195866227149963, "clip_ratio": 0.0, "completion_length": 86.16488342285156, "epoch": 4.489473684210527, "grad_norm": 7.09375, "kl": 0.3500840783119202, "learning_rate": 2.755639097744361e-06, "loss": 0.038, "num_tokens": 29831733.0, "reward": -1.5222080022096633, "reward_std": 6.935330963134765, "rewards/get_chromagram_reward": 0.6276670038700104, "rewards/get_chromagram_reward_std": 0.11789945214986801, "rewards/get_intelligibility_reward": -5.172858917713166, "rewards/get_intelligibility_reward_std": 10.956598567962647, "rewards/get_target_len_reward": -0.021431863773614168, "rewards/get_target_len_reward_std": 0.05911620147526264, "step": 5970 }, { "advantages": -3.2161674425879026e-07, "advantages_std": 1.5618364930152893, "clip_ratio": 0.0, "completion_length": 85.67976303100586, "epoch": 4.496992481203008, "grad_norm": 22.875, "kl": 0.3210230380296707, "learning_rate": 2.7518796992481205e-06, "loss": 0.0369, "num_tokens": 30135432.0, "reward": -1.274936705827713, "reward_std": 6.508914232254028, "rewards/get_chromagram_reward": 0.6171582043170929, "rewards/get_chromagram_reward_std": 0.12334928214550019, "rewards/get_intelligibility_reward": -4.41784838438034, "rewards/get_intelligibility_reward_std": 10.500092363357544, "rewards/get_target_len_reward": -0.02411962877959013, "rewards/get_target_len_reward_std": 0.06979301236569882, "step": 5980 }, { "advantages": 3.8544338281099045e-07, "advantages_std": 1.5411863803863526, "clip_ratio": 0.0, "completion_length": 86.49702606201171, "epoch": 4.504511278195489, "grad_norm": 7.53125, "kl": 0.333082078397274, "learning_rate": 2.7481203007518798e-06, "loss": 0.0345, "num_tokens": 30440601.0, "reward": -1.6373848259449004, "reward_std": 6.695670700073242, "rewards/get_chromagram_reward": 0.6192308783531189, "rewards/get_chromagram_reward_std": 0.11601671576499939, "rewards/get_intelligibility_reward": -5.509091401100159, "rewards/get_intelligibility_reward_std": 10.489329147338868, "rewards/get_target_len_reward": -0.02229350171983242, "rewards/get_target_len_reward_std": 0.06566942296922207, "step": 5990 }, { "advantages": 4.122653649574204e-08, "advantages_std": 1.57593115568161, "clip_ratio": 0.0, "completion_length": 91.12321472167969, "epoch": 4.51203007518797, "grad_norm": 6.6875, "kl": 0.3484980553388596, "learning_rate": 2.7443609022556394e-06, "loss": 0.0396, "num_tokens": 30758623.0, "reward": -1.3039451286196708, "reward_std": 6.552640724182129, "rewards/get_chromagram_reward": 0.6223575115203858, "rewards/get_chromagram_reward_std": 0.10774782225489617, "rewards/get_intelligibility_reward": -4.510845673084259, "rewards/get_intelligibility_reward_std": 10.53021697998047, "rewards/get_target_len_reward": -0.023346944618970154, "rewards/get_target_len_reward_std": 0.06934394463896751, "step": 6000 }, { "advantages": 1.7446777462737372e-07, "advantages_std": 1.4582236886024476, "clip_ratio": 0.0, "completion_length": 86.5726203918457, "epoch": 4.519548872180451, "grad_norm": 49.5, "kl": 0.45119605511426925, "learning_rate": 2.7406015037593986e-06, "loss": 0.0486, "num_tokens": 31064652.0, "reward": -1.4632755875587464, "reward_std": 7.048706197738648, "rewards/get_chromagram_reward": 0.6153858244419098, "rewards/get_chromagram_reward_std": 0.12012667879462242, "rewards/get_intelligibility_reward": -4.986211991310119, "rewards/get_intelligibility_reward_std": 11.311538219451904, "rewards/get_target_len_reward": -0.019000414945185184, "rewards/get_target_len_reward_std": 0.05883214082568884, "step": 6010 }, { "advantages": 4.943460442774495e-07, "advantages_std": 1.5501924872398376, "clip_ratio": 0.0, "completion_length": 88.29702529907226, "epoch": 4.527067669172933, "grad_norm": 13.1875, "kl": 0.3232778489589691, "learning_rate": 2.7368421052631583e-06, "loss": 0.0332, "num_tokens": 31375492.0, "reward": -1.3596054553985595, "reward_std": 6.917938184738159, "rewards/get_chromagram_reward": 0.6279544234275818, "rewards/get_chromagram_reward_std": 0.11212313920259476, "rewards/get_intelligibility_reward": -4.684613796649501, "rewards/get_intelligibility_reward_std": 11.098008298873902, "rewards/get_target_len_reward": -0.02215675003826618, "rewards/get_target_len_reward_std": 0.06159700192511082, "step": 6020 }, { "advantages": 3.1466286944947795e-07, "advantages_std": 1.5159233927726745, "clip_ratio": 0.0, "completion_length": 85.98511962890625, "epoch": 4.534586466165414, "grad_norm": 8.25, "kl": 6.474253372848034, "learning_rate": 2.7330827067669175e-06, "loss": 0.6535, "num_tokens": 31679996.0, "reward": -1.3839904189109802, "reward_std": 6.673179626464844, "rewards/get_chromagram_reward": 0.6101464450359344, "rewards/get_chromagram_reward_std": 0.1205007255077362, "rewards/get_intelligibility_reward": -4.739246428012848, "rewards/get_intelligibility_reward_std": 10.690727710723877, "rewards/get_target_len_reward": -0.02287101689726114, "rewards/get_target_len_reward_std": 0.07206516806036234, "step": 6030 }, { "advantages": -6.804864085552254e-08, "advantages_std": 1.549793303012848, "clip_ratio": 0.0, "completion_length": 83.12321548461914, "epoch": 4.542105263157895, "grad_norm": 155.0, "kl": 0.35599401146173476, "learning_rate": 2.729323308270677e-06, "loss": 0.04, "num_tokens": 31976212.0, "reward": -1.6936394423246384, "reward_std": 6.708538150787353, "rewards/get_chromagram_reward": 0.6269149005413055, "rewards/get_chromagram_reward_std": 0.11131934896111488, "rewards/get_intelligibility_reward": -5.6877417176961895, "rewards/get_intelligibility_reward_std": 10.37338047027588, "rewards/get_target_len_reward": -0.02009119251742959, "rewards/get_target_len_reward_std": 0.056248923763632774, "step": 6040 }, { "advantages": 3.6309162680936425e-07, "advantages_std": 1.6578883528709412, "clip_ratio": 0.0, "completion_length": 87.18095397949219, "epoch": 4.549624060150376, "grad_norm": 9.5625, "kl": 0.311375567317009, "learning_rate": 2.7255639097744363e-06, "loss": 0.0328, "num_tokens": 32283730.0, "reward": -1.406548136472702, "reward_std": 6.439837408065796, "rewards/get_chromagram_reward": 0.616950273513794, "rewards/get_chromagram_reward_std": 0.11176617294549943, "rewards/get_intelligibility_reward": -4.8177523732185366, "rewards/get_intelligibility_reward_std": 10.18739709854126, "rewards/get_target_len_reward": -0.01884202305227518, "rewards/get_target_len_reward_std": 0.05634100623428821, "step": 6050 }, { "advantages": 4.1847434539832305e-08, "advantages_std": 1.6048791885375977, "clip_ratio": 0.0, "completion_length": 88.63393096923828, "epoch": 4.557142857142857, "grad_norm": 6.59375, "kl": 0.30048550814390185, "learning_rate": 2.7218045112781956e-06, "loss": 0.0307, "num_tokens": 32595796.0, "reward": -1.2904392518103123, "reward_std": 6.2870903491973875, "rewards/get_chromagram_reward": 0.6291167497634887, "rewards/get_chromagram_reward_std": 0.11226154044270516, "rewards/get_intelligibility_reward": -4.4783551633358005, "rewards/get_intelligibility_reward_std": 10.016265392303467, "rewards/get_target_len_reward": -0.02207921463996172, "rewards/get_target_len_reward_std": 0.06249589528888464, "step": 6060 }, { "advantages": -4.0456654346598955e-07, "advantages_std": 1.509422194957733, "clip_ratio": 0.0, "completion_length": 86.4000015258789, "epoch": 4.564661654135338, "grad_norm": 6.1875, "kl": 0.3658337786793709, "learning_rate": 2.7180451127819552e-06, "loss": 0.0457, "num_tokens": 32901569.0, "reward": -1.341392619907856, "reward_std": 6.766161584854126, "rewards/get_chromagram_reward": 0.6196685910224915, "rewards/get_chromagram_reward_std": 0.114710883051157, "rewards/get_intelligibility_reward": -4.619689786434174, "rewards/get_intelligibility_reward_std": 10.862145328521729, "rewards/get_target_len_reward": -0.024156391993165015, "rewards/get_target_len_reward_std": 0.07279833741486072, "step": 6070 }, { "advantages": 2.572933830435886e-07, "advantages_std": 1.5198671460151671, "clip_ratio": 0.0, "completion_length": 86.99702529907226, "epoch": 4.57218045112782, "grad_norm": 536.0, "kl": 0.42363296151161195, "learning_rate": 2.7142857142857144e-06, "loss": 0.042, "num_tokens": 33208757.0, "reward": -1.2752907037734986, "reward_std": 6.531996488571167, "rewards/get_chromagram_reward": 0.6229283511638641, "rewards/get_chromagram_reward_std": 0.09791875258088112, "rewards/get_intelligibility_reward": -4.431623411178589, "rewards/get_intelligibility_reward_std": 10.481385231018066, "rewards/get_target_len_reward": -0.017176955845206975, "rewards/get_target_len_reward_std": 0.05317041240632534, "step": 6080 }, { "advantages": 4.5324364350562975e-08, "advantages_std": 1.5965524196624756, "clip_ratio": 0.0, "completion_length": 88.30119247436524, "epoch": 4.579699248120301, "grad_norm": 4.71875, "kl": 0.8488052666187287, "learning_rate": 2.710526315789474e-06, "loss": 0.088, "num_tokens": 33518832.0, "reward": -1.3742637276649474, "reward_std": 6.573501634597778, "rewards/get_chromagram_reward": 0.6275453746318818, "rewards/get_chromagram_reward_std": 0.10700947791337967, "rewards/get_intelligibility_reward": -4.729785847663879, "rewards/get_intelligibility_reward_std": 10.549948787689209, "rewards/get_target_len_reward": -0.020550532080233098, "rewards/get_target_len_reward_std": 0.058156965486705306, "step": 6090 }, { "advantages": 2.0836791065903525e-07, "advantages_std": 1.679631507396698, "clip_ratio": 0.0, "completion_length": 86.75595321655274, "epoch": 4.587218045112782, "grad_norm": 9.0, "kl": 0.3424948573112488, "learning_rate": 2.7067669172932333e-06, "loss": 0.0378, "num_tokens": 33825338.0, "reward": -1.4295830607414246, "reward_std": 6.916474342346191, "rewards/get_chromagram_reward": 0.6332552254199981, "rewards/get_chromagram_reward_std": 0.11635537669062615, "rewards/get_intelligibility_reward": -4.903472948074341, "rewards/get_intelligibility_reward_std": 11.094056224822998, "rewards/get_target_len_reward": -0.018531178031116723, "rewards/get_target_len_reward_std": 0.05028481315821409, "step": 6100 }, { "advantages": 4.855295596684073e-08, "advantages_std": 1.5576176047325134, "clip_ratio": 0.0, "completion_length": 85.89881134033203, "epoch": 4.594736842105263, "grad_norm": 7.71875, "kl": 0.3236625760793686, "learning_rate": 2.703007518796993e-06, "loss": 0.0392, "num_tokens": 34128529.0, "reward": -1.6338713705539702, "reward_std": 6.288772249221802, "rewards/get_chromagram_reward": 0.6248431921005249, "rewards/get_chromagram_reward_std": 0.10880339443683625, "rewards/get_intelligibility_reward": -5.505643081665039, "rewards/get_intelligibility_reward_std": 9.724865436553955, "rewards/get_target_len_reward": -0.020813790801912545, "rewards/get_target_len_reward_std": 0.06379029210656881, "step": 6110 }, { "advantages": -1.3758738646174606e-07, "advantages_std": 1.5842162370681763, "clip_ratio": 0.0, "completion_length": 88.39940719604492, "epoch": 4.602255639097744, "grad_norm": 8.5625, "kl": 0.3194336831569672, "learning_rate": 2.699248120300752e-06, "loss": 0.0432, "num_tokens": 34439010.0, "reward": -1.4361367881298066, "reward_std": 6.373421096801758, "rewards/get_chromagram_reward": 0.6246614634990693, "rewards/get_chromagram_reward_std": 0.11062444038689137, "rewards/get_intelligibility_reward": -4.907348370552063, "rewards/get_intelligibility_reward_std": 10.147270202636719, "rewards/get_target_len_reward": -0.025723107066005467, "rewards/get_target_len_reward_std": 0.08910752348601818, "step": 6120 }, { "advantages": 3.765026921342951e-07, "advantages_std": 1.5026398301124573, "clip_ratio": 0.0, "completion_length": 90.22500152587891, "epoch": 4.609774436090225, "grad_norm": 10.625, "kl": 0.31463173031806946, "learning_rate": 2.695488721804512e-06, "loss": 0.0356, "num_tokens": 34754401.0, "reward": -1.5472688972949982, "reward_std": 6.971684503555298, "rewards/get_chromagram_reward": 0.6299036264419555, "rewards/get_chromagram_reward_std": 0.11574003919959068, "rewards/get_intelligibility_reward": -5.250876641273498, "rewards/get_intelligibility_reward_std": 11.020786952972411, "rewards/get_target_len_reward": -0.020833592116832732, "rewards/get_target_len_reward_std": 0.060291562043130395, "step": 6130 }, { "advantages": -3.4719706292207776e-07, "advantages_std": 1.5989922523498534, "clip_ratio": 0.0, "completion_length": 86.1327407836914, "epoch": 4.617293233082707, "grad_norm": 8.1875, "kl": 0.3493267551064491, "learning_rate": 2.691729323308271e-06, "loss": 0.0382, "num_tokens": 35059092.0, "reward": -1.512378105521202, "reward_std": 6.478836536407471, "rewards/get_chromagram_reward": 0.6267882108688354, "rewards/get_chromagram_reward_std": 0.1290317542850971, "rewards/get_intelligibility_reward": -5.140925347805023, "rewards/get_intelligibility_reward_std": 10.085025024414062, "rewards/get_target_len_reward": -0.022996900044381617, "rewards/get_target_len_reward_std": 0.06523961815983056, "step": 6140 }, { "advantages": -5.173186604423563e-07, "advantages_std": 1.6256378650665284, "clip_ratio": 0.0, "completion_length": 85.16726303100586, "epoch": 4.624812030075188, "grad_norm": 4.875, "kl": 1.8388477712869644, "learning_rate": 2.6879699248120307e-06, "loss": 0.1896, "num_tokens": 35360995.0, "reward": -1.585798019170761, "reward_std": 6.935663890838623, "rewards/get_chromagram_reward": 0.6336425006389618, "rewards/get_chromagram_reward_std": 0.11773469522595406, "rewards/get_intelligibility_reward": -5.3694363832473755, "rewards/get_intelligibility_reward_std": 10.936893081665039, "rewards/get_target_len_reward": -0.02159978710114956, "rewards/get_target_len_reward_std": 0.051993397623300554, "step": 6150 }, { "advantages": 3.2732882999653156e-07, "advantages_std": 1.5918954968452455, "clip_ratio": 0.0, "completion_length": 84.12916870117188, "epoch": 4.632330827067669, "grad_norm": 7.65625, "kl": 0.3650706380605698, "learning_rate": 2.68421052631579e-06, "loss": 0.0391, "num_tokens": 35660326.0, "reward": -1.4566324774175883, "reward_std": 6.758801698684692, "rewards/get_chromagram_reward": 0.6240825772285461, "rewards/get_chromagram_reward_std": 0.12061264365911484, "rewards/get_intelligibility_reward": -4.972877359390258, "rewards/get_intelligibility_reward_std": 10.724132823944093, "rewards/get_target_len_reward": -0.021102200075984002, "rewards/get_target_len_reward_std": 0.0565574087202549, "step": 6160 }, { "advantages": 1.2367963790893556e-07, "advantages_std": 1.581169807910919, "clip_ratio": 0.0, "completion_length": 86.17619171142579, "epoch": 4.63984962406015, "grad_norm": 16.5, "kl": 5.859141428768635, "learning_rate": 2.6804511278195487e-06, "loss": 0.5881, "num_tokens": 35964356.0, "reward": -1.3016383200883865, "reward_std": 6.410171842575073, "rewards/get_chromagram_reward": 0.6194089889526367, "rewards/get_chromagram_reward_std": 0.12417329400777817, "rewards/get_intelligibility_reward": -4.501974666118622, "rewards/get_intelligibility_reward_std": 10.251604223251343, "rewards/get_target_len_reward": -0.022349087800830603, "rewards/get_target_len_reward_std": 0.05504023898392916, "step": 6170 }, { "advantages": 3.129243957289418e-07, "advantages_std": 1.5140856862068177, "clip_ratio": 0.0, "completion_length": 90.33214416503907, "epoch": 4.647368421052631, "grad_norm": 68.0, "kl": 0.35507272034883497, "learning_rate": 2.6766917293233088e-06, "loss": 0.0364, "num_tokens": 36279969.0, "reward": -1.4424680143594741, "reward_std": 6.719956970214843, "rewards/get_chromagram_reward": 0.6270939588546753, "rewards/get_chromagram_reward_std": 0.11368402689695359, "rewards/get_intelligibility_reward": -4.934299838542938, "rewards/get_intelligibility_reward_std": 10.716573667526244, "rewards/get_target_len_reward": -0.020197873562574388, "rewards/get_target_len_reward_std": 0.04989261887967587, "step": 6180 }, { "advantages": -2.2873283924695898e-07, "advantages_std": 1.5742897629737853, "clip_ratio": 0.0, "completion_length": 84.81488265991212, "epoch": 4.654887218045113, "grad_norm": 16.875, "kl": 0.28883529752492904, "learning_rate": 2.6729323308270676e-06, "loss": 0.0323, "num_tokens": 36581099.0, "reward": -1.557493907213211, "reward_std": 6.904000568389892, "rewards/get_chromagram_reward": 0.6277475774288177, "rewards/get_chromagram_reward_std": 0.10811701565980911, "rewards/get_intelligibility_reward": -5.280087733268738, "rewards/get_intelligibility_reward_std": 11.021557521820068, "rewards/get_target_len_reward": -0.02014122884720564, "rewards/get_target_len_reward_std": 0.05904182381927967, "step": 6190 }, { "advantages": 1.4007091770906753e-07, "advantages_std": 1.486948847770691, "clip_ratio": 0.0, "completion_length": 87.18452606201171, "epoch": 4.662406015037594, "grad_norm": 5.53125, "kl": 0.35808763206005095, "learning_rate": 2.669172932330827e-06, "loss": 0.0396, "num_tokens": 36888822.0, "reward": -1.5488356798887253, "reward_std": 6.3901426792144775, "rewards/get_chromagram_reward": 0.6087078809738159, "rewards/get_chromagram_reward_std": 0.1133020430803299, "rewards/get_intelligibility_reward": -5.235795629024506, "rewards/get_intelligibility_reward_std": 10.033557605743407, "rewards/get_target_len_reward": -0.01941908346489072, "rewards/get_target_len_reward_std": 0.05598939694464207, "step": 6200 }, { "advantages": 1.9818544743088752e-07, "advantages_std": 1.7082177996635437, "clip_ratio": 0.0, "completion_length": 83.97797775268555, "epoch": 4.669924812030075, "grad_norm": 6.34375, "kl": 0.35583060383796694, "learning_rate": 2.6654135338345864e-06, "loss": 0.0364, "num_tokens": 37186803.0, "reward": -1.5818522050976753, "reward_std": 6.408353328704834, "rewards/get_chromagram_reward": 0.6205709517002106, "rewards/get_chromagram_reward_std": 0.11420291811227798, "rewards/get_intelligibility_reward": -5.345265340805054, "rewards/get_intelligibility_reward_std": 10.000323915481568, "rewards/get_target_len_reward": -0.02086184676736593, "rewards/get_target_len_reward_std": 0.04689359571784735, "step": 6210 }, { "advantages": -1.7484029939396352e-07, "advantages_std": 1.533245360851288, "clip_ratio": 0.0, "completion_length": 87.19404983520508, "epoch": 4.677443609022556, "grad_norm": 5.75, "kl": 0.3654496863484383, "learning_rate": 2.661654135338346e-06, "loss": 0.037, "num_tokens": 37494393.0, "reward": -1.2985429098829626, "reward_std": 6.674247598648071, "rewards/get_chromagram_reward": 0.6185528457164764, "rewards/get_chromagram_reward_std": 0.11509222164750099, "rewards/get_intelligibility_reward": -4.494230937957764, "rewards/get_intelligibility_reward_std": 10.719346714019775, "rewards/get_target_len_reward": -0.019950428698211908, "rewards/get_target_len_reward_std": 0.04983298964798451, "step": 6220 }, { "advantages": -4.867712846134964e-07, "advantages_std": 1.6637269616127015, "clip_ratio": 0.0, "completion_length": 84.39404983520508, "epoch": 4.684962406015037, "grad_norm": 13.0625, "kl": 0.40054383873939514, "learning_rate": 2.6578947368421053e-06, "loss": 0.0464, "num_tokens": 37793558.0, "reward": -1.656310772895813, "reward_std": 6.542108106613159, "rewards/get_chromagram_reward": 0.6114983022212982, "rewards/get_chromagram_reward_std": 0.10526356026530266, "rewards/get_intelligibility_reward": -5.55853419303894, "rewards/get_intelligibility_reward_std": 10.181151390075684, "rewards/get_target_len_reward": -0.021896019019186495, "rewards/get_target_len_reward_std": 0.06076169461011886, "step": 6230 }, { "advantages": -2.510845732217604e-07, "advantages_std": 1.5965062618255614, "clip_ratio": 0.0, "completion_length": 89.59166793823242, "epoch": 4.692481203007519, "grad_norm": 8.875, "kl": 0.3339507460594177, "learning_rate": 2.654135338345865e-06, "loss": 0.0326, "num_tokens": 38107248.0, "reward": -1.2790717422962188, "reward_std": 6.208766460418701, "rewards/get_chromagram_reward": 0.6216834604740142, "rewards/get_chromagram_reward_std": 0.11403061151504516, "rewards/get_intelligibility_reward": -4.439843034744262, "rewards/get_intelligibility_reward_std": 9.951672554016113, "rewards/get_target_len_reward": -0.019055431988090277, "rewards/get_target_len_reward_std": 0.04317720346152783, "step": 6240 }, { "advantages": 7.944802661086215e-07, "advantages_std": 1.623925805091858, "clip_ratio": 0.0, "completion_length": 82.06845397949219, "epoch": 4.7, "grad_norm": 37.25, "kl": 0.3709164083003998, "learning_rate": 2.650375939849624e-06, "loss": 0.0414, "num_tokens": 38400768.0, "reward": -1.6665802896022797, "reward_std": 6.8437591075897215, "rewards/get_chromagram_reward": 0.6265310943126678, "rewards/get_chromagram_reward_std": 0.11978982761502266, "rewards/get_intelligibility_reward": -5.603765249252319, "rewards/get_intelligibility_reward_std": 10.746640014648438, "rewards/get_target_len_reward": -0.022506364062428473, "rewards/get_target_len_reward_std": 0.05831002295017242, "step": 6250 }, { "advantages": 1.7372271798876682e-07, "advantages_std": 1.5418607473373414, "clip_ratio": 0.0, "completion_length": 89.39166870117188, "epoch": 4.707518796992481, "grad_norm": 4416.0, "kl": 0.570494931936264, "learning_rate": 2.6466165413533834e-06, "loss": 0.0596, "num_tokens": 38714090.0, "reward": -1.370600515604019, "reward_std": 6.201184606552124, "rewards/get_chromagram_reward": 0.6187590003013611, "rewards/get_chromagram_reward_std": 0.10438089221715927, "rewards/get_intelligibility_reward": -4.716014695167542, "rewards/get_intelligibility_reward_std": 9.873149538040161, "rewards/get_target_len_reward": -0.014545533526688813, "rewards/get_target_len_reward_std": 0.04638975989073515, "step": 6260 }, { "advantages": 2.384185914472425e-07, "advantages_std": 1.533749508857727, "clip_ratio": 0.0, "completion_length": 85.84940719604492, "epoch": 4.715037593984962, "grad_norm": 80.5, "kl": 1.3595893025398254, "learning_rate": 2.642857142857143e-06, "loss": 0.1384, "num_tokens": 39018557.0, "reward": -1.3898645401000977, "reward_std": 6.5797117233276365, "rewards/get_chromagram_reward": 0.6220493018627167, "rewards/get_chromagram_reward_std": 0.1071101889014244, "rewards/get_intelligibility_reward": -4.771307897567749, "rewards/get_intelligibility_reward_std": 10.541376209259033, "rewards/get_target_len_reward": -0.020334663148969413, "rewards/get_target_len_reward_std": 0.05680564884096384, "step": 6270 }, { "advantages": -1.1126200334388159e-07, "advantages_std": 1.5531922578811646, "clip_ratio": 0.0, "completion_length": 84.42619400024414, "epoch": 4.722556390977443, "grad_norm": 17.25, "kl": 0.35954761505126953, "learning_rate": 2.6390977443609022e-06, "loss": 0.0373, "num_tokens": 39318340.0, "reward": -1.4835880193859339, "reward_std": 6.443195438385009, "rewards/get_chromagram_reward": 0.6273998200893403, "rewards/get_chromagram_reward_std": 0.11976640075445175, "rewards/get_intelligibility_reward": -5.056619435548782, "rewards/get_intelligibility_reward_std": 9.997103500366212, "rewards/get_target_len_reward": -0.02154421918094158, "rewards/get_target_len_reward_std": 0.053043334558606145, "step": 6280 }, { "advantages": -2.135833483407623e-08, "advantages_std": 1.5955830335617065, "clip_ratio": 0.0, "completion_length": 90.60833511352538, "epoch": 4.730075187969925, "grad_norm": 5.3125, "kl": 0.3239411249756813, "learning_rate": 2.635338345864662e-06, "loss": 0.0357, "num_tokens": 39635641.0, "reward": -0.942909163236618, "reward_std": 6.630526304244995, "rewards/get_chromagram_reward": 0.6229256749153137, "rewards/get_chromagram_reward_std": 0.10916498303413391, "rewards/get_intelligibility_reward": -3.4299890637397765, "rewards/get_intelligibility_reward_std": 11.004190635681152, "rewards/get_target_len_reward": -0.021664107311517, "rewards/get_target_len_reward_std": 0.06791071593761444, "step": 6290 }, { "advantages": 1.9532938999589078e-07, "advantages_std": 1.6577616095542909, "clip_ratio": 0.0, "completion_length": 85.73452377319336, "epoch": 4.737593984962406, "grad_norm": 6.90625, "kl": 0.37113538682460784, "learning_rate": 2.631578947368421e-06, "loss": 0.0406, "num_tokens": 39938723.0, "reward": -1.636850079894066, "reward_std": 6.650091171264648, "rewards/get_chromagram_reward": 0.6210036218166352, "rewards/get_chromagram_reward_std": 0.11724439710378647, "rewards/get_intelligibility_reward": -5.510291111469269, "rewards/get_intelligibility_reward_std": 10.393040084838868, "rewards/get_target_len_reward": -0.021262429282069208, "rewards/get_target_len_reward_std": 0.05347590520977974, "step": 6300 }, { "advantages": 1.6291936191237256e-07, "advantages_std": 1.6182387113571166, "clip_ratio": 0.0, "completion_length": 87.6976219177246, "epoch": 4.745112781954887, "grad_norm": 7968.0, "kl": 1.5957046091556548, "learning_rate": 2.6278195488721808e-06, "loss": 0.1626, "num_tokens": 40247825.0, "reward": -1.2495031118392945, "reward_std": 6.350716972351075, "rewards/get_chromagram_reward": 0.606559020280838, "rewards/get_chromagram_reward_std": 0.12095492407679558, "rewards/get_intelligibility_reward": -4.336573672294617, "rewards/get_intelligibility_reward_std": 10.243553924560548, "rewards/get_target_len_reward": -0.018494523130357265, "rewards/get_target_len_reward_std": 0.04909849762916565, "step": 6310 }, { "advantages": 3.113101016083419e-07, "advantages_std": 1.5516540169715882, "clip_ratio": 0.0, "completion_length": 86.06964416503907, "epoch": 4.752631578947368, "grad_norm": 6.625, "kl": 0.9928636848926544, "learning_rate": 2.62406015037594e-06, "loss": 0.1056, "num_tokens": 40551708.0, "reward": -1.4838507711887359, "reward_std": 6.722097969055175, "rewards/get_chromagram_reward": 0.6187690258026123, "rewards/get_chromagram_reward_std": 0.11046081259846688, "rewards/get_intelligibility_reward": -5.0466511964797975, "rewards/get_intelligibility_reward_std": 10.710439920425415, "rewards/get_target_len_reward": -0.023669831547886135, "rewards/get_target_len_reward_std": 0.06956328004598618, "step": 6320 }, { "advantages": -2.3916364231268974e-07, "advantages_std": 1.5447567343711852, "clip_ratio": 0.0, "completion_length": 84.32024002075195, "epoch": 4.760150375939849, "grad_norm": 7.53125, "kl": 0.31202267557382585, "learning_rate": 2.6203007518796996e-06, "loss": 0.0355, "num_tokens": 40850743.0, "reward": -1.411372572183609, "reward_std": 6.221960210800171, "rewards/get_chromagram_reward": 0.6327088832855224, "rewards/get_chromagram_reward_std": 0.11209949627518653, "rewards/get_intelligibility_reward": -4.8436802387237545, "rewards/get_intelligibility_reward_std": 9.854103612899781, "rewards/get_target_len_reward": -0.023146109841763973, "rewards/get_target_len_reward_std": 0.06779935285449028, "step": 6330 }, { "advantages": -4.929800956343655e-07, "advantages_std": 1.4955716013908387, "clip_ratio": 0.0, "completion_length": 85.45416793823242, "epoch": 4.767669172932331, "grad_norm": 6.1875, "kl": 0.3646396204829216, "learning_rate": 2.616541353383459e-06, "loss": 0.0451, "num_tokens": 41152837.0, "reward": -1.8766667008399964, "reward_std": 6.958877801895142, "rewards/get_chromagram_reward": 0.6213249683380127, "rewards/get_chromagram_reward_std": 0.1146389216184616, "rewards/get_intelligibility_reward": -6.225960445404053, "rewards/get_intelligibility_reward_std": 10.729834365844727, "rewards/get_target_len_reward": -0.02536428887397051, "rewards/get_target_len_reward_std": 0.07496323771774768, "step": 6340 }, { "advantages": 4.221998040065955e-09, "advantages_std": 1.5300183176994324, "clip_ratio": 0.0, "completion_length": 85.9511932373047, "epoch": 4.775187969924812, "grad_norm": 23.5, "kl": 1.3359659627079963, "learning_rate": 2.6127819548872185e-06, "loss": 0.1377, "num_tokens": 41456485.0, "reward": -1.5328714907169343, "reward_std": 6.626535129547119, "rewards/get_chromagram_reward": 0.615748256444931, "rewards/get_chromagram_reward_std": 0.11465367525815964, "rewards/get_intelligibility_reward": -5.193982696533203, "rewards/get_intelligibility_reward_std": 10.48976697921753, "rewards/get_target_len_reward": -0.020379604259505867, "rewards/get_target_len_reward_std": 0.05834213700145483, "step": 6350 }, { "advantages": 1.7310182585106304e-07, "advantages_std": 1.5485412716865539, "clip_ratio": 0.0, "completion_length": 89.55119171142579, "epoch": 4.782706766917293, "grad_norm": 6.75, "kl": 0.31952681094408036, "learning_rate": 2.6090225563909777e-06, "loss": 0.0336, "num_tokens": 41770739.0, "reward": -1.2756139397621156, "reward_std": 6.977913093566895, "rewards/get_chromagram_reward": 0.6330669701099396, "rewards/get_chromagram_reward_std": 0.10896242782473564, "rewards/get_intelligibility_reward": -4.439062762260437, "rewards/get_intelligibility_reward_std": 11.30651569366455, "rewards/get_target_len_reward": -0.020845694560557605, "rewards/get_target_len_reward_std": 0.058593994937837124, "step": 6360 }, { "advantages": 3.568828176980787e-07, "advantages_std": 1.5717073440551759, "clip_ratio": 0.0, "completion_length": 88.12797775268555, "epoch": 4.790225563909774, "grad_norm": 5.46875, "kl": 0.27634538114070895, "learning_rate": 2.605263157894737e-06, "loss": 0.0345, "num_tokens": 42079458.0, "reward": -1.7293182492256165, "reward_std": 6.731516885757446, "rewards/get_chromagram_reward": 0.6031282305717468, "rewards/get_chromagram_reward_std": 0.10961822122335434, "rewards/get_intelligibility_reward": -5.771774411201477, "rewards/get_intelligibility_reward_std": 10.530457973480225, "rewards/get_target_len_reward": -0.019308150000870227, "rewards/get_target_len_reward_std": 0.06329579185694456, "step": 6370 }, { "advantages": 5.245208821058611e-07, "advantages_std": 1.6197218537330627, "clip_ratio": 0.0, "completion_length": 88.53095321655273, "epoch": 4.797744360902255, "grad_norm": 5.78125, "kl": 0.29962356984615324, "learning_rate": 2.6015037593984966e-06, "loss": 0.0278, "num_tokens": 42390738.0, "reward": -1.6248762607574463, "reward_std": 6.814621114730835, "rewards/get_chromagram_reward": 0.6148935854434967, "rewards/get_chromagram_reward_std": 0.11921465694904328, "rewards/get_intelligibility_reward": -5.47128050327301, "rewards/get_intelligibility_reward_std": 10.650694179534913, "rewards/get_target_len_reward": -0.018241762649267912, "rewards/get_target_len_reward_std": 0.05343018397688866, "step": 6380 }, { "advantages": 3.583729402123481e-07, "advantages_std": 1.600301730632782, "clip_ratio": 0.0, "completion_length": 86.42619323730469, "epoch": 4.8052631578947365, "grad_norm": 8.5625, "kl": 6.743069607019424, "learning_rate": 2.597744360902256e-06, "loss": 0.674, "num_tokens": 42696242.0, "reward": -1.2496988654136658, "reward_std": 6.139151859283447, "rewards/get_chromagram_reward": 0.6119321584701538, "rewards/get_chromagram_reward_std": 0.11752462461590767, "rewards/get_intelligibility_reward": -4.340754376351834, "rewards/get_intelligibility_reward_std": 9.796479940414429, "rewards/get_target_len_reward": -0.020274097472429274, "rewards/get_target_len_reward_std": 0.051021433994174005, "step": 6390 }, { "advantages": 5.687276654953166e-07, "advantages_std": 1.5225232601165772, "clip_ratio": 0.0, "completion_length": 86.62500076293945, "epoch": 4.812781954887218, "grad_norm": 5.53125, "kl": 0.3688139796257019, "learning_rate": 2.5939849624060154e-06, "loss": 0.0437, "num_tokens": 43002051.0, "reward": -1.2026754826307298, "reward_std": 6.638499164581299, "rewards/get_chromagram_reward": 0.6256828069686889, "rewards/get_chromagram_reward_std": 0.11374538168311119, "rewards/get_intelligibility_reward": -4.210258966684341, "rewards/get_intelligibility_reward_std": 10.757501363754272, "rewards/get_target_len_reward": -0.023450003052130342, "rewards/get_target_len_reward_std": 0.07306296471506357, "step": 6400 }, { "advantages": 1.3063351693709536e-07, "advantages_std": 1.5896078944206238, "clip_ratio": 0.0, "completion_length": 84.06428756713868, "epoch": 4.820300751879699, "grad_norm": 8.0, "kl": 0.2945214152336121, "learning_rate": 2.5902255639097747e-06, "loss": 0.029, "num_tokens": 43301153.0, "reward": -1.2195492424070835, "reward_std": 6.1925302028656, "rewards/get_chromagram_reward": 0.6336007118225098, "rewards/get_chromagram_reward_std": 0.11149605363607407, "rewards/get_intelligibility_reward": -4.271958157420158, "rewards/get_intelligibility_reward_std": 9.869881725311279, "rewards/get_target_len_reward": -0.02029011370614171, "rewards/get_target_len_reward_std": 0.048033690080046654, "step": 6410 }, { "advantages": -3.386288867091025e-07, "advantages_std": 1.5211575150489807, "clip_ratio": 0.0, "completion_length": 85.46607513427735, "epoch": 4.8278195488721805, "grad_norm": 8.3125, "kl": 0.5205657571554184, "learning_rate": 2.5864661654135343e-06, "loss": 0.0572, "num_tokens": 43604022.0, "reward": -1.1797817513346671, "reward_std": 6.35502986907959, "rewards/get_chromagram_reward": 0.6147702217102051, "rewards/get_chromagram_reward_std": 0.10805823653936386, "rewards/get_intelligibility_reward": -4.129573428630829, "rewards/get_intelligibility_reward_std": 10.311836242675781, "rewards/get_target_len_reward": -0.024541809875518083, "rewards/get_target_len_reward_std": 0.08319154866039753, "step": 6420 }, { "advantages": 3.0547380305279146e-07, "advantages_std": 1.6374112010002135, "clip_ratio": 0.0, "completion_length": 86.75059661865234, "epoch": 4.8353383458646615, "grad_norm": 7.34375, "kl": 0.42518229633569715, "learning_rate": 2.5827067669172935e-06, "loss": 0.0461, "num_tokens": 43909645.0, "reward": -1.500042522698641, "reward_std": 6.227635717391967, "rewards/get_chromagram_reward": 0.6166976928710938, "rewards/get_chromagram_reward_std": 0.10093749687075615, "rewards/get_intelligibility_reward": -5.097601294517517, "rewards/get_intelligibility_reward_std": 9.62869644165039, "rewards/get_target_len_reward": -0.019223684445023537, "rewards/get_target_len_reward_std": 0.054870061576366425, "step": 6430 }, { "advantages": -2.2687019125555706e-07, "advantages_std": 1.6094887852668762, "clip_ratio": 0.0, "completion_length": 86.8827392578125, "epoch": 4.8428571428571425, "grad_norm": 5.875, "kl": 0.30442375540733335, "learning_rate": 2.578947368421053e-06, "loss": 0.0338, "num_tokens": 44216428.0, "reward": -1.6521910965442657, "reward_std": 7.041847658157349, "rewards/get_chromagram_reward": 0.6428345263004303, "rewards/get_chromagram_reward_std": 0.11256081908941269, "rewards/get_intelligibility_reward": -5.579641795158386, "rewards/get_intelligibility_reward_std": 11.189670085906982, "rewards/get_target_len_reward": -0.019765730388462542, "rewards/get_target_len_reward_std": 0.052372989058494565, "step": 6440 }, { "advantages": -4.147489249817227e-08, "advantages_std": 1.584288239479065, "clip_ratio": 0.0, "completion_length": 84.63809661865234, "epoch": 4.850375939849624, "grad_norm": 9.8125, "kl": 0.3660952433943748, "learning_rate": 2.5751879699248124e-06, "loss": 0.0423, "num_tokens": 44516628.0, "reward": -1.6733587265014649, "reward_std": 6.637929773330688, "rewards/get_chromagram_reward": 0.6172063827514649, "rewards/get_chromagram_reward_std": 0.11280516609549522, "rewards/get_intelligibility_reward": -5.615116000175476, "rewards/get_intelligibility_reward_std": 10.391260719299316, "rewards/get_target_len_reward": -0.022166123893111945, "rewards/get_target_len_reward_std": 0.0620707118883729, "step": 6450 }, { "advantages": 2.9553966918172138e-08, "advantages_std": 1.5499458074569703, "clip_ratio": 0.0, "completion_length": 83.31964416503907, "epoch": 4.8578947368421055, "grad_norm": 5.5, "kl": 0.32719179838895796, "learning_rate": 2.571428571428571e-06, "loss": 0.0333, "num_tokens": 44814027.0, "reward": -1.6808424234390258, "reward_std": 6.966293144226074, "rewards/get_chromagram_reward": 0.6218828916549682, "rewards/get_chromagram_reward_std": 0.10610488280653954, "rewards/get_intelligibility_reward": -5.644246053695679, "rewards/get_intelligibility_reward_std": 11.015114688873291, "rewards/get_target_len_reward": -0.020163825061172248, "rewards/get_target_len_reward_std": 0.05375215411186218, "step": 6460 }, { "advantages": 3.8867194760427994e-07, "advantages_std": 1.5340965390205383, "clip_ratio": 0.0, "completion_length": 83.85119171142578, "epoch": 4.8654135338345865, "grad_norm": 5.6875, "kl": 0.48933424055576324, "learning_rate": 2.5676691729323313e-06, "loss": 0.0527, "num_tokens": 45112541.0, "reward": -1.62738236784935, "reward_std": 6.789244747161865, "rewards/get_chromagram_reward": 0.6253014147281647, "rewards/get_chromagram_reward_std": 0.12089017108082771, "rewards/get_intelligibility_reward": -5.4844811916351315, "rewards/get_intelligibility_reward_std": 10.714595985412597, "rewards/get_target_len_reward": -0.02296700868755579, "rewards/get_target_len_reward_std": 0.0656088100746274, "step": 6470 }, { "advantages": -1.1821588685734241e-07, "advantages_std": 1.5755035519599914, "clip_ratio": 0.0, "completion_length": 88.55357360839844, "epoch": 4.872932330827068, "grad_norm": 5.65625, "kl": 0.33493589907884597, "learning_rate": 2.56390977443609e-06, "loss": 0.0415, "num_tokens": 45424425.0, "reward": -1.141952557489276, "reward_std": 6.468104410171509, "rewards/get_chromagram_reward": 0.6155132055282593, "rewards/get_chromagram_reward_std": 0.09994912594556808, "rewards/get_intelligibility_reward": -4.016626697778702, "rewards/get_intelligibility_reward_std": 10.548732280731201, "rewards/get_target_len_reward": -0.024744043592363596, "rewards/get_target_len_reward_std": 0.07314223255962134, "step": 6480 }, { "advantages": -8.195643630415362e-09, "advantages_std": 1.5248024225234986, "clip_ratio": 0.0, "completion_length": 87.0875015258789, "epoch": 4.880451127819549, "grad_norm": 5.6875, "kl": 3.428301727771759, "learning_rate": 2.56015037593985e-06, "loss": 0.3461, "num_tokens": 45731578.0, "reward": -1.7025025725364684, "reward_std": 7.153896188735962, "rewards/get_chromagram_reward": 0.6351093053817749, "rewards/get_chromagram_reward_std": 0.10930986404418945, "rewards/get_intelligibility_reward": -5.719335460662842, "rewards/get_intelligibility_reward_std": 11.252530097961426, "rewards/get_target_len_reward": -0.023281274922192098, "rewards/get_target_len_reward_std": 0.07412473894655705, "step": 6490 }, { "advantages": 1.7036997057573444e-07, "advantages_std": 1.540906298160553, "clip_ratio": 0.0, "completion_length": 87.96785888671874, "epoch": 4.88796992481203, "grad_norm": 6.84375, "kl": 0.3502262085676193, "learning_rate": 2.556390977443609e-06, "loss": 0.0407, "num_tokens": 46040680.0, "reward": -1.0871734350919724, "reward_std": 5.983257627487182, "rewards/get_chromagram_reward": 0.6318377196788788, "rewards/get_chromagram_reward_std": 0.11660416722297669, "rewards/get_intelligibility_reward": -3.868463712930679, "rewards/get_intelligibility_reward_std": 9.709487009048463, "rewards/get_target_len_reward": -0.024894051626324652, "rewards/get_target_len_reward_std": 0.0769047923386097, "step": 6500 }, { "advantages": 6.072223367681317e-07, "advantages_std": 1.7283748388290405, "clip_ratio": 0.0, "completion_length": 88.71309661865234, "epoch": 4.895488721804512, "grad_norm": 6.5625, "kl": 0.29507723152637483, "learning_rate": 2.552631578947369e-06, "loss": 0.0317, "num_tokens": 46351916.0, "reward": -1.631992408633232, "reward_std": 6.526308012008667, "rewards/get_chromagram_reward": 0.6183791100978852, "rewards/get_chromagram_reward_std": 0.10845671147108078, "rewards/get_intelligibility_reward": -5.496023435890675, "rewards/get_intelligibility_reward_std": 10.073901176452637, "rewards/get_target_len_reward": -0.018332591652870177, "rewards/get_target_len_reward_std": 0.05578165017068386, "step": 6510 }, { "advantages": -3.129243992816555e-08, "advantages_std": 1.5458887219429016, "clip_ratio": 0.0, "completion_length": 88.31904983520508, "epoch": 4.903007518796993, "grad_norm": 6.5, "kl": 0.3002905026078224, "learning_rate": 2.548872180451128e-06, "loss": 0.0293, "num_tokens": 46661545.0, "reward": -1.633839136362076, "reward_std": 6.5747246742248535, "rewards/get_chromagram_reward": 0.620064640045166, "rewards/get_chromagram_reward_std": 0.10467863827943802, "rewards/get_intelligibility_reward": -5.506745052337647, "rewards/get_intelligibility_reward_std": 10.277530574798584, "rewards/get_target_len_reward": -0.01483667390421033, "rewards/get_target_len_reward_std": 0.03543837685137987, "step": 6520 }, { "advantages": 3.6607186615356113e-07, "advantages_std": 1.5753893375396728, "clip_ratio": 0.0, "completion_length": 84.10476226806641, "epoch": 4.910526315789474, "grad_norm": 13.1875, "kl": 0.32883389592170714, "learning_rate": 2.545112781954888e-06, "loss": 0.0391, "num_tokens": 46960276.0, "reward": -1.2893819987773896, "reward_std": 6.348278760910034, "rewards/get_chromagram_reward": 0.6218626320362091, "rewards/get_chromagram_reward_std": 0.10212240219116211, "rewards/get_intelligibility_reward": -4.469036054611206, "rewards/get_intelligibility_reward_std": 10.204508018493652, "rewards/get_target_len_reward": -0.020972410589456557, "rewards/get_target_len_reward_std": 0.06330780945718288, "step": 6530 }, { "advantages": 1.6006330909590362e-07, "advantages_std": 1.5176787972450256, "clip_ratio": 0.0, "completion_length": 89.59047622680664, "epoch": 4.918045112781955, "grad_norm": 7.4375, "kl": 0.3287327170372009, "learning_rate": 2.5413533834586467e-06, "loss": 0.0348, "num_tokens": 47274035.0, "reward": -1.4367490768432618, "reward_std": 6.943380355834961, "rewards/get_chromagram_reward": 0.6169800817966461, "rewards/get_chromagram_reward_std": 0.10801626220345498, "rewards/get_intelligibility_reward": -4.91004763841629, "rewards/get_intelligibility_reward_std": 11.124363040924072, "rewards/get_target_len_reward": -0.017179496679455043, "rewards/get_target_len_reward_std": 0.047147853672504424, "step": 6540 }, { "advantages": -1.2964010949190196e-07, "advantages_std": 1.4592286229133606, "clip_ratio": 0.0, "completion_length": 87.46904907226562, "epoch": 4.925563909774436, "grad_norm": 77.0, "kl": 1.5469397068023683, "learning_rate": 2.5375939849624063e-06, "loss": 0.158, "num_tokens": 47582513.0, "reward": -1.3547814309597015, "reward_std": 6.633420515060425, "rewards/get_chromagram_reward": 0.62143474817276, "rewards/get_chromagram_reward_std": 0.11483238711953163, "rewards/get_intelligibility_reward": -4.66466805934906, "rewards/get_intelligibility_reward_std": 10.740064477920532, "rewards/get_target_len_reward": -0.02111075660213828, "rewards/get_target_len_reward_std": 0.05489732790738344, "step": 6550 }, { "advantages": -9.43740351644351e-08, "advantages_std": 1.5623414754867553, "clip_ratio": 0.0, "completion_length": 86.35833511352538, "epoch": 4.933082706766918, "grad_norm": 6.09375, "kl": 0.2813438355922699, "learning_rate": 2.5338345864661655e-06, "loss": 0.0347, "num_tokens": 47887515.0, "reward": -1.7280932068824768, "reward_std": 6.756200218200684, "rewards/get_chromagram_reward": 0.6194582223892212, "rewards/get_chromagram_reward_std": 0.11464768573641777, "rewards/get_intelligibility_reward": -5.78103654384613, "rewards/get_intelligibility_reward_std": 10.545590496063232, "rewards/get_target_len_reward": -0.022701161913573742, "rewards/get_target_len_reward_std": 0.08312043901532888, "step": 6560 }, { "advantages": 1.3187527301283807e-07, "advantages_std": 1.5597585439682007, "clip_ratio": 0.0, "completion_length": 88.42857284545899, "epoch": 4.940601503759399, "grad_norm": 26.0, "kl": 0.3721115231513977, "learning_rate": 2.5300751879699247e-06, "loss": 0.0402, "num_tokens": 48198543.0, "reward": -1.2894165456295013, "reward_std": 6.483512449264526, "rewards/get_chromagram_reward": 0.6240399837493896, "rewards/get_chromagram_reward_std": 0.12066565677523614, "rewards/get_intelligibility_reward": -4.470979905128479, "rewards/get_intelligibility_reward_std": 10.422832012176514, "rewards/get_target_len_reward": -0.02130947196856141, "rewards/get_target_len_reward_std": 0.058992895483970645, "step": 6570 }, { "advantages": -7.182359979651665e-07, "advantages_std": 1.524202859401703, "clip_ratio": 0.0, "completion_length": 86.0982162475586, "epoch": 4.94812030075188, "grad_norm": 5.75, "kl": 0.32063417583703996, "learning_rate": 2.5263157894736844e-06, "loss": 0.0388, "num_tokens": 48502433.0, "reward": -1.8312727063894272, "reward_std": 7.22638783454895, "rewards/get_chromagram_reward": 0.6144508063793183, "rewards/get_chromagram_reward_std": 0.10415496379137039, "rewards/get_intelligibility_reward": -6.086835551261902, "rewards/get_intelligibility_reward_std": 11.28345012664795, "rewards/get_target_len_reward": -0.02143293796107173, "rewards/get_target_len_reward_std": 0.07228237017989159, "step": 6580 }, { "advantages": 3.88796137418268e-07, "advantages_std": 1.619350051879883, "clip_ratio": 0.0, "completion_length": 85.75714340209962, "epoch": 4.955639097744361, "grad_norm": 78.0, "kl": 0.3856597736477852, "learning_rate": 2.5225563909774436e-06, "loss": 0.0452, "num_tokens": 48805915.0, "reward": -1.8449522614479066, "reward_std": 6.886161613464355, "rewards/get_chromagram_reward": 0.6164387345314026, "rewards/get_chromagram_reward_std": 0.11670946702361107, "rewards/get_intelligibility_reward": -6.128629541397094, "rewards/get_intelligibility_reward_std": 10.64172887802124, "rewards/get_target_len_reward": -0.022665658872574566, "rewards/get_target_len_reward_std": 0.06206500325351953, "step": 6590 }, { "advantages": -1.877546353057369e-07, "advantages_std": 1.624682652950287, "clip_ratio": 0.0, "completion_length": 85.98928680419922, "epoch": 4.963157894736842, "grad_norm": 87.0, "kl": 0.3426424890756607, "learning_rate": 2.5187969924812033e-06, "loss": 0.038, "num_tokens": 49110315.0, "reward": -1.4495089689269662, "reward_std": 6.569057416915894, "rewards/get_chromagram_reward": 0.6227473258972168, "rewards/get_chromagram_reward_std": 0.1206977717578411, "rewards/get_intelligibility_reward": -4.950021553039551, "rewards/get_intelligibility_reward_std": 10.386702346801759, "rewards/get_target_len_reward": -0.02125244690105319, "rewards/get_target_len_reward_std": 0.06872284896671772, "step": 6600 }, { "advantages": -1.185884169530027e-07, "advantages_std": 1.5967671275138855, "clip_ratio": 0.0, "completion_length": 82.43988265991212, "epoch": 4.970676691729324, "grad_norm": 7.15625, "kl": 0.7244855403900147, "learning_rate": 2.5150375939849625e-06, "loss": 0.0808, "num_tokens": 49404204.0, "reward": -1.7379024147987365, "reward_std": 6.669083547592163, "rewards/get_chromagram_reward": 0.6282551884651184, "rewards/get_chromagram_reward_std": 0.11871347054839135, "rewards/get_intelligibility_reward": -5.818054795265198, "rewards/get_intelligibility_reward_std": 10.382137298583984, "rewards/get_target_len_reward": -0.023907260969281197, "rewards/get_target_len_reward_std": 0.07197411060333252, "step": 6610 }, { "advantages": -4.0841601247620927e-07, "advantages_std": 1.6466867446899414, "clip_ratio": 0.0, "completion_length": 88.15833435058593, "epoch": 4.978195488721805, "grad_norm": 23.5, "kl": 0.29043773263692857, "learning_rate": 2.511278195488722e-06, "loss": 0.0308, "num_tokens": 49714079.0, "reward": -1.126624122262001, "reward_std": 6.541045331954956, "rewards/get_chromagram_reward": 0.6227805554866791, "rewards/get_chromagram_reward_std": 0.11063579246401786, "rewards/get_intelligibility_reward": -3.984594986587763, "rewards/get_intelligibility_reward_std": 10.58214750289917, "rewards/get_target_len_reward": -0.018057726556435227, "rewards/get_target_len_reward_std": 0.0481023607775569, "step": 6620 }, { "advantages": -2.9206275371507217e-07, "advantages_std": 1.5873522877693176, "clip_ratio": 0.0, "completion_length": 91.16012115478516, "epoch": 4.985714285714286, "grad_norm": 8.5, "kl": 0.36675150841474535, "learning_rate": 2.5075187969924813e-06, "loss": 0.039, "num_tokens": 50032559.0, "reward": -1.2497528120875359, "reward_std": 6.907018804550171, "rewards/get_chromagram_reward": 0.6320128381252289, "rewards/get_chromagram_reward_std": 0.10616158470511436, "rewards/get_intelligibility_reward": -4.356342947483062, "rewards/get_intelligibility_reward_std": 11.155338287353516, "rewards/get_target_len_reward": -0.02492810133844614, "rewards/get_target_len_reward_std": 0.07248065434396267, "step": 6630 }, { "advantages": -2.669791513199016e-07, "advantages_std": 1.5647882103919983, "clip_ratio": 0.0, "completion_length": 89.2732162475586, "epoch": 4.993233082706767, "grad_norm": 10.75, "kl": 0.2904526948928833, "learning_rate": 2.503759398496241e-06, "loss": 0.0348, "num_tokens": 50345461.0, "reward": -1.317164820432663, "reward_std": 6.558288669586181, "rewards/get_chromagram_reward": 0.6253573298454285, "rewards/get_chromagram_reward_std": 0.10857684016227723, "rewards/get_intelligibility_reward": -4.557005780935287, "rewards/get_intelligibility_reward_std": 10.478938770294189, "rewards/get_target_len_reward": -0.019845707342028618, "rewards/get_target_len_reward_std": 0.06356870625168085, "step": 6640 }, { "advantages": -5.700936128505419e-07, "advantages_std": 1.5285292983055114, "clip_ratio": 0.0, "completion_length": 84.10797882080078, "epoch": 5.001503759398497, "grad_norm": 306.0, "kl": 0.5828635230660438, "learning_rate": 2.5e-06, "loss": 0.0609, "num_tokens": 50648514.0, "reward": -1.8416775107383727, "reward_std": 7.091966581344605, "rewards/get_chromagram_reward": 0.6169986069202423, "rewards/get_chromagram_reward_std": 0.11487487629055977, "rewards/get_intelligibility_reward": -6.121673631668091, "rewards/get_intelligibility_reward_std": 10.917494773864746, "rewards/get_target_len_reward": -0.020357232261449098, "rewards/get_target_len_reward_std": 0.05068073961883783, "step": 6650 }, { "advantages": 4.743536479168142e-08, "advantages_std": 1.649160099029541, "clip_ratio": 0.0, "completion_length": 83.18393020629883, "epoch": 5.009022556390978, "grad_norm": 8.6875, "kl": 0.39235475957393645, "learning_rate": 2.4962406015037594e-06, "loss": 0.0453, "num_tokens": 50944468.0, "reward": -1.8302626073360444, "reward_std": 6.798886489868164, "rewards/get_chromagram_reward": 0.6179421901702881, "rewards/get_chromagram_reward_std": 0.1180720493197441, "rewards/get_intelligibility_reward": -6.085371446609497, "rewards/get_intelligibility_reward_std": 10.489519023895264, "rewards/get_target_len_reward": -0.023358290363103152, "rewards/get_target_len_reward_std": 0.07059708088636399, "step": 6660 }, { "advantages": -2.121552803657778e-07, "advantages_std": 1.5431331992149353, "clip_ratio": 0.0, "completion_length": 84.96071548461914, "epoch": 5.016541353383459, "grad_norm": 6.75, "kl": 0.2815273180603981, "learning_rate": 2.492481203007519e-06, "loss": 0.0269, "num_tokens": 51246029.0, "reward": -1.4928331673145294, "reward_std": 6.165701866149902, "rewards/get_chromagram_reward": 0.627123236656189, "rewards/get_chromagram_reward_std": 0.10546824783086776, "rewards/get_intelligibility_reward": -5.086357855796814, "rewards/get_intelligibility_reward_std": 9.6989670753479, "rewards/get_target_len_reward": -0.019264612533152103, "rewards/get_target_len_reward_std": 0.049389274418354036, "step": 6670 }, { "advantages": 5.016724191619915e-07, "advantages_std": 1.626193630695343, "clip_ratio": 0.0, "completion_length": 86.72797775268555, "epoch": 5.02406015037594, "grad_norm": 7.03125, "kl": 0.3750910758972168, "learning_rate": 2.4887218045112783e-06, "loss": 0.0397, "num_tokens": 51552511.0, "reward": -1.5638932228088378, "reward_std": 6.7187182903289795, "rewards/get_chromagram_reward": 0.6333837032318115, "rewards/get_chromagram_reward_std": 0.12098120152950287, "rewards/get_intelligibility_reward": -5.302863430976868, "rewards/get_intelligibility_reward_std": 10.55617914199829, "rewards/get_target_len_reward": -0.02219974249601364, "rewards/get_target_len_reward_std": 0.05710374694317579, "step": 6680 }, { "advantages": 3.1342108570697745e-07, "advantages_std": 1.5879101514816285, "clip_ratio": 0.0, "completion_length": 82.8029769897461, "epoch": 5.031578947368421, "grad_norm": 9.0625, "kl": 0.5160459518432617, "learning_rate": 2.484962406015038e-06, "loss": 0.0557, "num_tokens": 51848678.0, "reward": -1.152049209177494, "reward_std": 6.030084133148193, "rewards/get_chromagram_reward": 0.6250806391239166, "rewards/get_chromagram_reward_std": 0.12028426826000213, "rewards/get_intelligibility_reward": -4.054237425327301, "rewards/get_intelligibility_reward_std": 9.741949558258057, "rewards/get_target_len_reward": -0.02699065739288926, "rewards/get_target_len_reward_std": 0.07074901163578033, "step": 6690 }, { "advantages": -5.6376058399365546e-08, "advantages_std": 1.5316031932830811, "clip_ratio": 0.0, "completion_length": 86.11011962890625, "epoch": 5.039097744360903, "grad_norm": 6.21875, "kl": 0.3129287138581276, "learning_rate": 2.481203007518797e-06, "loss": 0.0345, "num_tokens": 52152743.0, "reward": -1.6599902629852294, "reward_std": 6.832069444656372, "rewards/get_chromagram_reward": 0.6020946443080902, "rewards/get_chromagram_reward_std": 0.12764331623911856, "rewards/get_intelligibility_reward": -5.563377809524536, "rewards/get_intelligibility_reward_std": 10.737880897521972, "rewards/get_target_len_reward": -0.018687471002340316, "rewards/get_target_len_reward_std": 0.05601380094885826, "step": 6700 }, { "advantages": 2.5232635039174054e-07, "advantages_std": 1.609774398803711, "clip_ratio": 0.0, "completion_length": 87.4648826599121, "epoch": 5.046616541353384, "grad_norm": 7.96875, "kl": 0.34581609070301056, "learning_rate": 2.4774436090225564e-06, "loss": 0.0382, "num_tokens": 52461229.0, "reward": -1.466331911087036, "reward_std": 6.7329998970031735, "rewards/get_chromagram_reward": 0.6073729395866394, "rewards/get_chromagram_reward_std": 0.1180558256804943, "rewards/get_intelligibility_reward": -4.988929557800293, "rewards/get_intelligibility_reward_std": 10.727745819091798, "rewards/get_target_len_reward": -0.017438811622560023, "rewards/get_target_len_reward_std": 0.04986635074019432, "step": 6710 }, { "advantages": -7.363657630321541e-08, "advantages_std": 1.5829906702041625, "clip_ratio": 0.0, "completion_length": 81.79226303100586, "epoch": 5.054135338345865, "grad_norm": 6.875, "kl": 0.4496503293514252, "learning_rate": 2.473684210526316e-06, "loss": 0.0492, "num_tokens": 52754446.0, "reward": -1.3893569886684418, "reward_std": 6.619420766830444, "rewards/get_chromagram_reward": 0.6111753046512604, "rewards/get_chromagram_reward_std": 0.12401786893606186, "rewards/get_intelligibility_reward": -4.755018162727356, "rewards/get_intelligibility_reward_std": 10.626482391357422, "rewards/get_target_len_reward": -0.024227831698954105, "rewards/get_target_len_reward_std": 0.06835556291043758, "step": 6720 }, { "advantages": -1.763303600910149e-08, "advantages_std": 1.6146543741226196, "clip_ratio": 0.0, "completion_length": 91.1583351135254, "epoch": 5.061654135338346, "grad_norm": 1392.0, "kl": 0.45773075222969056, "learning_rate": 2.4699248120300752e-06, "loss": 0.047, "num_tokens": 53072797.0, "reward": -1.0947829756885767, "reward_std": 6.209275341033935, "rewards/get_chromagram_reward": 0.6310640692710876, "rewards/get_chromagram_reward_std": 0.10584187656641006, "rewards/get_intelligibility_reward": -3.8928360402584077, "rewards/get_intelligibility_reward_std": 10.099512815475464, "rewards/get_target_len_reward": -0.022576854890212418, "rewards/get_target_len_reward_std": 0.07170646525919437, "step": 6730 }, { "advantages": -8.13106710850775e-07, "advantages_std": 1.6019715070724487, "clip_ratio": 0.0, "completion_length": 84.63809661865234, "epoch": 5.069172932330827, "grad_norm": 7.65625, "kl": 0.3156878113746643, "learning_rate": 2.466165413533835e-06, "loss": 0.0365, "num_tokens": 53373173.0, "reward": -1.7333171367645264, "reward_std": 6.950545787811279, "rewards/get_chromagram_reward": 0.6180140256881714, "rewards/get_chromagram_reward_std": 0.11169339194893838, "rewards/get_intelligibility_reward": -5.79854383468628, "rewards/get_intelligibility_reward_std": 10.903221225738525, "rewards/get_target_len_reward": -0.019421275705099106, "rewards/get_target_len_reward_std": 0.06264973357319832, "step": 6740 }, { "advantages": -1.368423205860836e-07, "advantages_std": 1.5889723181724549, "clip_ratio": 0.0, "completion_length": 85.70952529907227, "epoch": 5.076691729323309, "grad_norm": 7.09375, "kl": 0.3136765375733376, "learning_rate": 2.462406015037594e-06, "loss": 0.0334, "num_tokens": 53676787.0, "reward": -1.5442959815263748, "reward_std": 7.033763265609741, "rewards/get_chromagram_reward": 0.6433658242225647, "rewards/get_chromagram_reward_std": 0.1127834253013134, "rewards/get_intelligibility_reward": -5.251438069343567, "rewards/get_intelligibility_reward_std": 11.205382347106934, "rewards/get_target_len_reward": -0.024815433658659458, "rewards/get_target_len_reward_std": 0.061740655824542044, "step": 6750 }, { "advantages": 4.728635475181875e-07, "advantages_std": 1.6415120124816895, "clip_ratio": 0.0, "completion_length": 86.85595397949218, "epoch": 5.08421052631579, "grad_norm": 6.96875, "kl": 0.3616165786981583, "learning_rate": 2.4586466165413538e-06, "loss": 0.0388, "num_tokens": 53982610.0, "reward": -1.7239043295383454, "reward_std": 6.90065188407898, "rewards/get_chromagram_reward": 0.6219595074653625, "rewards/get_chromagram_reward_std": 0.10513537898659706, "rewards/get_intelligibility_reward": -5.775780349969864, "rewards/get_intelligibility_reward_std": 10.657470417022704, "rewards/get_target_len_reward": -0.017891742382198574, "rewards/get_target_len_reward_std": 0.05384985618293285, "step": 6760 }, { "advantages": 2.967814694443405e-07, "advantages_std": 1.7612375378608705, "clip_ratio": 0.0, "completion_length": 87.82857208251953, "epoch": 5.091729323308271, "grad_norm": 7.78125, "kl": 0.37060387432575226, "learning_rate": 2.454887218045113e-06, "loss": 0.0418, "num_tokens": 54291505.0, "reward": -1.7024814426898955, "reward_std": 7.071507167816162, "rewards/get_chromagram_reward": 0.6242042541503906, "rewards/get_chromagram_reward_std": 0.11118239611387253, "rewards/get_intelligibility_reward": -5.710920715332032, "rewards/get_intelligibility_reward_std": 11.158560276031494, "rewards/get_target_len_reward": -0.020727519784122704, "rewards/get_target_len_reward_std": 0.06277465522289276, "step": 6770 }, { "advantages": -5.712108475108835e-09, "advantages_std": 1.6215251684188843, "clip_ratio": 0.0, "completion_length": 88.75714492797852, "epoch": 5.099248120300752, "grad_norm": 38.75, "kl": 0.4336851522326469, "learning_rate": 2.4511278195488726e-06, "loss": 0.0472, "num_tokens": 54603325.0, "reward": -1.2727844998240472, "reward_std": 6.598020696640015, "rewards/get_chromagram_reward": 0.6093868017196655, "rewards/get_chromagram_reward_std": 0.1134963721036911, "rewards/get_intelligibility_reward": -4.4110959649086, "rewards/get_intelligibility_reward_std": 10.600707530975342, "rewards/get_target_len_reward": -0.016644243523478507, "rewards/get_target_len_reward_std": 0.051578975096344945, "step": 6780 }, { "advantages": 2.786517171671221e-07, "advantages_std": 1.5503077149391173, "clip_ratio": 0.0, "completion_length": 87.54642944335937, "epoch": 5.106766917293233, "grad_norm": 5.5625, "kl": 0.32320014089345933, "learning_rate": 2.447368421052632e-06, "loss": 0.0341, "num_tokens": 54912339.0, "reward": -1.5714792966842652, "reward_std": 6.982712078094482, "rewards/get_chromagram_reward": 0.6172931432723999, "rewards/get_chromagram_reward_std": 0.10349898040294647, "rewards/get_intelligibility_reward": -5.315335440635681, "rewards/get_intelligibility_reward_std": 11.076660537719727, "rewards/get_target_len_reward": -0.016395517718046905, "rewards/get_target_len_reward_std": 0.047106191515922546, "step": 6790 }, { "advantages": -5.935630156272964e-08, "advantages_std": 1.6334615707397462, "clip_ratio": 0.0, "completion_length": 85.74702606201171, "epoch": 5.114285714285714, "grad_norm": 10.0, "kl": 0.3186270877718925, "learning_rate": 2.443609022556391e-06, "loss": 0.0331, "num_tokens": 55216325.0, "reward": -1.288988533616066, "reward_std": 6.493602895736695, "rewards/get_chromagram_reward": 0.6300028264522552, "rewards/get_chromagram_reward_std": 0.12195887714624405, "rewards/get_intelligibility_reward": -4.469488799571991, "rewards/get_intelligibility_reward_std": 10.4141845703125, "rewards/get_target_len_reward": -0.027479395363479854, "rewards/get_target_len_reward_std": 0.07249160967767239, "step": 6800 }, { "advantages": -7.649263109144045e-07, "advantages_std": 1.6298179507255555, "clip_ratio": 0.0, "completion_length": 87.87619171142578, "epoch": 5.121804511278196, "grad_norm": 13.0, "kl": 0.30407789051532746, "learning_rate": 2.4398496240601503e-06, "loss": 0.0375, "num_tokens": 55525445.0, "reward": -1.5689442038536072, "reward_std": 7.007902336120606, "rewards/get_chromagram_reward": 0.6134120762348175, "rewards/get_chromagram_reward_std": 0.11479166969656944, "rewards/get_intelligibility_reward": -5.299472689628601, "rewards/get_intelligibility_reward_std": 11.139194774627686, "rewards/get_target_len_reward": -0.02077170191332698, "rewards/get_target_len_reward_std": 0.06966968681663274, "step": 6810 }, { "advantages": 4.023312931700218e-08, "advantages_std": 1.6519222855567932, "clip_ratio": 0.0, "completion_length": 86.25595397949219, "epoch": 5.129323308270677, "grad_norm": 40.5, "kl": 0.3467926293611526, "learning_rate": 2.43609022556391e-06, "loss": 0.0386, "num_tokens": 55830230.0, "reward": -1.8048245370388032, "reward_std": 6.856232643127441, "rewards/get_chromagram_reward": 0.6267795324325561, "rewards/get_chromagram_reward_std": 0.12441687732934952, "rewards/get_intelligibility_reward": -6.0160400390625, "rewards/get_intelligibility_reward_std": 10.567765140533448, "rewards/get_target_len_reward": -0.02521284818649292, "rewards/get_target_len_reward_std": 0.07437393795698881, "step": 6820 }, { "advantages": 4.423161307443024e-07, "advantages_std": 1.5914803862571716, "clip_ratio": 0.0, "completion_length": 86.9851203918457, "epoch": 5.136842105263158, "grad_norm": 8.0, "kl": 0.3530740290880203, "learning_rate": 2.432330827067669e-06, "loss": 0.0352, "num_tokens": 56137102.0, "reward": -1.563096636533737, "reward_std": 6.7047443866729735, "rewards/get_chromagram_reward": 0.6265234291553498, "rewards/get_chromagram_reward_std": 0.12023014053702355, "rewards/get_intelligibility_reward": -5.292724537849426, "rewards/get_intelligibility_reward_std": 10.608396053314209, "rewards/get_target_len_reward": -0.023088517505675553, "rewards/get_target_len_reward_std": 0.0586923124268651, "step": 6830 }, { "advantages": -5.491078042041409e-07, "advantages_std": 1.517658293247223, "clip_ratio": 0.0, "completion_length": 85.73631057739257, "epoch": 5.144360902255639, "grad_norm": 59.0, "kl": 0.3680226504802704, "learning_rate": 2.428571428571429e-06, "loss": 0.0385, "num_tokens": 56441444.0, "reward": -1.2847602039575576, "reward_std": 6.313221168518067, "rewards/get_chromagram_reward": 0.6244631409645081, "rewards/get_chromagram_reward_std": 0.12040503397583961, "rewards/get_intelligibility_reward": -4.454781115055084, "rewards/get_intelligibility_reward_std": 10.152585697174072, "rewards/get_target_len_reward": -0.023962332773953675, "rewards/get_target_len_reward_std": 0.05895144417881966, "step": 6840 }, { "advantages": 5.302330219336682e-08, "advantages_std": 1.52640398144722, "clip_ratio": 0.0, "completion_length": 88.34166870117187, "epoch": 5.15187969924812, "grad_norm": 5.15625, "kl": 0.32784585654735565, "learning_rate": 2.424812030075188e-06, "loss": 0.0328, "num_tokens": 56752125.0, "reward": -1.2381734997034073, "reward_std": 6.723718500137329, "rewards/get_chromagram_reward": 0.6423853456974029, "rewards/get_chromagram_reward_std": 0.10823142379522324, "rewards/get_intelligibility_reward": -4.337746638059616, "rewards/get_intelligibility_reward_std": 10.807358932495116, "rewards/get_target_len_reward": -0.01915889075025916, "rewards/get_target_len_reward_std": 0.05332515276968479, "step": 6850 }, { "advantages": 1.184642421492299e-07, "advantages_std": 1.5295220255851745, "clip_ratio": 0.0, "completion_length": 86.89345474243164, "epoch": 5.159398496240602, "grad_norm": 19.25, "kl": 5.167052660882473, "learning_rate": 2.4210526315789477e-06, "loss": 0.5225, "num_tokens": 57058305.0, "reward": -1.584045022726059, "reward_std": 6.4145008563995365, "rewards/get_chromagram_reward": 0.6162258267402649, "rewards/get_chromagram_reward_std": 0.1133840948343277, "rewards/get_intelligibility_reward": -5.345023941993714, "rewards/get_intelligibility_reward_std": 9.983595991134644, "rewards/get_target_len_reward": -0.02333657452836633, "rewards/get_target_len_reward_std": 0.0719031471759081, "step": 6860 }, { "advantages": -4.3710078045933185e-08, "advantages_std": 1.5951733827590941, "clip_ratio": 0.0, "completion_length": 86.81666946411133, "epoch": 5.166917293233083, "grad_norm": 7.90625, "kl": 0.35654806196689603, "learning_rate": 2.417293233082707e-06, "loss": 0.0423, "num_tokens": 57364259.0, "reward": -1.0671080329455436, "reward_std": 6.709721088409424, "rewards/get_chromagram_reward": 0.6349423170089722, "rewards/get_chromagram_reward_std": 0.11176861301064492, "rewards/get_intelligibility_reward": -3.807164826989174, "rewards/get_intelligibility_reward_std": 10.922046852111816, "rewards/get_target_len_reward": -0.029101449809968472, "rewards/get_target_len_reward_std": 0.08822835758328437, "step": 6870 }, { "advantages": -5.061427756913872e-07, "advantages_std": 1.558658480644226, "clip_ratio": 0.0, "completion_length": 84.79107284545898, "epoch": 5.174436090225564, "grad_norm": 7.84375, "kl": 24.10377275198698, "learning_rate": 2.4135338345864665e-06, "loss": 2.4124, "num_tokens": 57665170.0, "reward": -1.6351744055747985, "reward_std": 6.380785751342773, "rewards/get_chromagram_reward": 0.6248750269412995, "rewards/get_chromagram_reward_std": 0.10694740414619446, "rewards/get_intelligibility_reward": -5.510381889343262, "rewards/get_intelligibility_reward_std": 9.946515560150146, "rewards/get_target_len_reward": -0.020016012340784074, "rewards/get_target_len_reward_std": 0.05424492470920086, "step": 6880 }, { "advantages": -1.986825282074278e-09, "advantages_std": 1.5632656812667847, "clip_ratio": 0.0, "completion_length": 89.39166641235352, "epoch": 5.181954887218045, "grad_norm": 12.25, "kl": 0.3236946240067482, "learning_rate": 2.4097744360902257e-06, "loss": 0.0371, "num_tokens": 57978655.0, "reward": -1.1818639472126962, "reward_std": 6.2812474250793455, "rewards/get_chromagram_reward": 0.6271504878997802, "rewards/get_chromagram_reward_std": 0.09476440995931626, "rewards/get_intelligibility_reward": -4.154732119292021, "rewards/get_intelligibility_reward_std": 10.153310012817382, "rewards/get_target_len_reward": -0.018009957671165467, "rewards/get_target_len_reward_std": 0.05352111738175154, "step": 6890 }, { "advantages": 1.5199186123027176e-07, "advantages_std": 1.5968881249427795, "clip_ratio": 0.0, "completion_length": 89.56309509277344, "epoch": 5.189473684210526, "grad_norm": 7.03125, "kl": 0.31880530416965486, "learning_rate": 2.406015037593985e-06, "loss": 0.0362, "num_tokens": 58292583.0, "reward": -1.1216454744338988, "reward_std": 6.318594741821289, "rewards/get_chromagram_reward": 0.6131006479263306, "rewards/get_chromagram_reward_std": 0.11267746463418007, "rewards/get_intelligibility_reward": -3.9571539878845217, "rewards/get_intelligibility_reward_std": 10.343779563903809, "rewards/get_target_len_reward": -0.020882988907396795, "rewards/get_target_len_reward_std": 0.06357498727738857, "step": 6900 }, { "advantages": -9.909271625474503e-08, "advantages_std": 1.583446776866913, "clip_ratio": 0.0, "completion_length": 86.56964492797852, "epoch": 5.196992481203008, "grad_norm": 6.15625, "kl": 0.38114998638629916, "learning_rate": 2.4022556390977446e-06, "loss": 0.039, "num_tokens": 58598561.0, "reward": -1.4668249249458314, "reward_std": 6.721897220611572, "rewards/get_chromagram_reward": 0.6037787020206451, "rewards/get_chromagram_reward_std": 0.1092615433037281, "rewards/get_intelligibility_reward": -4.986512398719787, "rewards/get_intelligibility_reward_std": 10.674504327774049, "rewards/get_target_len_reward": -0.017740893363952636, "rewards/get_target_len_reward_std": 0.0504965964704752, "step": 6910 }, { "advantages": 3.6979716639962134e-07, "advantages_std": 1.5312896370887756, "clip_ratio": 0.0, "completion_length": 84.78273849487304, "epoch": 5.204511278195489, "grad_norm": 9.5, "kl": 0.5774701595306396, "learning_rate": 2.398496240601504e-06, "loss": 0.0619, "num_tokens": 58898988.0, "reward": -1.5599512100219726, "reward_std": 6.487179136276245, "rewards/get_chromagram_reward": 0.6300956845283509, "rewards/get_chromagram_reward_std": 0.11254699677228927, "rewards/get_intelligibility_reward": -5.289097785949707, "rewards/get_intelligibility_reward_std": 10.138393115997314, "rewards/get_target_len_reward": -0.02085120417177677, "rewards/get_target_len_reward_std": 0.054374009184539315, "step": 6920 }, { "advantages": 9.064873864872425e-08, "advantages_std": 1.4809496760368348, "clip_ratio": 0.0, "completion_length": 86.4476203918457, "epoch": 5.21203007518797, "grad_norm": 7.0, "kl": 0.3302300497889519, "learning_rate": 2.3947368421052635e-06, "loss": 0.0375, "num_tokens": 59204310.0, "reward": -1.5772881627082824, "reward_std": 6.619961738586426, "rewards/get_chromagram_reward": 0.6164861679077148, "rewards/get_chromagram_reward_std": 0.11783003509044647, "rewards/get_intelligibility_reward": -5.327880811691284, "rewards/get_intelligibility_reward_std": 10.359909629821777, "rewards/get_target_len_reward": -0.020469481870532037, "rewards/get_target_len_reward_std": 0.05776517633348703, "step": 6930 }, { "advantages": -5.712101369681477e-09, "advantages_std": 1.6208165287971497, "clip_ratio": 0.0, "completion_length": 90.48214492797851, "epoch": 5.219548872180451, "grad_norm": 4.0625, "kl": 0.28569827526807784, "learning_rate": 2.3909774436090227e-06, "loss": 0.0295, "num_tokens": 59521105.0, "reward": -1.22709841132164, "reward_std": 6.416428852081299, "rewards/get_chromagram_reward": 0.6274036824703216, "rewards/get_chromagram_reward_std": 0.11061776801943779, "rewards/get_intelligibility_reward": -4.292467278242111, "rewards/get_intelligibility_reward_std": 10.17729892730713, "rewards/get_target_len_reward": -0.016231359355151652, "rewards/get_target_len_reward_std": 0.041813553869724275, "step": 6940 }, { "advantages": -3.9935112092770455e-07, "advantages_std": 1.5138915538787843, "clip_ratio": 0.0, "completion_length": 89.7500015258789, "epoch": 5.227067669172932, "grad_norm": 10.25, "kl": 0.3521960288286209, "learning_rate": 2.3872180451127823e-06, "loss": 0.0354, "num_tokens": 59835649.0, "reward": -1.3172665178775786, "reward_std": 6.572981929779052, "rewards/get_chromagram_reward": 0.6248763024806976, "rewards/get_chromagram_reward_std": 0.10387115105986595, "rewards/get_intelligibility_reward": -4.556372022628784, "rewards/get_intelligibility_reward_std": 10.613866996765136, "rewards/get_target_len_reward": -0.02030346216633916, "rewards/get_target_len_reward_std": 0.06047505233436823, "step": 6950 }, { "advantages": 3.9984780286772546e-08, "advantages_std": 1.3496541380882263, "clip_ratio": 0.0, "completion_length": 85.63928680419922, "epoch": 5.234586466165413, "grad_norm": 8.3125, "kl": 0.33269438743591306, "learning_rate": 2.3834586466165416e-06, "loss": 0.0387, "num_tokens": 60138755.0, "reward": -1.9132965922355651, "reward_std": 6.963921070098877, "rewards/get_chromagram_reward": 0.6157672822475433, "rewards/get_chromagram_reward_std": 0.11723127737641334, "rewards/get_intelligibility_reward": -6.334445428848267, "rewards/get_intelligibility_reward_std": 10.675588703155517, "rewards/get_target_len_reward": -0.0212111490778625, "rewards/get_target_len_reward_std": 0.06555038467049598, "step": 6960 }, { "advantages": 1.5447536867441157e-07, "advantages_std": 1.706715500354767, "clip_ratio": 0.0, "completion_length": 89.80952529907226, "epoch": 5.242105263157895, "grad_norm": 7.875, "kl": 0.3068757638335228, "learning_rate": 2.379699248120301e-06, "loss": 0.0351, "num_tokens": 60452409.0, "reward": -1.1487817078828813, "reward_std": 6.275458145141601, "rewards/get_chromagram_reward": 0.6316677033901215, "rewards/get_chromagram_reward_std": 0.10417709574103355, "rewards/get_intelligibility_reward": -4.055424535274506, "rewards/get_intelligibility_reward_std": 10.131557607650757, "rewards/get_target_len_reward": -0.022588130366057158, "rewards/get_target_len_reward_std": 0.07189572602510452, "step": 6970 }, { "advantages": -4.221995197895012e-08, "advantages_std": 1.666536283493042, "clip_ratio": 0.0, "completion_length": 86.27559814453124, "epoch": 5.249624060150376, "grad_norm": 6.0625, "kl": 3.1084075570106506, "learning_rate": 2.3759398496240604e-06, "loss": 0.3127, "num_tokens": 60757343.0, "reward": -1.7111223936080933, "reward_std": 6.904574012756347, "rewards/get_chromagram_reward": 0.6233543515205383, "rewards/get_chromagram_reward_std": 0.11629278510808945, "rewards/get_intelligibility_reward": -5.738094091415405, "rewards/get_intelligibility_reward_std": 10.80052490234375, "rewards/get_target_len_reward": -0.01862709941342473, "rewards/get_target_len_reward_std": 0.04503080155700445, "step": 6980 }, { "advantages": 5.811452865600586e-08, "advantages_std": 1.6017412543296814, "clip_ratio": 0.0, "completion_length": 87.71666793823242, "epoch": 5.257142857142857, "grad_norm": 6.40625, "kl": 0.662769903242588, "learning_rate": 2.3721804511278197e-06, "loss": 0.0647, "num_tokens": 61066441.0, "reward": -1.690003263950348, "reward_std": 7.024064779281616, "rewards/get_chromagram_reward": 0.626186752319336, "rewards/get_chromagram_reward_std": 0.11964567676186562, "rewards/get_intelligibility_reward": -5.6770260572433475, "rewards/get_intelligibility_reward_std": 11.044268608093262, "rewards/get_target_len_reward": -0.019170239195227624, "rewards/get_target_len_reward_std": 0.04387279041111469, "step": 6990 }, { "advantages": -5.098680901483022e-07, "advantages_std": 1.6851074337959289, "clip_ratio": 0.0, "completion_length": 90.42500152587891, "epoch": 5.264661654135338, "grad_norm": 5.0625, "kl": 0.4801910310983658, "learning_rate": 2.368421052631579e-06, "loss": 0.0531, "num_tokens": 61383224.0, "reward": -1.1600703239440917, "reward_std": 6.438305616378784, "rewards/get_chromagram_reward": 0.6169378876686096, "rewards/get_chromagram_reward_std": 0.11968811750411987, "rewards/get_intelligibility_reward": -4.075756704807281, "rewards/get_intelligibility_reward_std": 10.412063598632812, "rewards/get_target_len_reward": -0.021391940582543612, "rewards/get_target_len_reward_std": 0.05608705058693886, "step": 7000 }, { "advantages": 5.019208067835734e-07, "advantages_std": 1.6616424560546874, "clip_ratio": 0.0, "completion_length": 83.74107208251954, "epoch": 5.272180451127819, "grad_norm": 7.21875, "kl": 0.44268949925899503, "learning_rate": 2.3646616541353385e-06, "loss": 0.0489, "num_tokens": 61680912.0, "reward": -1.8132120728492738, "reward_std": 6.99270076751709, "rewards/get_chromagram_reward": 0.6127820551395416, "rewards/get_chromagram_reward_std": 0.1150453269481659, "rewards/get_intelligibility_reward": -6.030172061920166, "rewards/get_intelligibility_reward_std": 10.928667163848877, "rewards/get_target_len_reward": -0.022245942149311304, "rewards/get_target_len_reward_std": 0.0636180106550455, "step": 7010 }, { "advantages": 3.9910278815114e-07, "advantages_std": 1.6216766953468322, "clip_ratio": 0.0, "completion_length": 85.14702377319335, "epoch": 5.279699248120301, "grad_norm": 5.71875, "kl": 0.3775395154953003, "learning_rate": 2.3609022556390977e-06, "loss": 0.0443, "num_tokens": 61982083.0, "reward": -1.5514660596847534, "reward_std": 6.568768739700317, "rewards/get_chromagram_reward": 0.6127265453338623, "rewards/get_chromagram_reward_std": 0.1180819720029831, "rewards/get_intelligibility_reward": -5.243985009193421, "rewards/get_intelligibility_reward_std": 10.370185852050781, "rewards/get_target_len_reward": -0.023139323480427264, "rewards/get_target_len_reward_std": 0.07175651714205741, "step": 7020 }, { "advantages": -2.2128225509732147e-07, "advantages_std": 1.4390228509902954, "clip_ratio": 0.0, "completion_length": 90.20476379394532, "epoch": 5.287218045112782, "grad_norm": 7.03125, "kl": 0.3158534452319145, "learning_rate": 2.3571428571428574e-06, "loss": 0.0319, "num_tokens": 62298384.0, "reward": -1.0992859616875648, "reward_std": 6.4978162288665775, "rewards/get_chromagram_reward": 0.6307229638099671, "rewards/get_chromagram_reward_std": 0.1130222037434578, "rewards/get_intelligibility_reward": -3.906294071674347, "rewards/get_intelligibility_reward_std": 10.669676256179809, "rewards/get_target_len_reward": -0.022286569233983755, "rewards/get_target_len_reward_std": 0.053410691767930986, "step": 7030 }, { "advantages": 3.4521024296907397e-07, "advantages_std": 1.474492335319519, "clip_ratio": 0.0, "completion_length": 85.87381210327149, "epoch": 5.294736842105263, "grad_norm": 5.1875, "kl": 0.3069776311516762, "learning_rate": 2.3533834586466166e-06, "loss": 0.0318, "num_tokens": 62602452.0, "reward": -1.6596662104129791, "reward_std": 6.60387659072876, "rewards/get_chromagram_reward": 0.6276511192321778, "rewards/get_chromagram_reward_std": 0.11999709233641624, "rewards/get_intelligibility_reward": -5.584190630912781, "rewards/get_intelligibility_reward_std": 10.274257373809814, "rewards/get_target_len_reward": -0.02245878903195262, "rewards/get_target_len_reward_std": 0.05738620981574059, "step": 7040 }, { "advantages": -1.3262032894090225e-07, "advantages_std": 1.645529079437256, "clip_ratio": 0.0, "completion_length": 88.70119171142578, "epoch": 5.302255639097744, "grad_norm": 7.59375, "kl": 0.31840053349733355, "learning_rate": 2.3496240601503762e-06, "loss": 0.035, "num_tokens": 62914016.0, "reward": -1.4617899343371392, "reward_std": 6.983079671859741, "rewards/get_chromagram_reward": 0.6129271507263183, "rewards/get_chromagram_reward_std": 0.11817508563399315, "rewards/get_intelligibility_reward": -4.9796409726142885, "rewards/get_intelligibility_reward_std": 11.137241172790528, "rewards/get_target_len_reward": -0.018655857909470795, "rewards/get_target_len_reward_std": 0.05034475326538086, "step": 7050 }, { "advantages": 3.899136231666489e-08, "advantages_std": 1.5345199227333068, "clip_ratio": 0.0, "completion_length": 87.96964416503906, "epoch": 5.309774436090225, "grad_norm": 11.25, "kl": 0.42554541379213334, "learning_rate": 2.3458646616541355e-06, "loss": 0.0469, "num_tokens": 63223477.0, "reward": -1.5128212684765459, "reward_std": 6.479464769363403, "rewards/get_chromagram_reward": 0.6212924182415008, "rewards/get_chromagram_reward_std": 0.12423446327447892, "rewards/get_intelligibility_reward": -5.140022248029709, "rewards/get_intelligibility_reward_std": 10.14908390045166, "rewards/get_target_len_reward": -0.019733687210828067, "rewards/get_target_len_reward_std": 0.053223836794495584, "step": 7060 }, { "advantages": 4.6094261083595713e-07, "advantages_std": 1.4596411824226379, "clip_ratio": 0.0, "completion_length": 87.01131134033203, "epoch": 5.317293233082707, "grad_norm": 21.0, "kl": 0.3311875075101852, "learning_rate": 2.342105263157895e-06, "loss": 0.0376, "num_tokens": 63529504.0, "reward": -1.3178690791130065, "reward_std": 6.07402229309082, "rewards/get_chromagram_reward": 0.6423762559890747, "rewards/get_chromagram_reward_std": 0.10435187965631484, "rewards/get_intelligibility_reward": -4.571545362472534, "rewards/get_intelligibility_reward_std": 9.608693504333496, "rewards/get_target_len_reward": -0.024437942169606687, "rewards/get_target_len_reward_std": 0.07988403253257274, "step": 7070 }, { "advantages": -1.0418395106626121e-07, "advantages_std": 1.5732410550117493, "clip_ratio": 0.0, "completion_length": 87.19583587646484, "epoch": 5.324812030075188, "grad_norm": 5.3125, "kl": 0.439504337310791, "learning_rate": 2.3383458646616543e-06, "loss": 0.05, "num_tokens": 63836515.0, "reward": -1.24023876786232, "reward_std": 6.505569648742676, "rewards/get_chromagram_reward": 0.6219327926635743, "rewards/get_chromagram_reward_std": 0.10478176176548004, "rewards/get_intelligibility_reward": -4.321042706817389, "rewards/get_intelligibility_reward_std": 10.534570789337158, "rewards/get_target_len_reward": -0.021606145799160002, "rewards/get_target_len_reward_std": 0.07151275128126144, "step": 7080 }, { "advantages": -6.737808490697717e-07, "advantages_std": 1.6721099257469176, "clip_ratio": 0.0, "completion_length": 89.05476303100586, "epoch": 5.332330827067669, "grad_norm": 6.28125, "kl": 0.2852652370929718, "learning_rate": 2.334586466165414e-06, "loss": 0.0308, "num_tokens": 64148637.0, "reward": -1.182309341430664, "reward_std": 5.901613664627075, "rewards/get_chromagram_reward": 0.6252353847026825, "rewards/get_chromagram_reward_std": 0.11671873182058334, "rewards/get_intelligibility_reward": -4.154728293418884, "rewards/get_intelligibility_reward_std": 9.46047306060791, "rewards/get_target_len_reward": -0.017434802697971465, "rewards/get_target_len_reward_std": 0.05399100258946419, "step": 7090 }, { "advantages": -4.215787043904129e-07, "advantages_std": 1.4545109629631043, "clip_ratio": 0.0, "completion_length": 86.90357208251953, "epoch": 5.33984962406015, "grad_norm": 34.5, "kl": 0.8135642141103745, "learning_rate": 2.330827067669173e-06, "loss": 0.0854, "num_tokens": 64454798.0, "reward": -1.7862254559993744, "reward_std": 6.553706693649292, "rewards/get_chromagram_reward": 0.6116910398006439, "rewards/get_chromagram_reward_std": 0.10669722333550453, "rewards/get_intelligibility_reward": -5.948037195205688, "rewards/get_intelligibility_reward_std": 10.042701148986817, "rewards/get_target_len_reward": -0.02233006376773119, "rewards/get_target_len_reward_std": 0.0645206457003951, "step": 7100 }, { "advantages": -7.164975386331207e-08, "advantages_std": 1.5383620381355285, "clip_ratio": 0.0, "completion_length": 87.3398811340332, "epoch": 5.347368421052631, "grad_norm": 6.15625, "kl": 0.31754042506217955, "learning_rate": 2.3270676691729324e-06, "loss": 0.0366, "num_tokens": 64762510.0, "reward": -1.8574148535728454, "reward_std": 7.235694837570191, "rewards/get_chromagram_reward": 0.624306446313858, "rewards/get_chromagram_reward_std": 0.11558253094553947, "rewards/get_intelligibility_reward": -6.175735664367676, "rewards/get_intelligibility_reward_std": 11.251740074157714, "rewards/get_target_len_reward": -0.02081504138186574, "rewards/get_target_len_reward_std": 0.059531612880527975, "step": 7110 }, { "advantages": 1.9793709107318592e-07, "advantages_std": 1.5872637391090394, "clip_ratio": 0.0, "completion_length": 85.45297698974609, "epoch": 5.354887218045112, "grad_norm": 7.96875, "kl": 0.3048421382904053, "learning_rate": 2.3233082706766916e-06, "loss": 0.0402, "num_tokens": 65064777.0, "reward": -1.672779655456543, "reward_std": 7.417824840545654, "rewards/get_chromagram_reward": 0.6289528369903564, "rewards/get_chromagram_reward_std": 0.11590608209371567, "rewards/get_intelligibility_reward": -5.623922848701477, "rewards/get_intelligibility_reward_std": 11.883197689056397, "rewards/get_target_len_reward": -0.023368793446570633, "rewards/get_target_len_reward_std": 0.08596869017928839, "step": 7120 }, { "advantages": -5.041558601703855e-08, "advantages_std": 1.5343198895454406, "clip_ratio": 0.0, "completion_length": 87.90774078369141, "epoch": 5.362406015037594, "grad_norm": 10.25, "kl": 0.40592711269855497, "learning_rate": 2.3195488721804513e-06, "loss": 0.0454, "num_tokens": 65373399.0, "reward": -1.4265998385846614, "reward_std": 6.847759199142456, "rewards/get_chromagram_reward": 0.6110140025615692, "rewards/get_chromagram_reward_std": 0.11806119754910469, "rewards/get_intelligibility_reward": -4.8684638172388075, "rewards/get_intelligibility_reward_std": 10.972066974639892, "rewards/get_target_len_reward": -0.022349441517144443, "rewards/get_target_len_reward_std": 0.07365586645901204, "step": 7130 }, { "advantages": -4.6330194436450256e-07, "advantages_std": 1.4813837170600892, "clip_ratio": 0.0, "completion_length": 88.28869400024413, "epoch": 5.369924812030075, "grad_norm": 5.125, "kl": 0.3149084284901619, "learning_rate": 2.3157894736842105e-06, "loss": 0.0357, "num_tokens": 65684331.0, "reward": -1.166980442777276, "reward_std": 6.096787214279175, "rewards/get_chromagram_reward": 0.6261518657207489, "rewards/get_chromagram_reward_std": 0.10937800630927086, "rewards/get_intelligibility_reward": -4.104114997386932, "rewards/get_intelligibility_reward_std": 9.773707628250122, "rewards/get_target_len_reward": -0.02297802213579416, "rewards/get_target_len_reward_std": 0.07116317190229893, "step": 7140 }, { "advantages": -4.321336177781632e-08, "advantages_std": 1.5572047114372254, "clip_ratio": 0.0, "completion_length": 85.3553581237793, "epoch": 5.377443609022556, "grad_norm": 5.90625, "kl": 0.3070035442709923, "learning_rate": 2.31203007518797e-06, "loss": 0.0399, "num_tokens": 65985864.0, "reward": -1.5421805202960968, "reward_std": 6.477402973175049, "rewards/get_chromagram_reward": 0.6324486792087555, "rewards/get_chromagram_reward_std": 0.11431118622422218, "rewards/get_intelligibility_reward": -5.233871936798096, "rewards/get_intelligibility_reward_std": 10.193256998062134, "rewards/get_target_len_reward": -0.025118078384548426, "rewards/get_target_len_reward_std": 0.07251648269593716, "step": 7150 }, { "advantages": 3.0522546214939437e-07, "advantages_std": 1.5489553928375244, "clip_ratio": 0.0, "completion_length": 88.06726303100587, "epoch": 5.384962406015037, "grad_norm": 11.8125, "kl": 0.32657683938741683, "learning_rate": 2.3082706766917294e-06, "loss": 0.037, "num_tokens": 66296128.0, "reward": -1.4618814080953597, "reward_std": 6.870611715316772, "rewards/get_chromagram_reward": 0.6149131417274475, "rewards/get_chromagram_reward_std": 0.11258464604616165, "rewards/get_intelligibility_reward": -4.979121434688568, "rewards/get_intelligibility_reward_std": 10.994513130187988, "rewards/get_target_len_reward": -0.021435728576034308, "rewards/get_target_len_reward_std": 0.07056615706533194, "step": 7160 }, { "advantages": 3.5663447448541776e-07, "advantages_std": 1.491836416721344, "clip_ratio": 0.0, "completion_length": 86.39881134033203, "epoch": 5.392481203007518, "grad_norm": 98.0, "kl": 0.5265922620892525, "learning_rate": 2.304511278195489e-06, "loss": 0.0578, "num_tokens": 66601121.0, "reward": -1.5936202168464662, "reward_std": 7.05308780670166, "rewards/get_chromagram_reward": 0.6205924928188324, "rewards/get_chromagram_reward_std": 0.11182191222906113, "rewards/get_intelligibility_reward": -5.383054161071778, "rewards/get_intelligibility_reward_std": 11.244775199890137, "rewards/get_target_len_reward": -0.018398610036820175, "rewards/get_target_len_reward_std": 0.04998060278594494, "step": 7170 }, { "advantages": -2.58783499385018e-07, "advantages_std": 1.5922078490257263, "clip_ratio": 0.0, "completion_length": 85.5148826599121, "epoch": 5.4, "grad_norm": 290.0, "kl": 0.36700445264577863, "learning_rate": 2.3007518796992482e-06, "loss": 0.0381, "num_tokens": 66903910.0, "reward": -1.4047640979290008, "reward_std": 6.410592555999756, "rewards/get_chromagram_reward": 0.6141754031181336, "rewards/get_chromagram_reward_std": 0.11500288918614388, "rewards/get_intelligibility_reward": -4.811613512039185, "rewards/get_intelligibility_reward_std": 10.22232813835144, "rewards/get_target_len_reward": -0.01685400800779462, "rewards/get_target_len_reward_std": 0.051270670257508755, "step": 7180 }, { "advantages": -3.978610166655017e-07, "advantages_std": 1.548390531539917, "clip_ratio": 0.0, "completion_length": 91.05595321655274, "epoch": 5.407518796992481, "grad_norm": 5.3125, "kl": 0.34714345484972, "learning_rate": 2.296992481203008e-06, "loss": 0.0371, "num_tokens": 67222290.0, "reward": -1.0444933593273162, "reward_std": 6.631856918334961, "rewards/get_chromagram_reward": 0.6159287035465241, "rewards/get_chromagram_reward_std": 0.10580892786383629, "rewards/get_intelligibility_reward": -3.72859765291214, "rewards/get_intelligibility_reward_std": 10.923980712890625, "rewards/get_target_len_reward": -0.02081095390021801, "rewards/get_target_len_reward_std": 0.06703416649252177, "step": 7190 }, { "advantages": 5.582968611861361e-07, "advantages_std": 1.5962399005889893, "clip_ratio": 0.0, "completion_length": 86.19881057739258, "epoch": 5.415037593984962, "grad_norm": 8.8125, "kl": 0.344843378663063, "learning_rate": 2.293233082706767e-06, "loss": 0.0378, "num_tokens": 67526984.0, "reward": -1.3031162723898888, "reward_std": 6.6909068584442135, "rewards/get_chromagram_reward": 0.6307554066181182, "rewards/get_chromagram_reward_std": 0.1207100123167038, "rewards/get_intelligibility_reward": -4.517148065567016, "rewards/get_intelligibility_reward_std": 10.817517948150634, "rewards/get_target_len_reward": -0.022955980710685255, "rewards/get_target_len_reward_std": 0.05329591147601605, "step": 7200 }, { "advantages": 6.263454807253765e-07, "advantages_std": 1.6104538202285767, "clip_ratio": 0.0, "completion_length": 85.74107284545899, "epoch": 5.4225563909774435, "grad_norm": 36.25, "kl": 0.36100142300128935, "learning_rate": 2.2894736842105263e-06, "loss": 0.0402, "num_tokens": 67829982.0, "reward": -1.5839881598949432, "reward_std": 6.86516432762146, "rewards/get_chromagram_reward": 0.6199360251426697, "rewards/get_chromagram_reward_std": 0.1123465433716774, "rewards/get_intelligibility_reward": -5.348700094223022, "rewards/get_intelligibility_reward_std": 10.803610897064209, "rewards/get_target_len_reward": -0.023200150951743125, "rewards/get_target_len_reward_std": 0.07127108946442604, "step": 7210 }, { "advantages": -1.9644698063814302e-07, "advantages_std": 1.6620344519615173, "clip_ratio": 0.0, "completion_length": 84.13869171142578, "epoch": 5.4300751879699245, "grad_norm": 7.0, "kl": 0.33577401787042616, "learning_rate": 2.285714285714286e-06, "loss": 0.037, "num_tokens": 68129217.0, "reward": -1.6259113669395446, "reward_std": 6.991596651077271, "rewards/get_chromagram_reward": 0.6219640374183655, "rewards/get_chromagram_reward_std": 0.11431905999779701, "rewards/get_intelligibility_reward": -5.476862597465515, "rewards/get_intelligibility_reward_std": 11.013343715667725, "rewards/get_target_len_reward": -0.0228353314101696, "rewards/get_target_len_reward_std": 0.062448183074593544, "step": 7220 }, { "advantages": 5.846222336458595e-07, "advantages_std": 1.5011724472045898, "clip_ratio": 0.0, "completion_length": 88.38809585571289, "epoch": 5.437593984962406, "grad_norm": 7.34375, "kl": 0.2984209552407265, "learning_rate": 2.281954887218045e-06, "loss": 0.0347, "num_tokens": 68439441.0, "reward": -1.5354242980480195, "reward_std": 7.008116817474365, "rewards/get_chromagram_reward": 0.602736109495163, "rewards/get_chromagram_reward_std": 0.10836800113320351, "rewards/get_intelligibility_reward": -5.19312492609024, "rewards/get_intelligibility_reward_std": 11.163818836212158, "rewards/get_target_len_reward": -0.015883707161992788, "rewards/get_target_len_reward_std": 0.056575870141386986, "step": 7230 }, { "advantages": -1.2690824462424645e-07, "advantages_std": 1.5233567714691163, "clip_ratio": 0.0, "completion_length": 86.97321548461915, "epoch": 5.4451127819548875, "grad_norm": 16.75, "kl": 1.1598840221762656, "learning_rate": 2.278195488721805e-06, "loss": 0.1181, "num_tokens": 68746932.0, "reward": -1.5316576719284059, "reward_std": 7.266022396087647, "rewards/get_chromagram_reward": 0.6144776403903961, "rewards/get_chromagram_reward_std": 0.1082504540681839, "rewards/get_intelligibility_reward": -5.187613144516945, "rewards/get_intelligibility_reward_std": 11.530435466766358, "rewards/get_target_len_reward": -0.021837185509502886, "rewards/get_target_len_reward_std": 0.06608111709356308, "step": 7240 }, { "advantages": -7.033348182972077e-07, "advantages_std": 1.6308774948120117, "clip_ratio": 0.0, "completion_length": 85.37381057739258, "epoch": 5.4526315789473685, "grad_norm": 7.03125, "kl": 0.33484105467796327, "learning_rate": 2.274436090225564e-06, "loss": 0.0373, "num_tokens": 69049029.0, "reward": -1.5764613687992095, "reward_std": 6.747067260742187, "rewards/get_chromagram_reward": 0.6007543444633484, "rewards/get_chromagram_reward_std": 0.11851666420698166, "rewards/get_intelligibility_reward": -5.304040777683258, "rewards/get_intelligibility_reward_std": 10.560655975341797, "rewards/get_target_len_reward": -0.026097355782985686, "rewards/get_target_len_reward_std": 0.08660393953323364, "step": 7250 }, { "advantages": -4.818043635168578e-08, "advantages_std": 1.5586305379867553, "clip_ratio": 0.0, "completion_length": 86.24881286621094, "epoch": 5.4601503759398495, "grad_norm": 7.125, "kl": 0.3909894391894341, "learning_rate": 2.2706766917293237e-06, "loss": 0.0478, "num_tokens": 69354051.0, "reward": -1.437357211112976, "reward_std": 6.97461724281311, "rewards/get_chromagram_reward": 0.6212467789649964, "rewards/get_chromagram_reward_std": 0.10954332649707794, "rewards/get_intelligibility_reward": -4.909408502280712, "rewards/get_intelligibility_reward_std": 11.192184829711914, "rewards/get_target_len_reward": -0.02390976846218109, "rewards/get_target_len_reward_std": 0.07783832289278507, "step": 7260 }, { "advantages": -1.3927619193054852e-06, "advantages_std": 1.5793645858764649, "clip_ratio": 0.0, "completion_length": 86.78274078369141, "epoch": 5.467669172932331, "grad_norm": 11.625, "kl": 0.5376500964164734, "learning_rate": 2.266917293233083e-06, "loss": 0.0585, "num_tokens": 69659701.0, "reward": -1.7209204077720641, "reward_std": 6.849113702774048, "rewards/get_chromagram_reward": 0.6152911186218262, "rewards/get_chromagram_reward_std": 0.11797176897525788, "rewards/get_intelligibility_reward": -5.754384231567383, "rewards/get_intelligibility_reward_std": 10.6309889793396, "rewards/get_target_len_reward": -0.02366781998425722, "rewards/get_target_len_reward_std": 0.06547661423683167, "step": 7270 }, { "advantages": -4.146248176795098e-07, "advantages_std": 1.5919759631156922, "clip_ratio": 0.0, "completion_length": 89.48750228881836, "epoch": 5.4751879699248125, "grad_norm": 8.0625, "kl": 0.3412448182702065, "learning_rate": 2.2631578947368426e-06, "loss": 0.0408, "num_tokens": 69973447.0, "reward": -1.612433785200119, "reward_std": 7.330462169647217, "rewards/get_chromagram_reward": 0.6047484815120697, "rewards/get_chromagram_reward_std": 0.11287015751004219, "rewards/get_intelligibility_reward": -5.421263241767884, "rewards/get_intelligibility_reward_std": 11.748994159698487, "rewards/get_target_len_reward": -0.020786524470895528, "rewards/get_target_len_reward_std": 0.07320697046816349, "step": 7280 }, { "advantages": 1.225620582800957e-07, "advantages_std": 1.5420305848121643, "clip_ratio": 0.0, "completion_length": 84.21488189697266, "epoch": 5.4827067669172935, "grad_norm": 5.875, "kl": 11.998553581535816, "learning_rate": 2.259398496240602e-06, "loss": 1.2037, "num_tokens": 70273466.0, "reward": -1.5845581710338592, "reward_std": 6.450918436050415, "rewards/get_chromagram_reward": 0.6186227262020111, "rewards/get_chromagram_reward_std": 0.1141671285033226, "rewards/get_intelligibility_reward": -5.353073540329933, "rewards/get_intelligibility_reward_std": 10.01052770614624, "rewards/get_target_len_reward": -0.019223571103066207, "rewards/get_target_len_reward_std": 0.054246239550411704, "step": 7290 }, { "advantages": -2.4288893314405867e-07, "advantages_std": 1.6326185584068298, "clip_ratio": 0.0, "completion_length": 89.59404983520508, "epoch": 5.490225563909775, "grad_norm": 6.53125, "kl": 0.8095985978841782, "learning_rate": 2.255639097744361e-06, "loss": 0.0826, "num_tokens": 70588453.0, "reward": -1.3116859912872314, "reward_std": 6.901690149307251, "rewards/get_chromagram_reward": 0.6312417924404145, "rewards/get_chromagram_reward_std": 0.11385724022984504, "rewards/get_intelligibility_reward": -4.543382370471955, "rewards/get_intelligibility_reward_std": 11.238386249542236, "rewards/get_target_len_reward": -0.022917152382433414, "rewards/get_target_len_reward_std": 0.05599991828203201, "step": 7300 }, { "advantages": -1.1771916632596913e-07, "advantages_std": 1.5935291290283202, "clip_ratio": 0.0, "completion_length": 84.82916870117188, "epoch": 5.497744360902256, "grad_norm": 8.5, "kl": 0.3385091483592987, "learning_rate": 2.2518796992481202e-06, "loss": 0.0437, "num_tokens": 70888841.0, "reward": -1.810219794511795, "reward_std": 6.882726764678955, "rewards/get_chromagram_reward": 0.6040708005428315, "rewards/get_chromagram_reward_std": 0.11017877012491226, "rewards/get_intelligibility_reward": -6.00965039730072, "rewards/get_intelligibility_reward_std": 10.694256210327149, "rewards/get_target_len_reward": -0.02507947999984026, "rewards/get_target_len_reward_std": 0.07585760038346052, "step": 7310 }, { "advantages": -2.9454629952851973e-07, "advantages_std": 1.7237526535987855, "clip_ratio": 0.0, "completion_length": 87.3398826599121, "epoch": 5.505263157894737, "grad_norm": 6.875, "kl": 0.2888296276330948, "learning_rate": 2.24812030075188e-06, "loss": 0.0298, "num_tokens": 71197368.0, "reward": -1.3548449575901031, "reward_std": 7.186491394042969, "rewards/get_chromagram_reward": 0.6183264970779419, "rewards/get_chromagram_reward_std": 0.11901157423853874, "rewards/get_intelligibility_reward": -4.6635973930358885, "rewards/get_intelligibility_reward_std": 11.709501457214355, "rewards/get_target_len_reward": -0.01926371455192566, "rewards/get_target_len_reward_std": 0.04918394237756729, "step": 7320 }, { "advantages": -2.2997459652174258e-07, "advantages_std": 1.6268801808357238, "clip_ratio": 0.0, "completion_length": 91.177978515625, "epoch": 5.512781954887218, "grad_norm": 9.0, "kl": 0.3528441786766052, "learning_rate": 2.244360902255639e-06, "loss": 0.0388, "num_tokens": 71516401.0, "reward": -1.370145285129547, "reward_std": 6.830450391769409, "rewards/get_chromagram_reward": 0.6379683613777161, "rewards/get_chromagram_reward_std": 0.11868164539337159, "rewards/get_intelligibility_reward": -4.725983762741089, "rewards/get_intelligibility_reward_std": 11.004422569274903, "rewards/get_target_len_reward": -0.022420282661914825, "rewards/get_target_len_reward_std": 0.0538643242791295, "step": 7330 }, { "advantages": -5.488596244163091e-08, "advantages_std": 1.6177343845367431, "clip_ratio": 0.0, "completion_length": 83.06428680419921, "epoch": 5.5203007518797, "grad_norm": 5.25, "kl": 0.3498178094625473, "learning_rate": 2.2406015037593987e-06, "loss": 0.0417, "num_tokens": 71812380.0, "reward": -1.207906161621213, "reward_std": 6.610358667373657, "rewards/get_chromagram_reward": 0.6290356993675232, "rewards/get_chromagram_reward_std": 0.10410335063934326, "rewards/get_intelligibility_reward": -4.230634343624115, "rewards/get_intelligibility_reward_std": 10.764556217193604, "rewards/get_target_len_reward": -0.02211959520354867, "rewards/get_target_len_reward_std": 0.06520765721797943, "step": 7340 }, { "advantages": -8.321057112681273e-07, "advantages_std": 1.5614672541618346, "clip_ratio": 0.0, "completion_length": 86.23631057739257, "epoch": 5.527819548872181, "grad_norm": 6.5625, "kl": 0.2928524002432823, "learning_rate": 2.236842105263158e-06, "loss": 0.0386, "num_tokens": 72117651.0, "reward": -1.6458905786275864, "reward_std": 7.037843132019043, "rewards/get_chromagram_reward": 0.6171200931072235, "rewards/get_chromagram_reward_std": 0.10211833268404007, "rewards/get_intelligibility_reward": -5.5341644287109375, "rewards/get_intelligibility_reward_std": 11.144237804412843, "rewards/get_target_len_reward": -0.020627187751233577, "rewards/get_target_len_reward_std": 0.06789864487946033, "step": 7350 }, { "advantages": -6.174047882723244e-07, "advantages_std": 1.5940613746643066, "clip_ratio": 0.0, "completion_length": 87.6255973815918, "epoch": 5.535338345864662, "grad_norm": 18.75, "kl": 0.34271004796028137, "learning_rate": 2.2330827067669176e-06, "loss": 0.037, "num_tokens": 72425365.0, "reward": -1.725543212890625, "reward_std": 6.623648262023925, "rewards/get_chromagram_reward": 0.6141940057277679, "rewards/get_chromagram_reward_std": 0.11702094674110412, "rewards/get_intelligibility_reward": -5.767955183982849, "rewards/get_intelligibility_reward_std": 10.295107078552245, "rewards/get_target_len_reward": -0.022868365794420243, "rewards/get_target_len_reward_std": 0.06632985081523657, "step": 7360 }, { "advantages": 1.5397867496602658e-07, "advantages_std": 1.4260494589805603, "clip_ratio": 0.0, "completion_length": 86.57143096923828, "epoch": 5.542857142857143, "grad_norm": 4.75, "kl": 0.3642714560031891, "learning_rate": 2.229323308270677e-06, "loss": 0.0418, "num_tokens": 72730793.0, "reward": -1.752213227748871, "reward_std": 6.929638338088989, "rewards/get_chromagram_reward": 0.6075048983097077, "rewards/get_chromagram_reward_std": 0.11558273807168007, "rewards/get_intelligibility_reward": -5.8434700012207035, "rewards/get_intelligibility_reward_std": 10.849376010894776, "rewards/get_target_len_reward": -0.020674252323806284, "rewards/get_target_len_reward_std": 0.06688700504601001, "step": 7370 }, { "advantages": -1.380840899400937e-07, "advantages_std": 1.7065526127815247, "clip_ratio": 0.0, "completion_length": 82.55714416503906, "epoch": 5.550375939849624, "grad_norm": 77.5, "kl": 0.3779150277376175, "learning_rate": 2.2255639097744365e-06, "loss": 0.0513, "num_tokens": 73025159.0, "reward": -1.7196123540401458, "reward_std": 7.126970195770264, "rewards/get_chromagram_reward": 0.623649537563324, "rewards/get_chromagram_reward_std": 0.11368174850940704, "rewards/get_intelligibility_reward": -5.756095004081726, "rewards/get_intelligibility_reward_std": 11.227530765533448, "rewards/get_target_len_reward": -0.026391300559043884, "rewards/get_target_len_reward_std": 0.08802502788603306, "step": 7380 }, { "advantages": 5.04155968883424e-07, "advantages_std": 1.6110849261283875, "clip_ratio": 0.0, "completion_length": 89.42797775268555, "epoch": 5.557894736842105, "grad_norm": 5.78125, "kl": 0.3004383772611618, "learning_rate": 2.2218045112781957e-06, "loss": 0.0302, "num_tokens": 73339524.0, "reward": -1.140392405539751, "reward_std": 6.8683192253112795, "rewards/get_chromagram_reward": 0.6287994384765625, "rewards/get_chromagram_reward_std": 0.10455321967601776, "rewards/get_intelligibility_reward": -4.034494996070862, "rewards/get_intelligibility_reward_std": 11.122114038467407, "rewards/get_target_len_reward": -0.015481433924287557, "rewards/get_target_len_reward_std": 0.04037857819348574, "step": 7390 }, { "advantages": 3.5812459326933775e-07, "advantages_std": 1.6297937989234925, "clip_ratio": 0.0, "completion_length": 88.87916793823243, "epoch": 5.565413533834587, "grad_norm": 13.25, "kl": 0.30461192429065703, "learning_rate": 2.218045112781955e-06, "loss": 0.0372, "num_tokens": 73652091.0, "reward": -1.1990951776504517, "reward_std": 6.741983842849732, "rewards/get_chromagram_reward": 0.6169572472572327, "rewards/get_chromagram_reward_std": 0.1067502036690712, "rewards/get_intelligibility_reward": -4.190165710449219, "rewards/get_intelligibility_reward_std": 10.995736217498779, "rewards/get_target_len_reward": -0.02407686710357666, "rewards/get_target_len_reward_std": 0.07429735269397497, "step": 7400 }, { "advantages": 1.0232131586462856e-07, "advantages_std": 1.5955123424530029, "clip_ratio": 0.0, "completion_length": 87.42024078369141, "epoch": 5.572932330827068, "grad_norm": 8.9375, "kl": 77.94752575904131, "learning_rate": 2.2142857142857146e-06, "loss": 7.7973, "num_tokens": 73959565.0, "reward": -1.600351732969284, "reward_std": 6.964174509048462, "rewards/get_chromagram_reward": 0.6232900559902191, "rewards/get_chromagram_reward_std": 0.11058256328105927, "rewards/get_intelligibility_reward": -5.403542852401733, "rewards/get_intelligibility_reward_std": 11.038510799407959, "rewards/get_target_len_reward": -0.020802028104662897, "rewards/get_target_len_reward_std": 0.05858626961708069, "step": 7410 }, { "advantages": 1.9123157102285405e-08, "advantages_std": 1.5321441888809204, "clip_ratio": 0.0, "completion_length": 91.97440795898437, "epoch": 5.580451127819549, "grad_norm": 4.78125, "kl": 0.3133543863892555, "learning_rate": 2.2105263157894738e-06, "loss": 0.0342, "num_tokens": 74279659.0, "reward": -1.511323982477188, "reward_std": 6.717669296264648, "rewards/get_chromagram_reward": 0.6166324973106384, "rewards/get_chromagram_reward_std": 0.10233291685581207, "rewards/get_intelligibility_reward": -5.131410145759583, "rewards/get_intelligibility_reward_std": 10.667114448547363, "rewards/get_target_len_reward": -0.019194147270172834, "rewards/get_target_len_reward_std": 0.055062121339142324, "step": 7420 }, { "advantages": -6.141762213474067e-07, "advantages_std": 1.5866358041763307, "clip_ratio": 0.0, "completion_length": 85.48631057739257, "epoch": 5.58796992481203, "grad_norm": 9.125, "kl": 0.5132203131914139, "learning_rate": 2.2067669172932334e-06, "loss": 0.0522, "num_tokens": 74582539.0, "reward": -1.4089649975299836, "reward_std": 6.644739103317261, "rewards/get_chromagram_reward": 0.6317284643650055, "rewards/get_chromagram_reward_std": 0.10995041355490684, "rewards/get_intelligibility_reward": -4.83715957403183, "rewards/get_intelligibility_reward_std": 10.636877918243409, "rewards/get_target_len_reward": -0.0214636730030179, "rewards/get_target_len_reward_std": 0.06028429102152586, "step": 7430 }, { "advantages": 1.8378098758375928e-07, "advantages_std": 1.5107809066772462, "clip_ratio": 0.0, "completion_length": 84.70178756713867, "epoch": 5.595488721804511, "grad_norm": 16.75, "kl": 0.9980653643608093, "learning_rate": 2.2030075187969927e-06, "loss": 0.1048, "num_tokens": 74882065.0, "reward": -1.8240859806537628, "reward_std": 6.827915096282959, "rewards/get_chromagram_reward": 0.6137704908847809, "rewards/get_chromagram_reward_std": 0.11075319945812226, "rewards/get_intelligibility_reward": -6.064216899871826, "rewards/get_intelligibility_reward_std": 10.448147773742676, "rewards/get_target_len_reward": -0.021811048593372108, "rewards/get_target_len_reward_std": 0.06830717157572508, "step": 7440 }, { "advantages": 3.5017728805541994e-08, "advantages_std": 1.6366748571395875, "clip_ratio": 0.0, "completion_length": 84.80952453613281, "epoch": 5.603007518796993, "grad_norm": 6.46875, "kl": 0.3172616258263588, "learning_rate": 2.199248120300752e-06, "loss": 0.0363, "num_tokens": 75183463.0, "reward": -1.5454985558986665, "reward_std": 6.658914279937744, "rewards/get_chromagram_reward": 0.6241901516914368, "rewards/get_chromagram_reward_std": 0.11918274387717247, "rewards/get_intelligibility_reward": -5.237881135940552, "rewards/get_intelligibility_reward_std": 10.563720417022704, "rewards/get_target_len_reward": -0.02280454756692052, "rewards/get_target_len_reward_std": 0.06878137122839689, "step": 7450 }, { "advantages": 3.047287538748833e-07, "advantages_std": 1.5836209535598755, "clip_ratio": 0.0, "completion_length": 85.54285736083985, "epoch": 5.610526315789474, "grad_norm": 7.1875, "kl": 0.3748527690768242, "learning_rate": 2.1954887218045115e-06, "loss": 0.0372, "num_tokens": 75486376.0, "reward": -1.5894612610340118, "reward_std": 7.040074014663697, "rewards/get_chromagram_reward": 0.6304149627685547, "rewards/get_chromagram_reward_std": 0.11504409015178681, "rewards/get_intelligibility_reward": -5.378788644075394, "rewards/get_intelligibility_reward_std": 11.16924238204956, "rewards/get_target_len_reward": -0.020009812247008086, "rewards/get_target_len_reward_std": 0.047809756547212603, "step": 7460 }, { "advantages": -1.231829600101264e-07, "advantages_std": 1.5577387928962707, "clip_ratio": 0.0, "completion_length": 87.06845474243164, "epoch": 5.618045112781955, "grad_norm": 9.25, "kl": 0.3719530820846558, "learning_rate": 2.1917293233082707e-06, "loss": 0.036, "num_tokens": 75793227.0, "reward": -1.6739049434661866, "reward_std": 7.10861234664917, "rewards/get_chromagram_reward": 0.6131251990795136, "rewards/get_chromagram_reward_std": 0.1312383234500885, "rewards/get_intelligibility_reward": -5.615152812004089, "rewards/get_intelligibility_reward_std": 11.296269989013672, "rewards/get_target_len_reward": -0.01968686729669571, "rewards/get_target_len_reward_std": 0.0534376522526145, "step": 7470 }, { "advantages": 3.082056977632419e-07, "advantages_std": 1.5980460882186889, "clip_ratio": 0.0, "completion_length": 88.05476379394531, "epoch": 5.625563909774436, "grad_norm": 22.375, "kl": 0.6268338203430176, "learning_rate": 2.1879699248120304e-06, "loss": 0.0698, "num_tokens": 76102876.0, "reward": -1.3034660577774049, "reward_std": 6.318447589874268, "rewards/get_chromagram_reward": 0.6268084466457366, "rewards/get_chromagram_reward_std": 0.10592088475823402, "rewards/get_intelligibility_reward": -4.514228749275207, "rewards/get_intelligibility_reward_std": 10.15007405281067, "rewards/get_target_len_reward": -0.022977558616548776, "rewards/get_target_len_reward_std": 0.06933909989893436, "step": 7480 }, { "advantages": -5.220373708425541e-07, "advantages_std": 1.5136359333992004, "clip_ratio": 0.0, "completion_length": 87.81428680419921, "epoch": 5.633082706766917, "grad_norm": 64.0, "kl": 0.34380061328411105, "learning_rate": 2.1842105263157896e-06, "loss": 0.0433, "num_tokens": 76412451.0, "reward": -1.3146987706422806, "reward_std": 6.69021692276001, "rewards/get_chromagram_reward": 0.6146059095859527, "rewards/get_chromagram_reward_std": 0.11763279736042023, "rewards/get_intelligibility_reward": -4.534822654724121, "rewards/get_intelligibility_reward_std": 10.621045303344726, "rewards/get_target_len_reward": -0.02387933572754264, "rewards/get_target_len_reward_std": 0.06662974283099174, "step": 7490 }, { "advantages": -2.632538624425251e-07, "advantages_std": 1.453635001182556, "clip_ratio": 0.0, "completion_length": 85.31369323730469, "epoch": 5.640601503759399, "grad_norm": 6.65625, "kl": 0.3900526463985443, "learning_rate": 2.180451127819549e-06, "loss": 0.0464, "num_tokens": 76714207.0, "reward": -1.7231360912322997, "reward_std": 6.901036357879638, "rewards/get_chromagram_reward": 0.6194710373878479, "rewards/get_chromagram_reward_std": 0.11405050083994865, "rewards/get_intelligibility_reward": -5.76176085472107, "rewards/get_intelligibility_reward_std": 10.817874336242676, "rewards/get_target_len_reward": -0.02711833519861102, "rewards/get_target_len_reward_std": 0.0795399196445942, "step": 7500 }, { "advantages": -2.1532178209326958e-07, "advantages_std": 1.5806753516197205, "clip_ratio": 0.0, "completion_length": 88.37857284545899, "epoch": 5.64812030075188, "grad_norm": 58.0, "kl": 0.3461156725883484, "learning_rate": 2.1766917293233085e-06, "loss": 0.0375, "num_tokens": 77024921.0, "reward": -1.7006117105484009, "reward_std": 7.299945545196533, "rewards/get_chromagram_reward": 0.6067075133323669, "rewards/get_chromagram_reward_std": 0.116612958163023, "rewards/get_intelligibility_reward": -5.686506867408752, "rewards/get_intelligibility_reward_std": 11.537493228912354, "rewards/get_target_len_reward": -0.02203559260815382, "rewards/get_target_len_reward_std": 0.06246333085000515, "step": 7510 }, { "advantages": -1.5459956159702415e-07, "advantages_std": 1.616200816631317, "clip_ratio": 0.0, "completion_length": 90.30774002075195, "epoch": 5.655639097744361, "grad_norm": 5.84375, "kl": 0.3066115379333496, "learning_rate": 2.1729323308270677e-06, "loss": 0.0327, "num_tokens": 77340767.0, "reward": -1.258822149783373, "reward_std": 6.4773036479949955, "rewards/get_chromagram_reward": 0.6259684383869171, "rewards/get_chromagram_reward_std": 0.12014298290014266, "rewards/get_intelligibility_reward": -4.385247963666916, "rewards/get_intelligibility_reward_std": 10.4630756855011, "rewards/get_target_len_reward": -0.01718673426657915, "rewards/get_target_len_reward_std": 0.048602374456822875, "step": 7520 }, { "advantages": -1.3783579788650967e-08, "advantages_std": 1.5101877331733704, "clip_ratio": 0.0, "completion_length": 85.07559814453126, "epoch": 5.663157894736842, "grad_norm": 153.0, "kl": 0.3391159653663635, "learning_rate": 2.1691729323308273e-06, "loss": 0.0357, "num_tokens": 77642099.0, "reward": -1.0761063262820243, "reward_std": 6.302792119979858, "rewards/get_chromagram_reward": 0.6141286730766297, "rewards/get_chromagram_reward_std": 0.12359654754400254, "rewards/get_intelligibility_reward": -3.8235466867685317, "rewards/get_intelligibility_reward_std": 10.235271453857422, "rewards/get_target_len_reward": -0.018900788482278587, "rewards/get_target_len_reward_std": 0.05066053103655577, "step": 7530 }, { "advantages": -2.648681522998686e-07, "advantages_std": 1.566270637512207, "clip_ratio": 0.0, "completion_length": 89.32321624755859, "epoch": 5.670676691729323, "grad_norm": 6.46875, "kl": 0.3445367142558098, "learning_rate": 2.1654135338345866e-06, "loss": 0.039, "num_tokens": 77955382.0, "reward": -1.1872239589691163, "reward_std": 6.858507299423218, "rewards/get_chromagram_reward": 0.6298686146736145, "rewards/get_chromagram_reward_std": 0.11859809085726739, "rewards/get_intelligibility_reward": -4.16928243637085, "rewards/get_intelligibility_reward_std": 11.18128228187561, "rewards/get_target_len_reward": -0.022257814556360243, "rewards/get_target_len_reward_std": 0.059147943183779715, "step": 7540 }, { "advantages": -2.829978953400314e-07, "advantages_std": 1.4890665531158447, "clip_ratio": 0.0, "completion_length": 86.4071434020996, "epoch": 5.678195488721805, "grad_norm": 7.3125, "kl": 0.3267993301153183, "learning_rate": 2.161654135338346e-06, "loss": 0.0369, "num_tokens": 78260685.0, "reward": -1.3021728478372097, "reward_std": 6.207171201705933, "rewards/get_chromagram_reward": 0.6221062183380127, "rewards/get_chromagram_reward_std": 0.12322953790426254, "rewards/get_intelligibility_reward": -4.508913117647171, "rewards/get_intelligibility_reward_std": 9.848860836029052, "rewards/get_target_len_reward": -0.01971148233860731, "rewards/get_target_len_reward_std": 0.05028697308152914, "step": 7550 }, { "advantages": 2.8337041442227927e-07, "advantages_std": 1.5456938207149507, "clip_ratio": 0.0, "completion_length": 89.8357162475586, "epoch": 5.685714285714286, "grad_norm": 7.71875, "kl": 0.37217641323804856, "learning_rate": 2.1578947368421054e-06, "loss": 0.0419, "num_tokens": 78575011.0, "reward": -1.1617390155792235, "reward_std": 6.960851192474365, "rewards/get_chromagram_reward": 0.6310091912746429, "rewards/get_chromagram_reward_std": 0.11414845660328865, "rewards/get_intelligibility_reward": -4.094545310735702, "rewards/get_intelligibility_reward_std": 11.278204441070557, "rewards/get_target_len_reward": -0.021680734027177094, "rewards/get_target_len_reward_std": 0.06393090002238751, "step": 7560 }, { "advantages": -1.9644698765475254e-07, "advantages_std": 1.4756534457206727, "clip_ratio": 0.0, "completion_length": 86.86726379394531, "epoch": 5.693233082706767, "grad_norm": 7.1875, "kl": 0.420529405772686, "learning_rate": 2.154135338345865e-06, "loss": 0.0466, "num_tokens": 78882044.0, "reward": -1.2242244243621827, "reward_std": 6.362412405014038, "rewards/get_chromagram_reward": 0.6175295114517212, "rewards/get_chromagram_reward_std": 0.11478937491774559, "rewards/get_intelligibility_reward": -4.266911506652832, "rewards/get_intelligibility_reward_std": 10.337289953231812, "rewards/get_target_len_reward": -0.02329086307436228, "rewards/get_target_len_reward_std": 0.07511311620473862, "step": 7570 }, { "advantages": -4.892548970403254e-08, "advantages_std": 1.5039716720581056, "clip_ratio": 0.0, "completion_length": 86.2928596496582, "epoch": 5.700751879699248, "grad_norm": 5.6875, "kl": 0.421612012386322, "learning_rate": 2.1503759398496243e-06, "loss": 0.044, "num_tokens": 79186669.0, "reward": -1.5858042895793916, "reward_std": 7.348571300506592, "rewards/get_chromagram_reward": 0.6164960920810699, "rewards/get_chromagram_reward_std": 0.11954497992992401, "rewards/get_intelligibility_reward": -5.3503889560699465, "rewards/get_intelligibility_reward_std": 11.802607250213622, "rewards/get_target_len_reward": -0.02351978179067373, "rewards/get_target_len_reward_std": 0.07501283418387175, "step": 7580 }, { "advantages": 4.142522826100503e-07, "advantages_std": 1.5708836436271667, "clip_ratio": 0.0, "completion_length": 85.72381057739258, "epoch": 5.708270676691729, "grad_norm": 5.21875, "kl": 0.8034768372774124, "learning_rate": 2.146616541353384e-06, "loss": 0.087, "num_tokens": 79490048.0, "reward": -1.3070945113897323, "reward_std": 6.501582670211792, "rewards/get_chromagram_reward": 0.6361547887325287, "rewards/get_chromagram_reward_std": 0.11452390253543854, "rewards/get_intelligibility_reward": -4.532324576377869, "rewards/get_intelligibility_reward_std": 10.39711618423462, "rewards/get_target_len_reward": -0.025113471318036318, "rewards/get_target_len_reward_std": 0.06926766522228718, "step": 7590 }, { "advantages": 5.488594325697704e-08, "advantages_std": 1.631120765209198, "clip_ratio": 0.0, "completion_length": 87.38928756713867, "epoch": 5.715789473684211, "grad_norm": 6.28125, "kl": 0.372196751832962, "learning_rate": 2.1428571428571427e-06, "loss": 0.0431, "num_tokens": 79798145.0, "reward": -1.4786714985966682, "reward_std": 6.542671346664429, "rewards/get_chromagram_reward": 0.6265040755271911, "rewards/get_chromagram_reward_std": 0.11047741249203683, "rewards/get_intelligibility_reward": -5.038548780605197, "rewards/get_intelligibility_reward_std": 10.313930130004882, "rewards/get_target_len_reward": -0.023969475366175174, "rewards/get_target_len_reward_std": 0.06568300873041152, "step": 7600 }, { "advantages": 5.471209846064085e-07, "advantages_std": 1.6120773196220397, "clip_ratio": 0.0, "completion_length": 88.9708351135254, "epoch": 5.723308270676692, "grad_norm": 33.5, "kl": 0.3500454694032669, "learning_rate": 2.1390977443609024e-06, "loss": 0.0343, "num_tokens": 80110077.0, "reward": -1.3915309190750123, "reward_std": 6.181269693374634, "rewards/get_chromagram_reward": 0.6148362159729004, "rewards/get_chromagram_reward_std": 0.11888119354844093, "rewards/get_intelligibility_reward": -4.770346236228943, "rewards/get_intelligibility_reward_std": 9.799637031555175, "rewards/get_target_len_reward": -0.019082391913980246, "rewards/get_target_len_reward_std": 0.0465511741116643, "step": 7610 }, { "advantages": -2.140800191341441e-07, "advantages_std": 1.5855832815170288, "clip_ratio": 0.0, "completion_length": 89.29226455688476, "epoch": 5.730827067669173, "grad_norm": 6.4375, "kl": 0.3423966646194458, "learning_rate": 2.1353383458646616e-06, "loss": 0.0341, "num_tokens": 80423645.0, "reward": -1.2833735831081867, "reward_std": 6.893964433670044, "rewards/get_chromagram_reward": 0.6285513877868653, "rewards/get_chromagram_reward_std": 0.11171592697501183, "rewards/get_intelligibility_reward": -4.462010219693184, "rewards/get_intelligibility_reward_std": 11.191399383544923, "rewards/get_target_len_reward": -0.016661503352224827, "rewards/get_target_len_reward_std": 0.042914232984185216, "step": 7620 }, { "advantages": 1.0579824163414742e-07, "advantages_std": 1.7247357010841369, "clip_ratio": 0.0, "completion_length": 86.08928680419922, "epoch": 5.738345864661654, "grad_norm": 17.75, "kl": 0.41501193344593046, "learning_rate": 2.1315789473684212e-06, "loss": 0.0444, "num_tokens": 80727782.0, "reward": -1.6765829205513, "reward_std": 6.616048383712768, "rewards/get_chromagram_reward": 0.6197386085987091, "rewards/get_chromagram_reward_std": 0.1038549706339836, "rewards/get_intelligibility_reward": -5.629750919342041, "rewards/get_intelligibility_reward_std": 10.326595687866211, "rewards/get_target_len_reward": -0.01973616676405072, "rewards/get_target_len_reward_std": 0.05329372007399798, "step": 7630 }, { "advantages": 2.5952857072297777e-07, "advantages_std": 1.517828369140625, "clip_ratio": 0.0, "completion_length": 89.41131210327148, "epoch": 5.745864661654135, "grad_norm": 7.6875, "kl": 0.3498734712600708, "learning_rate": 2.1278195488721805e-06, "loss": 0.0336, "num_tokens": 81041426.0, "reward": -1.0377632051706314, "reward_std": 6.863356018066407, "rewards/get_chromagram_reward": 0.6367423355579376, "rewards/get_chromagram_reward_std": 0.10499195754528046, "rewards/get_intelligibility_reward": -3.7316944122314455, "rewards/get_intelligibility_reward_std": 11.360644626617432, "rewards/get_target_len_reward": -0.01833730023354292, "rewards/get_target_len_reward_std": 0.04112956747412681, "step": 7640 }, { "advantages": 3.581245948680589e-07, "advantages_std": 1.5799175381660462, "clip_ratio": 0.0, "completion_length": 88.0773826599121, "epoch": 5.753383458646616, "grad_norm": 8.1875, "kl": 0.32614710479974746, "learning_rate": 2.12406015037594e-06, "loss": 0.0367, "num_tokens": 81351297.0, "reward": -1.2343915634031872, "reward_std": 6.686226320266724, "rewards/get_chromagram_reward": 0.6211168110370636, "rewards/get_chromagram_reward_std": 0.10851754248142242, "rewards/get_intelligibility_reward": -4.305308359861374, "rewards/get_intelligibility_reward_std": 10.819230556488037, "rewards/get_target_len_reward": -0.01898303721100092, "rewards/get_target_len_reward_std": 0.05286200325936079, "step": 7650 }, { "advantages": 6.02876174582434e-07, "advantages_std": 1.5238336324691772, "clip_ratio": 0.0, "completion_length": 82.91726379394531, "epoch": 5.760902255639098, "grad_norm": 6.6875, "kl": 0.41712719202041626, "learning_rate": 2.1203007518796993e-06, "loss": 0.0433, "num_tokens": 81647033.0, "reward": -1.8270630359649658, "reward_std": 7.040747261047363, "rewards/get_chromagram_reward": 0.6239575922489167, "rewards/get_chromagram_reward_std": 0.11793971508741379, "rewards/get_intelligibility_reward": -6.084248375892639, "rewards/get_intelligibility_reward_std": 10.95757598876953, "rewards/get_target_len_reward": -0.020897910837084054, "rewards/get_target_len_reward_std": 0.05306037589907646, "step": 7660 }, { "advantages": 1.9992392807921533e-07, "advantages_std": 1.5137548208236695, "clip_ratio": 0.0, "completion_length": 86.46190567016602, "epoch": 5.768421052631579, "grad_norm": 4.8125, "kl": 0.315133111178875, "learning_rate": 2.116541353383459e-06, "loss": 0.0352, "num_tokens": 81953414.0, "reward": -1.4287876427173614, "reward_std": 6.695111894607544, "rewards/get_chromagram_reward": 0.6318199157714843, "rewards/get_chromagram_reward_std": 0.11796366795897484, "rewards/get_intelligibility_reward": -4.898377990722656, "rewards/get_intelligibility_reward_std": 10.68131456375122, "rewards/get_target_len_reward": -0.019804536644369364, "rewards/get_target_len_reward_std": 0.0511545468121767, "step": 7670 }, { "advantages": 4.3337545463373317e-07, "advantages_std": 1.576704490184784, "clip_ratio": 0.0, "completion_length": 86.61726226806641, "epoch": 5.77593984962406, "grad_norm": 7.53125, "kl": 0.29568569660186766, "learning_rate": 2.112781954887218e-06, "loss": 0.033, "num_tokens": 82259441.0, "reward": -0.9885002732276916, "reward_std": 6.236829566955566, "rewards/get_chromagram_reward": 0.6253950476646424, "rewards/get_chromagram_reward_std": 0.10131782740354538, "rewards/get_intelligibility_reward": -3.5707726955413817, "rewards/get_intelligibility_reward_std": 10.291595935821533, "rewards/get_target_len_reward": -0.020123045518994333, "rewards/get_target_len_reward_std": 0.05881231594830751, "step": 7680 }, { "advantages": -6.640950957148561e-07, "advantages_std": 1.5600481033325195, "clip_ratio": 0.0, "completion_length": 86.81904983520508, "epoch": 5.783458646616541, "grad_norm": 13.8125, "kl": 0.33634247779846194, "learning_rate": 2.109022556390978e-06, "loss": 0.0394, "num_tokens": 82566035.0, "reward": -1.7160348892211914, "reward_std": 6.599533224105835, "rewards/get_chromagram_reward": 0.6210841238498688, "rewards/get_chromagram_reward_std": 0.1084224171936512, "rewards/get_intelligibility_reward": -5.749317216873169, "rewards/get_intelligibility_reward_std": 10.231350469589234, "rewards/get_target_len_reward": -0.019871008209884168, "rewards/get_target_len_reward_std": 0.05601765606552363, "step": 7690 }, { "advantages": 2.1544597696987467e-07, "advantages_std": 1.4199920415878295, "clip_ratio": 0.0, "completion_length": 85.48809661865235, "epoch": 5.790977443609022, "grad_norm": 9.0, "kl": 0.3188432216644287, "learning_rate": 2.105263157894737e-06, "loss": 0.0401, "num_tokens": 82869216.0, "reward": -1.414464271068573, "reward_std": 6.685119390487671, "rewards/get_chromagram_reward": 0.6314283013343811, "rewards/get_chromagram_reward_std": 0.11570866852998733, "rewards/get_intelligibility_reward": -4.846628820896148, "rewards/get_intelligibility_reward_std": 10.673337078094482, "rewards/get_target_len_reward": -0.028192108776420356, "rewards/get_target_len_reward_std": 0.08352104537189006, "step": 7700 }, { "advantages": 3.07957300549333e-08, "advantages_std": 1.5753357529640197, "clip_ratio": 0.0, "completion_length": 89.41071548461915, "epoch": 5.798496240601503, "grad_norm": 8.1875, "kl": 0.3553292080760002, "learning_rate": 2.1015037593984963e-06, "loss": 0.0424, "num_tokens": 83183117.0, "reward": -1.2461704462766647, "reward_std": 6.781559991836548, "rewards/get_chromagram_reward": 0.6093644201755524, "rewards/get_chromagram_reward_std": 0.11293570399284363, "rewards/get_intelligibility_reward": -4.326484024524689, "rewards/get_intelligibility_reward_std": 11.03603982925415, "rewards/get_target_len_reward": -0.0213914823718369, "rewards/get_target_len_reward_std": 0.06803354378789664, "step": 7710 }, { "advantages": 4.512568470893541e-07, "advantages_std": 1.483815038204193, "clip_ratio": 0.0, "completion_length": 87.26666870117188, "epoch": 5.806015037593985, "grad_norm": 7.15625, "kl": 0.3535382956266403, "learning_rate": 2.097744360902256e-06, "loss": 0.0357, "num_tokens": 83491814.0, "reward": -1.2586440563201904, "reward_std": 6.434259748458862, "rewards/get_chromagram_reward": 0.6283224105834961, "rewards/get_chromagram_reward_std": 0.12016476839780807, "rewards/get_intelligibility_reward": -4.383782145380974, "rewards/get_intelligibility_reward_std": 10.280045509338379, "rewards/get_target_len_reward": -0.02047220030799508, "rewards/get_target_len_reward_std": 0.047592471912503244, "step": 7720 }, { "advantages": 2.07871214286115e-07, "advantages_std": 1.5112568855285644, "clip_ratio": 0.0, "completion_length": 85.03095397949218, "epoch": 5.813533834586466, "grad_norm": 6.84375, "kl": 0.27754891514778135, "learning_rate": 2.093984962406015e-06, "loss": 0.0304, "num_tokens": 83793626.0, "reward": -1.3859003722667693, "reward_std": 6.656201267242432, "rewards/get_chromagram_reward": 0.6436172723770142, "rewards/get_chromagram_reward_std": 0.1101572260260582, "rewards/get_intelligibility_reward": -4.780960714817047, "rewards/get_intelligibility_reward_std": 10.576345729827882, "rewards/get_target_len_reward": -0.020357548724859953, "rewards/get_target_len_reward_std": 0.05655680745840073, "step": 7730 }, { "advantages": 3.1640133970611826e-07, "advantages_std": 1.6128756046295165, "clip_ratio": 0.0, "completion_length": 84.45476379394532, "epoch": 5.821052631578947, "grad_norm": 6.9375, "kl": 0.36553671211004257, "learning_rate": 2.090225563909775e-06, "loss": 0.0402, "num_tokens": 84093787.0, "reward": -1.6369949102401733, "reward_std": 6.2919535636901855, "rewards/get_chromagram_reward": 0.6147024154663085, "rewards/get_chromagram_reward_std": 0.12221779748797416, "rewards/get_intelligibility_reward": -5.501388788223267, "rewards/get_intelligibility_reward_std": 9.713677501678466, "rewards/get_target_len_reward": -0.02429804615676403, "rewards/get_target_len_reward_std": 0.07021530121564865, "step": 7740 }, { "advantages": 3.630916513230886e-07, "advantages_std": 1.59818115234375, "clip_ratio": 0.0, "completion_length": 88.15774078369141, "epoch": 5.828571428571428, "grad_norm": 6.34375, "kl": 0.3614527150988579, "learning_rate": 2.086466165413534e-06, "loss": 0.0421, "num_tokens": 84403760.0, "reward": -1.3270988881587982, "reward_std": 6.766696786880493, "rewards/get_chromagram_reward": 0.6228858053684234, "rewards/get_chromagram_reward_std": 0.10732598975300789, "rewards/get_intelligibility_reward": -4.585289144515992, "rewards/get_intelligibility_reward_std": 10.932073068618774, "rewards/get_target_len_reward": -0.01889320518821478, "rewards/get_target_len_reward_std": 0.05684518478810787, "step": 7750 }, { "advantages": 1.343588138524865e-07, "advantages_std": 1.5361783146858214, "clip_ratio": 0.0, "completion_length": 88.17143020629882, "epoch": 5.836090225563909, "grad_norm": 6.1875, "kl": 0.341474187374115, "learning_rate": 2.0827067669172937e-06, "loss": 0.0388, "num_tokens": 84713912.0, "reward": -1.3829630866646767, "reward_std": 6.212386417388916, "rewards/get_chromagram_reward": 0.6140726923942565, "rewards/get_chromagram_reward_std": 0.12224277853965759, "rewards/get_intelligibility_reward": -4.742500221729278, "rewards/get_intelligibility_reward_std": 9.82179946899414, "rewards/get_target_len_reward": -0.020461480133235455, "rewards/get_target_len_reward_std": 0.05814525857567787, "step": 7760 }, { "advantages": -2.9901662994546997e-07, "advantages_std": 1.645896029472351, "clip_ratio": 0.0, "completion_length": 87.46190643310547, "epoch": 5.843609022556391, "grad_norm": 6.28125, "kl": 0.3289607897400856, "learning_rate": 2.078947368421053e-06, "loss": 0.0401, "num_tokens": 85020860.0, "reward": -1.580271178483963, "reward_std": 6.879051733016968, "rewards/get_chromagram_reward": 0.6160095632076263, "rewards/get_chromagram_reward_std": 0.10809171348810195, "rewards/get_intelligibility_reward": -5.33557288646698, "rewards/get_intelligibility_reward_std": 10.888761234283447, "rewards/get_target_len_reward": -0.021249937638640404, "rewards/get_target_len_reward_std": 0.0723442368209362, "step": 7770 }, { "advantages": -2.1358331139254006e-07, "advantages_std": 1.487088394165039, "clip_ratio": 0.0, "completion_length": 87.21190643310547, "epoch": 5.851127819548872, "grad_norm": 12.9375, "kl": 0.45677812695503234, "learning_rate": 2.075187969924812e-06, "loss": 0.0516, "num_tokens": 85327139.0, "reward": -1.6519016563892364, "reward_std": 7.351545715332032, "rewards/get_chromagram_reward": 0.6114323198795318, "rewards/get_chromagram_reward_std": 0.1092762902379036, "rewards/get_intelligibility_reward": -5.544794082641602, "rewards/get_intelligibility_reward_std": 11.731245994567871, "rewards/get_target_len_reward": -0.022342839650809763, "rewards/get_target_len_reward_std": 0.0750244103372097, "step": 7780 }, { "advantages": -6.4571814561986685e-09, "advantages_std": 1.5465797185897827, "clip_ratio": 0.0, "completion_length": 88.10119247436523, "epoch": 5.858646616541353, "grad_norm": 4.75, "kl": 0.33119735270738604, "learning_rate": 2.0714285714285717e-06, "loss": 0.0384, "num_tokens": 85636475.0, "reward": -2.0101249754428863, "reward_std": 7.168440437316894, "rewards/get_chromagram_reward": 0.6261911392211914, "rewards/get_chromagram_reward_std": 0.11736593246459961, "rewards/get_intelligibility_reward": -6.6336499691009525, "rewards/get_intelligibility_reward_std": 10.861632442474365, "rewards/get_target_len_reward": -0.022915830463171007, "rewards/get_target_len_reward_std": 0.07768816240131855, "step": 7790 }, { "advantages": -1.8725794106444483e-07, "advantages_std": 1.5579930305480958, "clip_ratio": 0.0, "completion_length": 88.16666793823242, "epoch": 5.866165413533834, "grad_norm": 6.375, "kl": 0.3023149937391281, "learning_rate": 2.067669172932331e-06, "loss": 0.0302, "num_tokens": 85946568.0, "reward": -1.6431579798460008, "reward_std": 6.812919569015503, "rewards/get_chromagram_reward": 0.6214454472064972, "rewards/get_chromagram_reward_std": 0.11257565468549728, "rewards/get_intelligibility_reward": -5.531436330080032, "rewards/get_intelligibility_reward_std": 10.538073015213012, "rewards/get_target_len_reward": -0.019482666440308095, "rewards/get_target_len_reward_std": 0.044953730516135694, "step": 7800 }, { "advantages": -6.618599286412064e-08, "advantages_std": 1.5409797191619874, "clip_ratio": 0.0, "completion_length": 86.61428604125976, "epoch": 5.873684210526315, "grad_norm": 6.9375, "kl": 0.2656691923737526, "learning_rate": 2.06390977443609e-06, "loss": 0.0269, "num_tokens": 86252028.0, "reward": -1.5512358218431472, "reward_std": 7.006728744506836, "rewards/get_chromagram_reward": 0.6187478005886078, "rewards/get_chromagram_reward_std": 0.10456798076629639, "rewards/get_intelligibility_reward": -5.2547792315483095, "rewards/get_intelligibility_reward_std": 11.130170154571534, "rewards/get_target_len_reward": -0.01767557030543685, "rewards/get_target_len_reward_std": 0.053920988366007803, "step": 7810 }, { "advantages": -3.606081140361539e-07, "advantages_std": 1.4223445296287536, "clip_ratio": 0.0, "completion_length": 88.24464416503906, "epoch": 5.881203007518797, "grad_norm": 10.75, "kl": 0.373692986369133, "learning_rate": 2.06015037593985e-06, "loss": 0.037, "num_tokens": 86563084.0, "reward": -1.4428786307573318, "reward_std": 6.848339080810547, "rewards/get_chromagram_reward": 0.6280100584030152, "rewards/get_chromagram_reward_std": 0.11810790672898293, "rewards/get_intelligibility_reward": -4.936243617534638, "rewards/get_intelligibility_reward_std": 11.012694549560546, "rewards/get_target_len_reward": -0.020402026176452637, "rewards/get_target_len_reward_std": 0.0451013945043087, "step": 7820 }, { "advantages": 4.321336435353373e-08, "advantages_std": 1.5101688921451568, "clip_ratio": 0.0, "completion_length": 88.09285736083984, "epoch": 5.888721804511278, "grad_norm": 6.5, "kl": 0.39618532359600067, "learning_rate": 2.056390977443609e-06, "loss": 0.041, "num_tokens": 86872944.0, "reward": -1.3075867846608162, "reward_std": 6.386407995223999, "rewards/get_chromagram_reward": 0.6417074501514435, "rewards/get_chromagram_reward_std": 0.10772898942232131, "rewards/get_intelligibility_reward": -4.542570279538632, "rewards/get_intelligibility_reward_std": 10.191238403320312, "rewards/get_target_len_reward": -0.021897280309349298, "rewards/get_target_len_reward_std": 0.06281909570097924, "step": 7830 }, { "advantages": -1.1250376275029339e-07, "advantages_std": 1.5223307609558105, "clip_ratio": 0.0, "completion_length": 90.91964416503906, "epoch": 5.896240601503759, "grad_norm": 9.3125, "kl": 0.34072367250919344, "learning_rate": 2.0526315789473687e-06, "loss": 0.0372, "num_tokens": 87190111.0, "reward": -1.261566150188446, "reward_std": 6.712820816040039, "rewards/get_chromagram_reward": 0.6167412042617798, "rewards/get_chromagram_reward_std": 0.11125565245747567, "rewards/get_intelligibility_reward": -4.383474278450012, "rewards/get_intelligibility_reward_std": 10.852442836761474, "rewards/get_target_len_reward": -0.01796508561819792, "rewards/get_target_len_reward_std": 0.05169492810964584, "step": 7840 }, { "advantages": 1.0977189646155239e-07, "advantages_std": 1.5909324765205384, "clip_ratio": 0.0, "completion_length": 84.8101203918457, "epoch": 5.90375939849624, "grad_norm": 22.0, "kl": 0.3534141376614571, "learning_rate": 2.048872180451128e-06, "loss": 0.0369, "num_tokens": 87490948.0, "reward": -1.4347603440284729, "reward_std": 5.89638843536377, "rewards/get_chromagram_reward": 0.622400826215744, "rewards/get_chromagram_reward_std": 0.11123741194605827, "rewards/get_intelligibility_reward": -4.9074736595153805, "rewards/get_intelligibility_reward_std": 9.186559772491455, "rewards/get_target_len_reward": -0.019207833986729383, "rewards/get_target_len_reward_std": 0.05386210381984711, "step": 7850 }, { "advantages": -2.622604597490863e-07, "advantages_std": 1.5522445559501648, "clip_ratio": 0.0, "completion_length": 83.62976303100587, "epoch": 5.9112781954887215, "grad_norm": 7.71875, "kl": 0.3019698172807693, "learning_rate": 2.0451127819548876e-06, "loss": 0.032, "num_tokens": 87788686.0, "reward": -1.4791161119937897, "reward_std": 6.530420875549316, "rewards/get_chromagram_reward": 0.6263103306293487, "rewards/get_chromagram_reward_std": 0.11584180518984795, "rewards/get_intelligibility_reward": -5.04234025478363, "rewards/get_intelligibility_reward_std": 10.386770009994507, "rewards/get_target_len_reward": -0.021318211499601603, "rewards/get_target_len_reward_std": 0.05836506653577089, "step": 7860 }, { "advantages": -1.4901215372731258e-09, "advantages_std": 1.7043312788009644, "clip_ratio": 0.0, "completion_length": 84.61190643310547, "epoch": 5.918796992481203, "grad_norm": 7.3125, "kl": 0.330104099214077, "learning_rate": 2.0413533834586468e-06, "loss": 0.0425, "num_tokens": 88088552.0, "reward": -1.6420798242092132, "reward_std": 6.732420825958252, "rewards/get_chromagram_reward": 0.6319904744625091, "rewards/get_chromagram_reward_std": 0.1173894077539444, "rewards/get_intelligibility_reward": -5.5287513256073, "rewards/get_intelligibility_reward_std": 10.562070035934449, "rewards/get_target_len_reward": -0.029478291515260935, "rewards/get_target_len_reward_std": 0.08697659857571124, "step": 7870 }, { "advantages": -1.7657876867360755e-07, "advantages_std": 1.491753101348877, "clip_ratio": 0.0, "completion_length": 83.62678680419921, "epoch": 5.926315789473684, "grad_norm": 10.25, "kl": 0.30500824749469757, "learning_rate": 2.0375939849624064e-06, "loss": 0.0323, "num_tokens": 88386276.0, "reward": -1.5912244275212288, "reward_std": 6.555933570861816, "rewards/get_chromagram_reward": 0.6297276735305786, "rewards/get_chromagram_reward_std": 0.10537393242120743, "rewards/get_intelligibility_reward": -5.385035419464112, "rewards/get_intelligibility_reward_std": 10.15295705795288, "rewards/get_target_len_reward": -0.018365173134952784, "rewards/get_target_len_reward_std": 0.04643288180232048, "step": 7880 }, { "advantages": -2.3643176874088568e-07, "advantages_std": 1.5616509437561035, "clip_ratio": 0.0, "completion_length": 84.15119171142578, "epoch": 5.9338345864661655, "grad_norm": 474.0, "kl": 0.3755561888217926, "learning_rate": 2.0338345864661656e-06, "loss": 0.0409, "num_tokens": 88685672.0, "reward": -1.517099180072546, "reward_std": 6.650594806671142, "rewards/get_chromagram_reward": 0.6291989326477051, "rewards/get_chromagram_reward_std": 0.10843588039278984, "rewards/get_intelligibility_reward": -5.156713980436325, "rewards/get_intelligibility_reward_std": 10.467614555358887, "rewards/get_target_len_reward": -0.02378233168274164, "rewards/get_target_len_reward_std": 0.06915392931550741, "step": 7890 }, { "advantages": 1.7955899256349994e-07, "advantages_std": 1.5938949942588807, "clip_ratio": 0.0, "completion_length": 83.46071548461914, "epoch": 5.9413533834586465, "grad_norm": 9.5, "kl": 0.4998282104730606, "learning_rate": 2.030075187969925e-06, "loss": 0.0578, "num_tokens": 88983334.0, "reward": -1.556469202041626, "reward_std": 6.985205411911011, "rewards/get_chromagram_reward": 0.6302753865718842, "rewards/get_chromagram_reward_std": 0.12176822200417518, "rewards/get_intelligibility_reward": -5.267308855056763, "rewards/get_intelligibility_reward_std": 11.105116081237792, "rewards/get_target_len_reward": -0.032373837940394876, "rewards/get_target_len_reward_std": 0.09475215002894402, "step": 7900 }, { "advantages": 6.258487275090375e-08, "advantages_std": 1.605884838104248, "clip_ratio": 0.0, "completion_length": 82.43928756713868, "epoch": 5.9488721804511275, "grad_norm": 37.25, "kl": 0.36509293913841245, "learning_rate": 2.026315789473684e-06, "loss": 0.0391, "num_tokens": 89277739.0, "reward": -1.4767971098423005, "reward_std": 6.863310861587524, "rewards/get_chromagram_reward": 0.6345268189907074, "rewards/get_chromagram_reward_std": 0.12481983080506324, "rewards/get_intelligibility_reward": -5.037338101863861, "rewards/get_intelligibility_reward_std": 10.954428386688232, "rewards/get_target_len_reward": -0.02757984409108758, "rewards/get_target_len_reward_std": 0.07784405499696731, "step": 7910 }, { "advantages": -8.953115724352755e-08, "advantages_std": 1.486455249786377, "clip_ratio": 0.0, "completion_length": 88.00476379394532, "epoch": 5.9563909774436095, "grad_norm": 6.125, "kl": 0.36002791225910186, "learning_rate": 2.0225563909774437e-06, "loss": 0.0416, "num_tokens": 89587840.0, "reward": -1.0802738130092622, "reward_std": 6.528935480117798, "rewards/get_chromagram_reward": 0.6253605365753174, "rewards/get_chromagram_reward_std": 0.12136272937059403, "rewards/get_intelligibility_reward": -3.8383922219276427, "rewards/get_intelligibility_reward_std": 10.701953887939453, "rewards/get_target_len_reward": -0.027789629716426133, "rewards/get_target_len_reward_std": 0.07571598924696446, "step": 7920 }, { "advantages": -7.450581023249469e-08, "advantages_std": 1.435997450351715, "clip_ratio": 0.0, "completion_length": 88.46964492797852, "epoch": 5.9639097744360905, "grad_norm": 18.75, "kl": 0.3710513383150101, "learning_rate": 2.018796992481203e-06, "loss": 0.043, "num_tokens": 89898232.0, "reward": -1.3577034890651702, "reward_std": 7.212418031692505, "rewards/get_chromagram_reward": 0.6273108780384063, "rewards/get_chromagram_reward_std": 0.11795835718512535, "rewards/get_intelligibility_reward": -4.674716591835022, "rewards/get_intelligibility_reward_std": 11.762153434753419, "rewards/get_target_len_reward": -0.025704485923051835, "rewards/get_target_len_reward_std": 0.07749940752983094, "step": 7930 }, { "advantages": 3.890445057663783e-07, "advantages_std": 1.536833357810974, "clip_ratio": 0.0, "completion_length": 90.43154830932617, "epoch": 5.9714285714285715, "grad_norm": 6.21875, "kl": 0.33514691740274427, "learning_rate": 2.0150375939849626e-06, "loss": 0.0336, "num_tokens": 90215052.0, "reward": -1.166587858274579, "reward_std": 6.312453126907348, "rewards/get_chromagram_reward": 0.6052829146385192, "rewards/get_chromagram_reward_std": 0.10888011902570724, "rewards/get_intelligibility_reward": -4.090318483114243, "rewards/get_intelligibility_reward_std": 10.21594796180725, "rewards/get_target_len_reward": -0.014727739710360765, "rewards/get_target_len_reward_std": 0.04119625072926283, "step": 7940 }, { "advantages": -1.8924474396442292e-07, "advantages_std": 1.5644995093345642, "clip_ratio": 0.0, "completion_length": 86.95238189697265, "epoch": 5.978947368421053, "grad_norm": 7.25, "kl": 0.3163200944662094, "learning_rate": 2.011278195488722e-06, "loss": 0.0341, "num_tokens": 90521684.0, "reward": -1.7043726980686187, "reward_std": 6.817698001861572, "rewards/get_chromagram_reward": 0.6099827468395234, "rewards/get_chromagram_reward_std": 0.12223256900906562, "rewards/get_intelligibility_reward": -5.7000898838043215, "rewards/get_intelligibility_reward_std": 10.658313989639282, "rewards/get_target_len_reward": -0.023010530322790147, "rewards/get_target_len_reward_std": 0.056262052804231646, "step": 7950 }, { "advantages": 8.924554137479391e-07, "advantages_std": 1.5793184757232666, "clip_ratio": 0.0, "completion_length": 83.6452392578125, "epoch": 5.986466165413534, "grad_norm": 5.46875, "kl": 0.3809826672077179, "learning_rate": 2.0075187969924815e-06, "loss": 0.0398, "num_tokens": 90820167.0, "reward": -1.5370681881904602, "reward_std": 6.8732414722442625, "rewards/get_chromagram_reward": 0.6148294448852539, "rewards/get_chromagram_reward_std": 0.12173845618963242, "rewards/get_intelligibility_reward": -5.202451133728028, "rewards/get_intelligibility_reward_std": 10.899156856536866, "rewards/get_target_len_reward": -0.023582598939538003, "rewards/get_target_len_reward_std": 0.06225805208086967, "step": 7960 }, { "advantages": 2.734363164336173e-07, "advantages_std": 1.6118045687675475, "clip_ratio": 0.0, "completion_length": 87.17857284545899, "epoch": 5.9939849624060155, "grad_norm": 6.96875, "kl": 0.3206441327929497, "learning_rate": 2.0037593984962407e-06, "loss": 0.0338, "num_tokens": 91127400.0, "reward": -1.338904321193695, "reward_std": 6.592849445343018, "rewards/get_chromagram_reward": 0.6174337565898895, "rewards/get_chromagram_reward_std": 0.1077630490064621, "rewards/get_intelligibility_reward": -4.616280210018158, "rewards/get_intelligibility_reward_std": 10.516047191619872, "rewards/get_target_len_reward": -0.017866184283047915, "rewards/get_target_len_reward_std": 0.050554357655346396, "step": 7970 }, { "advantages": 3.6954881466044753e-07, "advantages_std": 1.5720568537712096, "clip_ratio": 0.0, "completion_length": 86.29333572387695, "epoch": 6.002255639097744, "grad_norm": 7264.0, "kl": 1.0428142532706262, "learning_rate": 2.0000000000000003e-06, "loss": 0.1081, "num_tokens": 91433355.0, "reward": -1.3834814786911012, "reward_std": 6.725188589096069, "rewards/get_chromagram_reward": 0.6308774054050446, "rewards/get_chromagram_reward_std": 0.11067381277680396, "rewards/get_intelligibility_reward": -4.760740184783936, "rewards/get_intelligibility_reward_std": 10.797851181030273, "rewards/get_target_len_reward": -0.02058134414255619, "rewards/get_target_len_reward_std": 0.05509445741772652, "step": 7980 }, { "advantages": 6.395081754817511e-07, "advantages_std": 1.533771240711212, "clip_ratio": 0.0, "completion_length": 85.02262115478516, "epoch": 6.009774436090225, "grad_norm": 7.875, "kl": 0.2817109614610672, "learning_rate": 1.9962406015037596e-06, "loss": 0.034, "num_tokens": 91734631.0, "reward": -1.4631374210119248, "reward_std": 6.582286691665649, "rewards/get_chromagram_reward": 0.6157466650009156, "rewards/get_chromagram_reward_std": 0.10654039457440376, "rewards/get_intelligibility_reward": -4.983345782756805, "rewards/get_intelligibility_reward_std": 10.402837800979615, "rewards/get_target_len_reward": -0.021813186444342138, "rewards/get_target_len_reward_std": 0.0684099044650793, "step": 7990 }, { "advantages": -5.8611242081951787e-08, "advantages_std": 1.5856661796569824, "clip_ratio": 0.0, "completion_length": 87.34166793823242, "epoch": 6.0172932330827065, "grad_norm": 8.9375, "kl": 0.3133195355534554, "learning_rate": 1.9924812030075188e-06, "loss": 0.0361, "num_tokens": 92042898.0, "reward": -1.4364587038755416, "reward_std": 6.61562328338623, "rewards/get_chromagram_reward": 0.6262919783592225, "rewards/get_chromagram_reward_std": 0.11840886026620864, "rewards/get_intelligibility_reward": -4.914097237586975, "rewards/get_intelligibility_reward_std": 10.516135978698731, "rewards/get_target_len_reward": -0.021570559963583945, "rewards/get_target_len_reward_std": 0.06995179653167724, "step": 8000 }, { "advantages": 2.9305617132990848e-08, "advantages_std": 1.611814260482788, "clip_ratio": 0.0, "completion_length": 90.39702529907227, "epoch": 6.024812030075188, "grad_norm": 30.75, "kl": 0.3496403008699417, "learning_rate": 1.9887218045112784e-06, "loss": 0.0344, "num_tokens": 92359567.0, "reward": -1.0210472345352173, "reward_std": 6.469062328338623, "rewards/get_chromagram_reward": 0.6231420278549195, "rewards/get_chromagram_reward_std": 0.10452277362346649, "rewards/get_intelligibility_reward": -3.6649996519088743, "rewards/get_intelligibility_reward_std": 10.687770748138428, "rewards/get_target_len_reward": -0.021283876802772283, "rewards/get_target_len_reward_std": 0.05394844859838486, "step": 8010 }, { "advantages": -6.124377453176066e-07, "advantages_std": 1.4987279534339906, "clip_ratio": 0.0, "completion_length": 83.43869171142578, "epoch": 6.032330827067669, "grad_norm": 163.0, "kl": 0.384149894118309, "learning_rate": 1.9849624060150376e-06, "loss": 0.0418, "num_tokens": 92656476.0, "reward": -1.8202392339706421, "reward_std": 6.662428855895996, "rewards/get_chromagram_reward": 0.6189577221870423, "rewards/get_chromagram_reward_std": 0.12275770530104638, "rewards/get_intelligibility_reward": -6.0548642635345455, "rewards/get_intelligibility_reward_std": 10.261438941955566, "rewards/get_target_len_reward": -0.024811002239584923, "rewards/get_target_len_reward_std": 0.06445497255772352, "step": 8020 }, { "advantages": -1.5820067140737136e-07, "advantages_std": 1.6390297174453736, "clip_ratio": 0.0, "completion_length": 87.66785888671875, "epoch": 6.0398496240601505, "grad_norm": 18.0, "kl": 0.31635782122612, "learning_rate": 1.9812030075187973e-06, "loss": 0.0319, "num_tokens": 92966368.0, "reward": -1.2295335441827775, "reward_std": 6.481088876724243, "rewards/get_chromagram_reward": 0.6162499785423279, "rewards/get_chromagram_reward_std": 0.10878583490848541, "rewards/get_intelligibility_reward": -4.289811539649963, "rewards/get_intelligibility_reward_std": 10.47708339691162, "rewards/get_target_len_reward": -0.015038707852363586, "rewards/get_target_len_reward_std": 0.04038047567009926, "step": 8030 }, { "advantages": -4.823009362553421e-07, "advantages_std": 1.5642240166664123, "clip_ratio": 0.0, "completion_length": 88.17440567016601, "epoch": 6.0473684210526315, "grad_norm": 9.375, "kl": 0.6099935069680213, "learning_rate": 1.9774436090225565e-06, "loss": 0.067, "num_tokens": 93276192.0, "reward": -1.6536858409643174, "reward_std": 6.549434661865234, "rewards/get_chromagram_reward": 0.6186293482780456, "rewards/get_chromagram_reward_std": 0.125721525400877, "rewards/get_intelligibility_reward": -5.550586187839508, "rewards/get_intelligibility_reward_std": 10.16653528213501, "rewards/get_target_len_reward": -0.0291005146689713, "rewards/get_target_len_reward_std": 0.09697311259806156, "step": 8040 }, { "advantages": 2.3705265448370482e-07, "advantages_std": 1.6255091428756714, "clip_ratio": 0.0, "completion_length": 84.94821624755859, "epoch": 6.0548872180451125, "grad_norm": 7.90625, "kl": 0.34752358943223954, "learning_rate": 1.973684210526316e-06, "loss": 0.0392, "num_tokens": 93576988.0, "reward": -1.528093084692955, "reward_std": 6.340835666656494, "rewards/get_chromagram_reward": 0.6186446070671081, "rewards/get_chromagram_reward_std": 0.12236835733056069, "rewards/get_intelligibility_reward": -5.179726958274841, "rewards/get_intelligibility_reward_std": 9.91945195198059, "rewards/get_target_len_reward": -0.02319667050614953, "rewards/get_target_len_reward_std": 0.06414449084550142, "step": 8050 }, { "advantages": -2.868473625738943e-07, "advantages_std": 1.5047789096832276, "clip_ratio": 0.0, "completion_length": 87.94285888671875, "epoch": 6.062406015037594, "grad_norm": 6.21875, "kl": 0.31508910208940505, "learning_rate": 1.9699248120300754e-06, "loss": 0.0333, "num_tokens": 93886561.0, "reward": -1.2129630863666534, "reward_std": 6.305604791641235, "rewards/get_chromagram_reward": 0.6133894443511962, "rewards/get_chromagram_reward_std": 0.11958390176296234, "rewards/get_intelligibility_reward": -4.229742407798767, "rewards/get_intelligibility_reward_std": 10.233415031433106, "rewards/get_target_len_reward": -0.022536069620400667, "rewards/get_target_len_reward_std": 0.06264531053602695, "step": 8060 }, { "advantages": 2.2302072437696553e-07, "advantages_std": 1.6339765667915345, "clip_ratio": 0.0, "completion_length": 86.25595321655274, "epoch": 6.0699248120300755, "grad_norm": 7.53125, "kl": 0.26957926601171495, "learning_rate": 1.966165413533835e-06, "loss": 0.0311, "num_tokens": 94191443.0, "reward": -1.3586907029151916, "reward_std": 6.435478687286377, "rewards/get_chromagram_reward": 0.6246838212013245, "rewards/get_chromagram_reward_std": 0.10953456312417983, "rewards/get_intelligibility_reward": -4.679515743255616, "rewards/get_intelligibility_reward_std": 10.33646240234375, "rewards/get_target_len_reward": -0.021239984221756457, "rewards/get_target_len_reward_std": 0.06747781485319138, "step": 8070 }, { "advantages": 1.822908856752292e-07, "advantages_std": 1.5817332983016967, "clip_ratio": 0.0, "completion_length": 88.4273811340332, "epoch": 6.0774436090225565, "grad_norm": 12.375, "kl": 0.30030532777309416, "learning_rate": 1.9624060150375942e-06, "loss": 0.0298, "num_tokens": 94503323.0, "reward": -1.3621489562094211, "reward_std": 6.7359175205230715, "rewards/get_chromagram_reward": 0.609242957830429, "rewards/get_chromagram_reward_std": 0.1111322857439518, "rewards/get_intelligibility_reward": -4.679346296191215, "rewards/get_intelligibility_reward_std": 10.767370700836182, "rewards/get_target_len_reward": -0.016343369521200656, "rewards/get_target_len_reward_std": 0.047802192904055116, "step": 8080 }, { "advantages": 1.2479723672242925e-07, "advantages_std": 1.5469775915145874, "clip_ratio": 0.0, "completion_length": 88.78869323730468, "epoch": 6.084962406015038, "grad_norm": 8.375, "kl": 0.3110333472490311, "learning_rate": 1.9586466165413535e-06, "loss": 0.0299, "num_tokens": 94815534.0, "reward": -1.3442703269422054, "reward_std": 6.71604962348938, "rewards/get_chromagram_reward": 0.6142023801803589, "rewards/get_chromagram_reward_std": 0.1096267469227314, "rewards/get_intelligibility_reward": -4.628188914060592, "rewards/get_intelligibility_reward_std": 10.82657985687256, "rewards/get_target_len_reward": -0.018824245547875762, "rewards/get_target_len_reward_std": 0.04439031798392534, "step": 8090 }, { "advantages": -8.245309075505247e-08, "advantages_std": 1.5762543678283691, "clip_ratio": 0.0, "completion_length": 87.75893020629883, "epoch": 6.092481203007519, "grad_norm": 6.8125, "kl": 0.6441232696175575, "learning_rate": 1.9548872180451127e-06, "loss": 0.0639, "num_tokens": 95124092.0, "reward": -1.5475315034389496, "reward_std": 6.789844131469726, "rewards/get_chromagram_reward": 0.6069821238517761, "rewards/get_chromagram_reward_std": 0.10674103200435639, "rewards/get_intelligibility_reward": -5.235275602340698, "rewards/get_intelligibility_reward_std": 10.816466999053954, "rewards/get_target_len_reward": -0.014300601463764906, "rewards/get_target_len_reward_std": 0.03731911201030016, "step": 8100 }, { "advantages": 5.935629419084876e-08, "advantages_std": 1.6695214748382567, "clip_ratio": 0.0, "completion_length": 87.73809661865235, "epoch": 6.1, "grad_norm": 8.3125, "kl": 0.3115114450454712, "learning_rate": 1.9511278195488723e-06, "loss": 0.0318, "num_tokens": 95433893.0, "reward": -0.9923997074365616, "reward_std": 6.4153810977935795, "rewards/get_chromagram_reward": 0.6363845229148865, "rewards/get_chromagram_reward_std": 0.11569565311074256, "rewards/get_intelligibility_reward": -3.5924025774002075, "rewards/get_intelligibility_reward_std": 10.606056118011475, "rewards/get_target_len_reward": -0.02118076141923666, "rewards/get_target_len_reward_std": 0.05613228138536215, "step": 8110 }, { "advantages": -4.447996587941816e-07, "advantages_std": 1.6991694092750549, "clip_ratio": 0.0, "completion_length": 89.45952606201172, "epoch": 6.107518796992482, "grad_norm": 6.125, "kl": 0.3138931080698967, "learning_rate": 1.9473684210526315e-06, "loss": 0.0337, "num_tokens": 95747313.0, "reward": -1.5589805111289023, "reward_std": 6.462708234786987, "rewards/get_chromagram_reward": 0.6133163690567016, "rewards/get_chromagram_reward_std": 0.11356438770890236, "rewards/get_intelligibility_reward": -5.270368266105652, "rewards/get_intelligibility_reward_std": 10.066627788543702, "rewards/get_target_len_reward": -0.01988951340317726, "rewards/get_target_len_reward_std": 0.05338175091892481, "step": 8120 }, { "advantages": -3.0820569492107096e-07, "advantages_std": 1.469941759109497, "clip_ratio": 0.0, "completion_length": 85.25238189697265, "epoch": 6.115037593984963, "grad_norm": 31.125, "kl": 0.3752498090267181, "learning_rate": 1.943609022556391e-06, "loss": 0.0427, "num_tokens": 96049192.0, "reward": -1.7108392238616943, "reward_std": 7.364109134674072, "rewards/get_chromagram_reward": 0.6208688795566559, "rewards/get_chromagram_reward_std": 0.1251021847128868, "rewards/get_intelligibility_reward": -5.730622339248657, "rewards/get_intelligibility_reward_std": 11.692827415466308, "rewards/get_target_len_reward": -0.022763955313712357, "rewards/get_target_len_reward_std": 0.06721158996224404, "step": 8130 }, { "advantages": -1.5075008619191976e-07, "advantages_std": 1.5935341954231261, "clip_ratio": 0.0, "completion_length": 88.75476455688477, "epoch": 6.122556390977444, "grad_norm": 6.1875, "kl": 0.3593511641025543, "learning_rate": 1.9398496240601504e-06, "loss": 0.0405, "num_tokens": 96361424.0, "reward": -1.5784752249717713, "reward_std": 6.592087030410767, "rewards/get_chromagram_reward": 0.6113418281078339, "rewards/get_chromagram_reward_std": 0.11505894362926483, "rewards/get_intelligibility_reward": -5.325995564460754, "rewards/get_intelligibility_reward_std": 10.418634796142578, "rewards/get_target_len_reward": -0.02077172938734293, "rewards/get_target_len_reward_std": 0.062492662109434605, "step": 8140 }, { "advantages": 3.998478298683494e-07, "advantages_std": 1.515890085697174, "clip_ratio": 0.0, "completion_length": 84.33035812377929, "epoch": 6.130075187969925, "grad_norm": 6.96875, "kl": 0.2902126759290695, "learning_rate": 1.93609022556391e-06, "loss": 0.0302, "num_tokens": 96661063.0, "reward": -1.3295504868030548, "reward_std": 6.396369504928589, "rewards/get_chromagram_reward": 0.6250507950782775, "rewards/get_chromagram_reward_std": 0.12326680570840835, "rewards/get_intelligibility_reward": -4.596553935110569, "rewards/get_intelligibility_reward_std": 10.163883447647095, "rewards/get_target_len_reward": -0.017148146592080592, "rewards/get_target_len_reward_std": 0.04615513402968645, "step": 8150 }, { "advantages": -2.834946002394645e-07, "advantages_std": 1.62041996717453, "clip_ratio": 0.0, "completion_length": 88.4428596496582, "epoch": 6.137593984962406, "grad_norm": 7.25, "kl": 0.29210630506277085, "learning_rate": 1.9323308270676693e-06, "loss": 0.0338, "num_tokens": 96972724.0, "reward": -1.1920385241508484, "reward_std": 6.446213865280152, "rewards/get_chromagram_reward": 0.624556976556778, "rewards/get_chromagram_reward_std": 0.10356669053435326, "rewards/get_intelligibility_reward": -4.179461181163788, "rewards/get_intelligibility_reward_std": 10.4103515625, "rewards/get_target_len_reward": -0.02121110763400793, "rewards/get_target_len_reward_std": 0.06671831868588925, "step": 8160 }, { "advantages": 2.3022295110308731e-07, "advantages_std": 1.5382867932319642, "clip_ratio": 0.0, "completion_length": 82.35952606201172, "epoch": 6.145112781954888, "grad_norm": 8.375, "kl": 0.3417093217372894, "learning_rate": 1.928571428571429e-06, "loss": 0.0426, "num_tokens": 97266863.0, "reward": -1.7890142560005189, "reward_std": 6.895098543167114, "rewards/get_chromagram_reward": 0.6114492297172547, "rewards/get_chromagram_reward_std": 0.12038289308547974, "rewards/get_intelligibility_reward": -5.953407573699951, "rewards/get_intelligibility_reward_std": 10.688486623764039, "rewards/get_target_len_reward": -0.025084178801625966, "rewards/get_target_len_reward_std": 0.0768413070589304, "step": 8170 }, { "advantages": 2.2798777941090975e-07, "advantages_std": 1.5938152074813843, "clip_ratio": 0.0, "completion_length": 87.05595245361329, "epoch": 6.152631578947369, "grad_norm": 7.15625, "kl": 0.30286179631948473, "learning_rate": 1.924812030075188e-06, "loss": 0.0342, "num_tokens": 97573766.0, "reward": -1.5368107587099076, "reward_std": 7.258013725280762, "rewards/get_chromagram_reward": 0.6315832614898682, "rewards/get_chromagram_reward_std": 0.11302488297224045, "rewards/get_intelligibility_reward": -5.219581270217896, "rewards/get_intelligibility_reward_std": 11.637852764129638, "rewards/get_target_len_reward": -0.02243395196273923, "rewards/get_target_len_reward_std": 0.06936036106199026, "step": 8180 }, { "advantages": -5.406637967553251e-07, "advantages_std": 1.5901297569274901, "clip_ratio": 0.0, "completion_length": 83.93274002075195, "epoch": 6.16015037593985, "grad_norm": 31.75, "kl": 0.3345526769757271, "learning_rate": 1.9210526315789474e-06, "loss": 0.0365, "num_tokens": 97871945.0, "reward": -1.9582500457763672, "reward_std": 6.844857597351075, "rewards/get_chromagram_reward": 0.6026628851890564, "rewards/get_chromagram_reward_std": 0.1115984320640564, "rewards/get_intelligibility_reward": -6.46079785823822, "rewards/get_intelligibility_reward_std": 10.43050413131714, "rewards/get_target_len_reward": -0.016614936850965024, "rewards/get_target_len_reward_std": 0.053378655947744845, "step": 8190 }, { "advantages": 5.277494210531586e-08, "advantages_std": 1.4953957557678224, "clip_ratio": 0.0, "completion_length": 88.58690567016602, "epoch": 6.167669172932331, "grad_norm": 8.375, "kl": 0.49890299141407013, "learning_rate": 1.917293233082707e-06, "loss": 0.0509, "num_tokens": 98183337.0, "reward": -1.4810203466564418, "reward_std": 6.8311989307403564, "rewards/get_chromagram_reward": 0.6411927580833435, "rewards/get_chromagram_reward_std": 0.11407085806131363, "rewards/get_intelligibility_reward": -5.062011855840683, "rewards/get_intelligibility_reward_std": 10.800096607208252, "rewards/get_target_len_reward": -0.022241707518696786, "rewards/get_target_len_reward_std": 0.04992542583495378, "step": 8200 }, { "advantages": 3.0721227730623466e-07, "advantages_std": 1.7018776655197143, "clip_ratio": 0.0, "completion_length": 87.16607208251953, "epoch": 6.175187969924812, "grad_norm": 28.5, "kl": 0.402908644080162, "learning_rate": 1.9135338345864662e-06, "loss": 0.0441, "num_tokens": 98491721.0, "reward": -1.239027801156044, "reward_std": 6.582078361511231, "rewards/get_chromagram_reward": 0.6156240582466126, "rewards/get_chromagram_reward_std": 0.1105881929397583, "rewards/get_intelligibility_reward": -4.306832981109619, "rewards/get_intelligibility_reward_std": 10.683900594711304, "rewards/get_target_len_reward": -0.025874282885342836, "rewards/get_target_len_reward_std": 0.07156331483274699, "step": 8210 }, { "advantages": -2.3345153392639873e-07, "advantages_std": 1.6137319803237915, "clip_ratio": 0.0, "completion_length": 84.79285736083985, "epoch": 6.182706766917293, "grad_norm": 7.5625, "kl": 0.4480709329247475, "learning_rate": 1.909774436090226e-06, "loss": 0.0512, "num_tokens": 98792316.0, "reward": -1.655766987800598, "reward_std": 6.44510407447815, "rewards/get_chromagram_reward": 0.6254911303520203, "rewards/get_chromagram_reward_std": 0.11293328404426575, "rewards/get_intelligibility_reward": -5.572954297065735, "rewards/get_intelligibility_reward_std": 9.979540252685547, "rewards/get_target_len_reward": -0.01983743775635958, "rewards/get_target_len_reward_std": 0.05844023115932941, "step": 8220 }, { "advantages": -7.078050803244196e-08, "advantages_std": 1.6125649809837341, "clip_ratio": 0.0, "completion_length": 85.79345474243163, "epoch": 6.190225563909775, "grad_norm": 186.0, "kl": 3.631215937435627, "learning_rate": 1.906015037593985e-06, "loss": 0.3673, "num_tokens": 99095940.0, "reward": -1.8117383182048798, "reward_std": 6.934366941452026, "rewards/get_chromagram_reward": 0.6273930549621582, "rewards/get_chromagram_reward_std": 0.1096891388297081, "rewards/get_intelligibility_reward": -6.042661952972412, "rewards/get_intelligibility_reward_std": 10.70831527709961, "rewards/get_target_len_reward": -0.019945572968572377, "rewards/get_target_len_reward_std": 0.06038584988564253, "step": 8230 }, { "advantages": 3.5768996617946416e-07, "advantages_std": 1.748731517791748, "clip_ratio": 0.0, "completion_length": 89.18452529907226, "epoch": 6.197744360902256, "grad_norm": 5.3125, "kl": 0.3243556499481201, "learning_rate": 1.9022556390977445e-06, "loss": 0.0343, "num_tokens": 99409450.0, "reward": -1.232142798602581, "reward_std": 6.382209587097168, "rewards/get_chromagram_reward": 0.6276563227176666, "rewards/get_chromagram_reward_std": 0.11100775673985482, "rewards/get_intelligibility_reward": -4.3032633543014525, "rewards/get_intelligibility_reward_std": 10.270160007476807, "rewards/get_target_len_reward": -0.0208210751414299, "rewards/get_target_len_reward_std": 0.055809604562819005, "step": 8240 }, { "advantages": 5.664924970005814e-07, "advantages_std": 1.6130936205387116, "clip_ratio": 0.0, "completion_length": 88.2357162475586, "epoch": 6.205263157894737, "grad_norm": 7.0, "kl": 0.4117813140153885, "learning_rate": 1.898496240601504e-06, "loss": 0.0465, "num_tokens": 99718961.0, "reward": -1.6371686838567256, "reward_std": 7.206591939926147, "rewards/get_chromagram_reward": 0.6129654109477997, "rewards/get_chromagram_reward_std": 0.10508479103446007, "rewards/get_intelligibility_reward": -5.503394261002541, "rewards/get_intelligibility_reward_std": 11.317814826965332, "rewards/get_target_len_reward": -0.021076952386647464, "rewards/get_target_len_reward_std": 0.07323625609278679, "step": 8250 }, { "advantages": 1.3535223644112194e-07, "advantages_std": 1.5970824837684632, "clip_ratio": 0.0, "completion_length": 87.04643173217774, "epoch": 6.212781954887218, "grad_norm": 8.1875, "kl": 0.660678879916668, "learning_rate": 1.8947368421052634e-06, "loss": 0.0729, "num_tokens": 100026364.0, "reward": -1.2636336654424667, "reward_std": 6.453917837142944, "rewards/get_chromagram_reward": 0.6379686415195465, "rewards/get_chromagram_reward_std": 0.10256423130631447, "rewards/get_intelligibility_reward": -4.404207837581635, "rewards/get_intelligibility_reward_std": 10.436718273162843, "rewards/get_target_len_reward": -0.024661644268780945, "rewards/get_target_len_reward_std": 0.06927633434534072, "step": 8260 }, { "advantages": -2.4090209649330064e-08, "advantages_std": 1.5134394288063049, "clip_ratio": 0.0, "completion_length": 88.10119171142578, "epoch": 6.220300751879699, "grad_norm": 13.25, "kl": 0.3364943116903305, "learning_rate": 1.8909774436090228e-06, "loss": 0.0371, "num_tokens": 100336374.0, "reward": -1.2326872587203979, "reward_std": 6.58498969078064, "rewards/get_chromagram_reward": 0.6152431964874268, "rewards/get_chromagram_reward_std": 0.09777917936444283, "rewards/get_intelligibility_reward": -4.295182102918625, "rewards/get_intelligibility_reward_std": 10.663773727416991, "rewards/get_target_len_reward": -0.0181226521730423, "rewards/get_target_len_reward_std": 0.06043642610311508, "step": 8270 }, { "advantages": -2.585351523975987e-07, "advantages_std": 1.4786474108695984, "clip_ratio": 0.0, "completion_length": 87.06488189697265, "epoch": 6.227819548872181, "grad_norm": 9.0625, "kl": 0.32379979491233823, "learning_rate": 1.8872180451127823e-06, "loss": 0.0392, "num_tokens": 100643174.0, "reward": -1.5286317825317384, "reward_std": 6.597978019714356, "rewards/get_chromagram_reward": 0.6205591857433319, "rewards/get_chromagram_reward_std": 0.11373497024178505, "rewards/get_intelligibility_reward": -5.185364344716072, "rewards/get_intelligibility_reward_std": 10.417848777770995, "rewards/get_target_len_reward": -0.021089773811399936, "rewards/get_target_len_reward_std": 0.06376664116978645, "step": 8280 }, { "advantages": 3.129243943078563e-07, "advantages_std": 1.6236744046211242, "clip_ratio": 0.0, "completion_length": 90.57024002075195, "epoch": 6.235338345864662, "grad_norm": 6.8125, "kl": 0.37636581659317014, "learning_rate": 1.8834586466165413e-06, "loss": 0.044, "num_tokens": 100960519.0, "reward": -1.2806937724351883, "reward_std": 6.859244298934937, "rewards/get_chromagram_reward": 0.6241738677024842, "rewards/get_chromagram_reward_std": 0.11543380096554756, "rewards/get_intelligibility_reward": -4.445393490791321, "rewards/get_intelligibility_reward_std": 10.988745975494385, "rewards/get_target_len_reward": -0.020861545857042075, "rewards/get_target_len_reward_std": 0.06513547562062741, "step": 8290 }, { "advantages": 4.6193599700927736e-08, "advantages_std": 1.6427213549613953, "clip_ratio": 0.0, "completion_length": 87.55000305175781, "epoch": 6.242857142857143, "grad_norm": 9.125, "kl": 0.3940353602170944, "learning_rate": 1.8796992481203007e-06, "loss": 0.0439, "num_tokens": 101268639.0, "reward": -1.496222859621048, "reward_std": 6.498574066162109, "rewards/get_chromagram_reward": 0.631698876619339, "rewards/get_chromagram_reward_std": 0.11189599186182023, "rewards/get_intelligibility_reward": -5.096735191345215, "rewards/get_intelligibility_reward_std": 10.293656492233277, "rewards/get_target_len_reward": -0.023632081225514412, "rewards/get_target_len_reward_std": 0.06621262319386005, "step": 8300 }, { "advantages": -3.79979621811799e-07, "advantages_std": 1.5759154319763184, "clip_ratio": 0.0, "completion_length": 83.46607208251953, "epoch": 6.250375939849624, "grad_norm": 7.25, "kl": 12158.842534568907, "learning_rate": 1.8759398496240601e-06, "loss": 1215.8889, "num_tokens": 101566074.0, "reward": -1.5842579126358032, "reward_std": 6.644231128692627, "rewards/get_chromagram_reward": 0.6195006787776947, "rewards/get_chromagram_reward_std": 0.11031231805682182, "rewards/get_intelligibility_reward": -5.348876094818115, "rewards/get_intelligibility_reward_std": 10.512411499023438, "rewards/get_target_len_reward": -0.023398030642420055, "rewards/get_target_len_reward_std": 0.06640774458646774, "step": 8310 }, { "advantages": 1.5286108485668136e-07, "advantages_std": 1.6544549465179443, "clip_ratio": 0.0, "completion_length": 83.01488189697265, "epoch": 6.257894736842105, "grad_norm": 7.78125, "kl": 0.3186744153499603, "learning_rate": 1.8721804511278196e-06, "loss": 0.0353, "num_tokens": 101862129.0, "reward": -1.4059077441692351, "reward_std": 6.380411100387573, "rewards/get_chromagram_reward": 0.6233551204204559, "rewards/get_chromagram_reward_std": 0.11690683215856552, "rewards/get_intelligibility_reward": -4.819263887405396, "rewards/get_intelligibility_reward_std": 10.128415775299072, "rewards/get_target_len_reward": -0.021814127545803787, "rewards/get_target_len_reward_std": 0.06665037646889686, "step": 8320 }, { "advantages": 1.490116545710407e-08, "advantages_std": 1.5730611205101013, "clip_ratio": 0.0, "completion_length": 87.61488189697266, "epoch": 6.265413533834587, "grad_norm": 5.09375, "kl": 0.33536899983882906, "learning_rate": 1.868421052631579e-06, "loss": 0.043, "num_tokens": 102170609.0, "reward": -1.378728559613228, "reward_std": 6.750386762619018, "rewards/get_chromagram_reward": 0.6135457396507263, "rewards/get_chromagram_reward_std": 0.12062636762857437, "rewards/get_intelligibility_reward": -4.7219622254371645, "rewards/get_intelligibility_reward_std": 10.827591705322266, "rewards/get_target_len_reward": -0.027769038919359446, "rewards/get_target_len_reward_std": 0.08973861802369357, "step": 8330 }, { "advantages": 1.559654998928295e-07, "advantages_std": 1.5384475350379945, "clip_ratio": 0.0, "completion_length": 88.77202453613282, "epoch": 6.272932330827068, "grad_norm": 6.28125, "kl": 0.36056165099143983, "learning_rate": 1.8646616541353384e-06, "loss": 0.0356, "num_tokens": 102482310.0, "reward": -1.124573567509651, "reward_std": 6.1969846248626705, "rewards/get_chromagram_reward": 0.6254005432128906, "rewards/get_chromagram_reward_std": 0.10148084163665771, "rewards/get_intelligibility_reward": -3.9837652325630186, "rewards/get_intelligibility_reward_std": 10.051096773147583, "rewards/get_target_len_reward": -0.015355863701552152, "rewards/get_target_len_reward_std": 0.04210511483252048, "step": 8340 }, { "advantages": 1.912315923391361e-08, "advantages_std": 1.4889129996299744, "clip_ratio": 0.0, "completion_length": 89.9702392578125, "epoch": 6.280451127819549, "grad_norm": 5.96875, "kl": 0.33595179915428164, "learning_rate": 1.8609022556390979e-06, "loss": 0.0335, "num_tokens": 102797762.0, "reward": -1.3522529363632203, "reward_std": 6.929391670227051, "rewards/get_chromagram_reward": 0.6337267577648162, "rewards/get_chromagram_reward_std": 0.1047416977584362, "rewards/get_intelligibility_reward": -4.6725863218307495, "rewards/get_intelligibility_reward_std": 11.20484275817871, "rewards/get_target_len_reward": -0.01789900762960315, "rewards/get_target_len_reward_std": 0.044453246705234054, "step": 8350 }, { "advantages": -3.428508954073095e-07, "advantages_std": 1.634053146839142, "clip_ratio": 0.0, "completion_length": 86.97202377319336, "epoch": 6.28796992481203, "grad_norm": 10.0, "kl": 0.35779112577438354, "learning_rate": 1.8571428571428573e-06, "loss": 0.0369, "num_tokens": 103103782.0, "reward": -1.4777903586626053, "reward_std": 7.088888502120971, "rewards/get_chromagram_reward": 0.6264364421367645, "rewards/get_chromagram_reward_std": 0.11746482402086258, "rewards/get_intelligibility_reward": -5.032951909303665, "rewards/get_intelligibility_reward_std": 11.310615634918213, "rewards/get_target_len_reward": -0.02685513999313116, "rewards/get_target_len_reward_std": 0.07638096138834953, "step": 8360 }, { "advantages": 2.1234156264426928e-07, "advantages_std": 1.5911062717437745, "clip_ratio": 0.0, "completion_length": 85.92916946411133, "epoch": 6.295488721804511, "grad_norm": 506.0, "kl": 0.4133845239877701, "learning_rate": 1.8533834586466167e-06, "loss": 0.0495, "num_tokens": 103407520.0, "reward": -1.6198764503002168, "reward_std": 6.933388805389404, "rewards/get_chromagram_reward": 0.6128597974777221, "rewards/get_chromagram_reward_std": 0.10890985131263733, "rewards/get_intelligibility_reward": -5.447985672950745, "rewards/get_intelligibility_reward_std": 10.8856369972229, "rewards/get_target_len_reward": -0.02450302317738533, "rewards/get_target_len_reward_std": 0.08764106258749962, "step": 8370 }, { "advantages": -4.8925475937267035e-08, "advantages_std": 1.5571423411369323, "clip_ratio": 0.0, "completion_length": 84.05119171142579, "epoch": 6.303007518796992, "grad_norm": 6.15625, "kl": 0.3722055435180664, "learning_rate": 1.8496240601503762e-06, "loss": 0.0402, "num_tokens": 103706409.0, "reward": -1.3056098520755768, "reward_std": 6.024608469009399, "rewards/get_chromagram_reward": 0.6267408490180969, "rewards/get_chromagram_reward_std": 0.11162896826863289, "rewards/get_intelligibility_reward": -4.523062682151794, "rewards/get_intelligibility_reward_std": 9.563552713394165, "rewards/get_target_len_reward": -0.020507614687085153, "rewards/get_target_len_reward_std": 0.05589599050581455, "step": 8380 }, { "advantages": 1.0083120045578653e-07, "advantages_std": 1.600996732711792, "clip_ratio": 0.0, "completion_length": 85.49881134033203, "epoch": 6.310526315789474, "grad_norm": 7.90625, "kl": 0.3630474954843521, "learning_rate": 1.8458646616541354e-06, "loss": 0.0406, "num_tokens": 104009587.0, "reward": -1.3805613562464714, "reward_std": 6.6248880386352536, "rewards/get_chromagram_reward": 0.6300054669380188, "rewards/get_chromagram_reward_std": 0.1087301142513752, "rewards/get_intelligibility_reward": -4.750552833080292, "rewards/get_intelligibility_reward_std": 10.588902616500855, "rewards/get_target_len_reward": -0.021136431582272054, "rewards/get_target_len_reward_std": 0.05443032290786505, "step": 8390 }, { "advantages": 1.1796752232839936e-07, "advantages_std": 1.6699777483940124, "clip_ratio": 0.0, "completion_length": 86.6827407836914, "epoch": 6.318045112781955, "grad_norm": 5.46875, "kl": 0.33182170391082766, "learning_rate": 1.8421052631578948e-06, "loss": 0.0341, "num_tokens": 104316038.0, "reward": -1.4212640821933746, "reward_std": 6.47331919670105, "rewards/get_chromagram_reward": 0.6123316049575805, "rewards/get_chromagram_reward_std": 0.10759163647890091, "rewards/get_intelligibility_reward": -4.858754765987396, "rewards/get_intelligibility_reward_std": 10.282232522964478, "rewards/get_target_len_reward": -0.017369027622044085, "rewards/get_target_len_reward_std": 0.04712581820785999, "step": 8400 }, { "advantages": 2.6561320716211866e-07, "advantages_std": 1.7279439330101014, "clip_ratio": 0.0, "completion_length": 87.80238342285156, "epoch": 6.325563909774436, "grad_norm": 32.75, "kl": 0.44390062987804413, "learning_rate": 1.8383458646616543e-06, "loss": 0.0476, "num_tokens": 104624926.0, "reward": -1.646631732583046, "reward_std": 7.138958835601807, "rewards/get_chromagram_reward": 0.6219754099845887, "rewards/get_chromagram_reward_std": 0.1156666859984398, "rewards/get_intelligibility_reward": -5.541904759407044, "rewards/get_intelligibility_reward_std": 11.233854007720947, "rewards/get_target_len_reward": -0.019965619780123234, "rewards/get_target_len_reward_std": 0.057146585918962954, "step": 8410 }, { "advantages": 2.248833581575127e-07, "advantages_std": 1.6452932000160216, "clip_ratio": 0.0, "completion_length": 86.79107360839843, "epoch": 6.333082706766917, "grad_norm": 11.625, "kl": 0.3099846750497818, "learning_rate": 1.8345864661654137e-06, "loss": 0.0353, "num_tokens": 104930820.0, "reward": -1.535498809814453, "reward_std": 6.530526494979858, "rewards/get_chromagram_reward": 0.6166078150272369, "rewards/get_chromagram_reward_std": 0.11259147003293038, "rewards/get_intelligibility_reward": -5.201871180534363, "rewards/get_intelligibility_reward_std": 10.227967166900635, "rewards/get_target_len_reward": -0.021232699742540717, "rewards/get_target_len_reward_std": 0.0649916348978877, "step": 8420 }, { "advantages": -2.1134814573997573e-07, "advantages_std": 1.6378458976745605, "clip_ratio": 0.0, "completion_length": 89.88333511352539, "epoch": 6.340601503759398, "grad_norm": 6.03125, "kl": 0.35197239816188813, "learning_rate": 1.8308270676691731e-06, "loss": 0.0441, "num_tokens": 105245119.0, "reward": -1.078207679092884, "reward_std": 6.285722970962524, "rewards/get_chromagram_reward": 0.6210645318031311, "rewards/get_chromagram_reward_std": 0.11385365948081017, "rewards/get_intelligibility_reward": -3.830742156505585, "rewards/get_intelligibility_reward_std": 10.259477138519287, "rewards/get_target_len_reward": -0.024945309106260537, "rewards/get_target_len_reward_std": 0.0796560823917389, "step": 8430 }, { "advantages": -1.862645206074376e-07, "advantages_std": 1.616140902042389, "clip_ratio": 0.0, "completion_length": 84.11964340209961, "epoch": 6.34812030075188, "grad_norm": 10.9375, "kl": 0.43686449378728864, "learning_rate": 1.8270676691729326e-06, "loss": 0.045, "num_tokens": 105543834.0, "reward": -1.6489113748073578, "reward_std": 6.583069133758545, "rewards/get_chromagram_reward": 0.6107912182807922, "rewards/get_chromagram_reward_std": 0.10622861087322236, "rewards/get_intelligibility_reward": -5.538575506210327, "rewards/get_intelligibility_reward_std": 10.245015335083007, "rewards/get_target_len_reward": -0.01894954005256295, "rewards/get_target_len_reward_std": 0.0537716269493103, "step": 8440 }, { "advantages": -3.558894213995245e-07, "advantages_std": 1.6489933967590331, "clip_ratio": 0.0, "completion_length": 86.92559585571288, "epoch": 6.355639097744361, "grad_norm": 6.03125, "kl": 0.37896920144557955, "learning_rate": 1.823308270676692e-06, "loss": 0.0406, "num_tokens": 105850112.0, "reward": -1.5943864196538926, "reward_std": 6.7091076374053955, "rewards/get_chromagram_reward": 0.6330448150634765, "rewards/get_chromagram_reward_std": 0.12480418682098389, "rewards/get_intelligibility_reward": -5.390610149502754, "rewards/get_intelligibility_reward_std": 10.372951984405518, "rewards/get_target_len_reward": -0.0255936867557466, "rewards/get_target_len_reward_std": 0.06997879669070244, "step": 8450 }, { "advantages": 3.0870238560964934e-07, "advantages_std": 1.5005107045173645, "clip_ratio": 0.0, "completion_length": 89.99821548461914, "epoch": 6.363157894736842, "grad_norm": 9.875, "kl": 0.328534708917141, "learning_rate": 1.8195488721804514e-06, "loss": 0.0422, "num_tokens": 106165542.0, "reward": -1.3361444085836411, "reward_std": 6.730158090591431, "rewards/get_chromagram_reward": 0.6162006616592407, "rewards/get_chromagram_reward_std": 0.10386288464069367, "rewards/get_intelligibility_reward": -4.604814183712006, "rewards/get_intelligibility_reward_std": 10.82592601776123, "rewards/get_target_len_reward": -0.019819434359669687, "rewards/get_target_len_reward_std": 0.06969902403652668, "step": 8460 }, { "advantages": -1.9297004598684e-07, "advantages_std": 1.5895915508270264, "clip_ratio": 0.0, "completion_length": 84.47619323730468, "epoch": 6.370676691729323, "grad_norm": 9.0, "kl": 0.3012957707047462, "learning_rate": 1.8157894736842109e-06, "loss": 0.0336, "num_tokens": 106465645.0, "reward": -1.5179098486900329, "reward_std": 6.907446670532226, "rewards/get_chromagram_reward": 0.6151711463928222, "rewards/get_chromagram_reward_std": 0.11792625412344933, "rewards/get_intelligibility_reward": -5.149234783649445, "rewards/get_intelligibility_reward_std": 11.002441883087158, "rewards/get_target_len_reward": -0.019665668066591025, "rewards/get_target_len_reward_std": 0.058271700888872145, "step": 8470 }, { "advantages": 8.170803766915924e-08, "advantages_std": 1.5603940725326537, "clip_ratio": 0.0, "completion_length": 88.06666717529296, "epoch": 6.378195488721804, "grad_norm": 7.40625, "kl": 0.33412752449512484, "learning_rate": 1.8120300751879703e-06, "loss": 0.0359, "num_tokens": 106775755.0, "reward": -1.5474644482135773, "reward_std": 6.983898639678955, "rewards/get_chromagram_reward": 0.6263640344142913, "rewards/get_chromagram_reward_std": 0.11608935371041298, "rewards/get_intelligibility_reward": -5.249857783317566, "rewards/get_intelligibility_reward_std": 11.141074562072754, "rewards/get_target_len_reward": -0.018899236246943472, "rewards/get_target_len_reward_std": 0.04913288354873657, "step": 8480 }, { "advantages": -2.186745465593276e-07, "advantages_std": 1.6869441747665406, "clip_ratio": 0.0, "completion_length": 84.07559661865234, "epoch": 6.385714285714286, "grad_norm": 5.09375, "kl": 2.093244831264019, "learning_rate": 1.8082706766917293e-06, "loss": 0.2162, "num_tokens": 107074777.0, "reward": -1.7199529886245728, "reward_std": 6.821811771392822, "rewards/get_chromagram_reward": 0.6313170075416565, "rewards/get_chromagram_reward_std": 0.11584596931934357, "rewards/get_intelligibility_reward": -5.7700822114944454, "rewards/get_intelligibility_reward_std": 10.664770889282227, "rewards/get_target_len_reward": -0.0210935284383595, "rewards/get_target_len_reward_std": 0.058279063180089, "step": 8490 }, { "advantages": 6.52546734158932e-07, "advantages_std": 1.5661371231079102, "clip_ratio": 0.0, "completion_length": 90.01190567016602, "epoch": 6.393233082706767, "grad_norm": 11.0, "kl": 1.2664696410298348, "learning_rate": 1.8045112781954887e-06, "loss": 0.1318, "num_tokens": 107389476.0, "reward": -1.5618727438151836, "reward_std": 7.49455828666687, "rewards/get_chromagram_reward": 0.623270982503891, "rewards/get_chromagram_reward_std": 0.11199977099895478, "rewards/get_intelligibility_reward": -5.290174907445907, "rewards/get_intelligibility_reward_std": 11.975935935974121, "rewards/get_target_len_reward": -0.018713922891765832, "rewards/get_target_len_reward_std": 0.054541667178273204, "step": 8500 }, { "advantages": -1.0679164077487258e-07, "advantages_std": 1.5995036005973815, "clip_ratio": 0.0, "completion_length": 82.4601203918457, "epoch": 6.400751879699248, "grad_norm": 9.25, "kl": 0.4671302646398544, "learning_rate": 1.8007518796992482e-06, "loss": 0.0484, "num_tokens": 107684417.0, "reward": -1.7090104311704635, "reward_std": 6.771821355819702, "rewards/get_chromagram_reward": 0.6103951513767243, "rewards/get_chromagram_reward_std": 0.1157499797642231, "rewards/get_intelligibility_reward": -5.714370238780975, "rewards/get_intelligibility_reward_std": 10.493238353729248, "rewards/get_target_len_reward": -0.023055852763354777, "rewards/get_target_len_reward_std": 0.05546447858214378, "step": 8510 }, { "advantages": -5.736946775414253e-08, "advantages_std": 1.5776451468467712, "clip_ratio": 0.0, "completion_length": 86.18928680419921, "epoch": 6.408270676691729, "grad_norm": 6.46875, "kl": 0.3133410021662712, "learning_rate": 1.7969924812030076e-06, "loss": 0.0351, "num_tokens": 107989269.0, "reward": -1.4148914575576783, "reward_std": 6.5578773021698, "rewards/get_chromagram_reward": 0.6112196803092956, "rewards/get_chromagram_reward_std": 0.12375476881861687, "rewards/get_intelligibility_reward": -4.83680567741394, "rewards/get_intelligibility_reward_std": 10.457363033294678, "rewards/get_target_len_reward": -0.019088097847998142, "rewards/get_target_len_reward_std": 0.05398804843425751, "step": 8520 }, { "advantages": 1.7459195049696065e-07, "advantages_std": 1.5523484230041504, "clip_ratio": 0.0, "completion_length": 88.58095321655273, "epoch": 6.41578947368421, "grad_norm": 9.4375, "kl": 0.5049596816301346, "learning_rate": 1.793233082706767e-06, "loss": 0.0587, "num_tokens": 108299956.0, "reward": -1.4245594978332519, "reward_std": 6.555758714675903, "rewards/get_chromagram_reward": 0.6246987998485565, "rewards/get_chromagram_reward_std": 0.11914677992463112, "rewards/get_intelligibility_reward": -4.87405880689621, "rewards/get_intelligibility_reward_std": 10.457990598678588, "rewards/get_target_len_reward": -0.02431822130456567, "rewards/get_target_len_reward_std": 0.06859862487763166, "step": 8530 }, { "advantages": -5.985301498867557e-08, "advantages_std": 1.5063032269477845, "clip_ratio": 0.0, "completion_length": 88.59047775268554, "epoch": 6.423308270676692, "grad_norm": 7.5, "kl": 4.2682260736823086, "learning_rate": 1.7894736842105265e-06, "loss": 0.4341, "num_tokens": 108611511.0, "reward": -1.5688146725296974, "reward_std": 7.056678915023804, "rewards/get_chromagram_reward": 0.6214147567749023, "rewards/get_chromagram_reward_std": 0.11932239979505539, "rewards/get_intelligibility_reward": -5.303305222094059, "rewards/get_intelligibility_reward_std": 11.121390342712402, "rewards/get_target_len_reward": -0.02455320842564106, "rewards/get_target_len_reward_std": 0.06936857439577579, "step": 8540 }, { "advantages": -7.162491741041776e-07, "advantages_std": 1.7330376267433167, "clip_ratio": 0.0, "completion_length": 84.84761962890624, "epoch": 6.430827067669173, "grad_norm": 7.03125, "kl": 0.41429437398910524, "learning_rate": 1.7857142857142859e-06, "loss": 0.0445, "num_tokens": 108911633.0, "reward": -1.6869840025901794, "reward_std": 6.495787239074707, "rewards/get_chromagram_reward": 0.6123341917991638, "rewards/get_chromagram_reward_std": 0.10488304197788238, "rewards/get_intelligibility_reward": -5.649365377426148, "rewards/get_intelligibility_reward_std": 10.102130126953124, "rewards/get_target_len_reward": -0.02392053948715329, "rewards/get_target_len_reward_std": 0.07797471843659878, "step": 8550 }, { "advantages": -5.784134136987973e-07, "advantages_std": 1.498878812789917, "clip_ratio": 0.0, "completion_length": 89.68035888671875, "epoch": 6.438345864661654, "grad_norm": 6.8125, "kl": 0.3487410917878151, "learning_rate": 1.7819548872180453e-06, "loss": 0.041, "num_tokens": 109225082.0, "reward": -1.1978444576263427, "reward_std": 6.764661836624145, "rewards/get_chromagram_reward": 0.6185143291950226, "rewards/get_chromagram_reward_std": 0.11313116848468781, "rewards/get_intelligibility_reward": -4.189542031288147, "rewards/get_intelligibility_reward_std": 11.098699617385865, "rewards/get_target_len_reward": -0.022505429945886134, "rewards/get_target_len_reward_std": 0.0727207712829113, "step": 8560 }, { "advantages": 2.95912244041574e-07, "advantages_std": 1.5081347227096558, "clip_ratio": 0.0, "completion_length": 88.14404907226563, "epoch": 6.445864661654135, "grad_norm": 68.5, "kl": 0.43108378648757933, "learning_rate": 1.7781954887218048e-06, "loss": 0.0456, "num_tokens": 109534683.0, "reward": -1.529983852803707, "reward_std": 6.960060405731201, "rewards/get_chromagram_reward": 0.6225944101810456, "rewards/get_chromagram_reward_std": 0.10861722603440285, "rewards/get_intelligibility_reward": -5.19299818277359, "rewards/get_intelligibility_reward_std": 11.049736833572387, "rewards/get_target_len_reward": -0.019547457993030547, "rewards/get_target_len_reward_std": 0.05952412653714419, "step": 8570 }, { "advantages": 5.5258470865737766e-08, "advantages_std": 1.6067670226097106, "clip_ratio": 0.0, "completion_length": 86.61964340209961, "epoch": 6.453383458646616, "grad_norm": 17.875, "kl": 0.3261327803134918, "learning_rate": 1.7744360902255642e-06, "loss": 0.0383, "num_tokens": 109839985.0, "reward": -1.5939397394657135, "reward_std": 6.7601910591125485, "rewards/get_chromagram_reward": 0.6212305128574371, "rewards/get_chromagram_reward_std": 0.12474968880414963, "rewards/get_intelligibility_reward": -5.381693542003632, "rewards/get_intelligibility_reward_std": 10.635554122924805, "rewards/get_target_len_reward": -0.02135584419593215, "rewards/get_target_len_reward_std": 0.06412192359566689, "step": 8580 }, { "advantages": 1.6068421047066294e-07, "advantages_std": 1.6900987029075623, "clip_ratio": 0.0, "completion_length": 87.32262115478515, "epoch": 6.460902255639097, "grad_norm": 6.0, "kl": 0.32133436053991316, "learning_rate": 1.7706766917293234e-06, "loss": 0.0342, "num_tokens": 110146731.0, "reward": -1.518751847743988, "reward_std": 6.533436012268067, "rewards/get_chromagram_reward": 0.6186932504177094, "rewards/get_chromagram_reward_std": 0.12964782863855362, "rewards/get_intelligibility_reward": -5.159471201896667, "rewards/get_intelligibility_reward_std": 10.372815799713134, "rewards/get_target_len_reward": -0.015477333776652813, "rewards/get_target_len_reward_std": 0.04433933421969414, "step": 8590 }, { "advantages": -3.4384431231160305e-07, "advantages_std": 1.4812658846378326, "clip_ratio": 0.0, "completion_length": 84.97857284545898, "epoch": 6.468421052631579, "grad_norm": 6.03125, "kl": 0.2949420794844627, "learning_rate": 1.7669172932330828e-06, "loss": 0.0378, "num_tokens": 110448240.0, "reward": -1.9043188631534576, "reward_std": 6.780496072769165, "rewards/get_chromagram_reward": 0.6142106592655182, "rewards/get_chromagram_reward_std": 0.11835580542683602, "rewards/get_intelligibility_reward": -6.305766224861145, "rewards/get_intelligibility_reward_std": 10.31535725593567, "rewards/get_target_len_reward": -0.021400523744523524, "rewards/get_target_len_reward_std": 0.07350437175482512, "step": 8600 }, { "advantages": -5.513429854886454e-08, "advantages_std": 1.6094445586204529, "clip_ratio": 0.0, "completion_length": 86.00714416503907, "epoch": 6.47593984962406, "grad_norm": 7.90625, "kl": 2.071778839826584, "learning_rate": 1.7631578947368423e-06, "loss": 0.2087, "num_tokens": 110752197.0, "reward": -1.7652361631393432, "reward_std": 6.954381370544434, "rewards/get_chromagram_reward": 0.6145216584205627, "rewards/get_chromagram_reward_std": 0.12558116614818574, "rewards/get_intelligibility_reward": -5.887758874893189, "rewards/get_intelligibility_reward_std": 10.753602027893066, "rewards/get_target_len_reward": -0.022470803745090962, "rewards/get_target_len_reward_std": 0.0651225570589304, "step": 8610 }, { "advantages": 1.80676577699046e-07, "advantages_std": 1.4532333254814147, "clip_ratio": 0.0, "completion_length": 88.41131057739258, "epoch": 6.483458646616541, "grad_norm": 7.5625, "kl": 0.33889811784029006, "learning_rate": 1.7593984962406017e-06, "loss": 0.0396, "num_tokens": 111063022.0, "reward": -1.2849171161651611, "reward_std": 6.4561848640441895, "rewards/get_chromagram_reward": 0.6181443452835083, "rewards/get_chromagram_reward_std": 0.1125445008277893, "rewards/get_intelligibility_reward": -4.449799716472626, "rewards/get_intelligibility_reward_std": 10.360198545455933, "rewards/get_target_len_reward": -0.023095874674618246, "rewards/get_target_len_reward_std": 0.07099482864141464, "step": 8620 }, { "advantages": 6.780029053743419e-08, "advantages_std": 1.6173210978507995, "clip_ratio": 0.0, "completion_length": 86.86428833007812, "epoch": 6.490977443609022, "grad_norm": 18.375, "kl": 0.35529542416334153, "learning_rate": 1.755639097744361e-06, "loss": 0.035, "num_tokens": 111369702.0, "reward": -1.5035405695438384, "reward_std": 6.32957501411438, "rewards/get_chromagram_reward": 0.6382215857505799, "rewards/get_chromagram_reward_std": 0.10603488236665726, "rewards/get_intelligibility_reward": -5.13039231300354, "rewards/get_intelligibility_reward_std": 9.97192997932434, "rewards/get_target_len_reward": -0.018450586684048177, "rewards/get_target_len_reward_std": 0.04384410493075848, "step": 8630 }, { "advantages": 7.533778996204887e-07, "advantages_std": 1.6847024321556092, "clip_ratio": 0.0, "completion_length": 86.977978515625, "epoch": 6.498496240601503, "grad_norm": 5.71875, "kl": 15.765263549983501, "learning_rate": 1.7518796992481204e-06, "loss": 1.5793, "num_tokens": 111675934.0, "reward": -1.5743951201438904, "reward_std": 7.082512235641479, "rewards/get_chromagram_reward": 0.6116576075553894, "rewards/get_chromagram_reward_std": 0.1156858630478382, "rewards/get_intelligibility_reward": -5.314873480796814, "rewards/get_intelligibility_reward_std": 11.322156143188476, "rewards/get_target_len_reward": -0.019969225488603116, "rewards/get_target_len_reward_std": 0.05809299666434527, "step": 8640 }, { "advantages": -9.288390145911762e-08, "advantages_std": 1.513525414466858, "clip_ratio": 0.0, "completion_length": 87.31547698974609, "epoch": 6.5060150375939845, "grad_norm": 14.625, "kl": 0.31155717074871064, "learning_rate": 1.7481203007518798e-06, "loss": 0.0389, "num_tokens": 111983532.0, "reward": -1.3458161890506743, "reward_std": 6.663216876983642, "rewards/get_chromagram_reward": 0.6161249577999115, "rewards/get_chromagram_reward_std": 0.11008406803011894, "rewards/get_intelligibility_reward": -4.63141520023346, "rewards/get_intelligibility_reward_std": 10.778537368774414, "rewards/get_target_len_reward": -0.022158146370202303, "rewards/get_target_len_reward_std": 0.06249944530427456, "step": 8650 }, { "advantages": 3.9959948026080385e-07, "advantages_std": 1.6454981327056886, "clip_ratio": 0.0, "completion_length": 84.92202606201172, "epoch": 6.513533834586466, "grad_norm": 6.96875, "kl": 0.4470529407262802, "learning_rate": 1.7443609022556392e-06, "loss": 0.0528, "num_tokens": 112284573.0, "reward": -1.5209301978349685, "reward_std": 6.529443597793579, "rewards/get_chromagram_reward": 0.5984456181526184, "rewards/get_chromagram_reward_std": 0.12049673646688461, "rewards/get_intelligibility_reward": -5.138624894618988, "rewards/get_intelligibility_reward_std": 10.2837571144104, "rewards/get_target_len_reward": -0.022611112985759973, "rewards/get_target_len_reward_std": 0.07267039511352777, "step": 8660 }, { "advantages": -2.8436387466967972e-08, "advantages_std": 1.6120068430900574, "clip_ratio": 0.0, "completion_length": 87.93393173217774, "epoch": 6.521052631578947, "grad_norm": 9.1875, "kl": 0.39266557842493055, "learning_rate": 1.7406015037593987e-06, "loss": 0.0412, "num_tokens": 112594646.0, "reward": -1.500117802619934, "reward_std": 6.989867258071899, "rewards/get_chromagram_reward": 0.6125692486763, "rewards/get_chromagram_reward_std": 0.10713726431131362, "rewards/get_intelligibility_reward": -5.09507395029068, "rewards/get_intelligibility_reward_std": 11.139153861999512, "rewards/get_target_len_reward": -0.01784856105223298, "rewards/get_target_len_reward_std": 0.05503632873296738, "step": 8670 }, { "advantages": 2.2525589997712815e-07, "advantages_std": 1.5830511093139648, "clip_ratio": 0.0, "completion_length": 91.49642944335938, "epoch": 6.5285714285714285, "grad_norm": 5.0625, "kl": 0.31605358272790907, "learning_rate": 1.736842105263158e-06, "loss": 0.0359, "num_tokens": 112913111.0, "reward": -1.2155792593955994, "reward_std": 6.368030166625976, "rewards/get_chromagram_reward": 0.6121292293071747, "rewards/get_chromagram_reward_std": 0.10758508741855621, "rewards/get_intelligibility_reward": -4.240009331703186, "rewards/get_intelligibility_reward_std": 10.14575023651123, "rewards/get_target_len_reward": -0.018857531901448964, "rewards/get_target_len_reward_std": 0.06237869169563055, "step": 8680 }, { "advantages": -3.6458176566611655e-07, "advantages_std": 1.5414014101028441, "clip_ratio": 0.0, "completion_length": 87.77916870117187, "epoch": 6.5360902255639095, "grad_norm": 6.53125, "kl": 0.3152762994170189, "learning_rate": 1.7330827067669173e-06, "loss": 0.0337, "num_tokens": 113223411.0, "reward": -1.2201290145516395, "reward_std": 6.668260669708252, "rewards/get_chromagram_reward": 0.6242131114006042, "rewards/get_chromagram_reward_std": 0.10980811715126038, "rewards/get_intelligibility_reward": -4.262039077281952, "rewards/get_intelligibility_reward_std": 10.77245044708252, "rewards/get_target_len_reward": -0.022560841497033836, "rewards/get_target_len_reward_std": 0.054543149471282956, "step": 8690 }, { "advantages": -1.8874803231483383e-08, "advantages_std": 1.5456940293312074, "clip_ratio": 0.0, "completion_length": 91.7428596496582, "epoch": 6.5436090225563905, "grad_norm": 6.5625, "kl": 0.29459398835897443, "learning_rate": 1.7293233082706767e-06, "loss": 0.0345, "num_tokens": 113543909.0, "reward": -1.447171300649643, "reward_std": 6.840606307983398, "rewards/get_chromagram_reward": 0.6198675811290741, "rewards/get_chromagram_reward_std": 0.1132724367082119, "rewards/get_intelligibility_reward": -4.9377094268798825, "rewards/get_intelligibility_reward_std": 10.912199115753173, "rewards/get_target_len_reward": -0.023671871796250344, "rewards/get_target_len_reward_std": 0.07286440655589103, "step": 8700 }, { "advantages": -1.889963925805205e-07, "advantages_std": 1.6550687193870544, "clip_ratio": 0.0, "completion_length": 86.5553581237793, "epoch": 6.5511278195488725, "grad_norm": 12.125, "kl": 0.3258012026548386, "learning_rate": 1.7255639097744362e-06, "loss": 0.0406, "num_tokens": 113849326.0, "reward": -1.1805256187915802, "reward_std": 6.319496059417725, "rewards/get_chromagram_reward": 0.6109031736850739, "rewards/get_chromagram_reward_std": 0.11574857532978058, "rewards/get_intelligibility_reward": -4.132873296737671, "rewards/get_intelligibility_reward_std": 10.294385719299317, "rewards/get_target_len_reward": -0.019606582634150983, "rewards/get_target_len_reward_std": 0.05889410562813282, "step": 8710 }, { "advantages": 2.54809867072936e-07, "advantages_std": 1.6077858805656433, "clip_ratio": 0.0, "completion_length": 86.66369018554687, "epoch": 6.5586466165413535, "grad_norm": 13.0625, "kl": 0.3335361868143082, "learning_rate": 1.7218045112781956e-06, "loss": 0.0357, "num_tokens": 114155062.0, "reward": -1.6903316497802734, "reward_std": 6.802506971359253, "rewards/get_chromagram_reward": 0.6251188158988953, "rewards/get_chromagram_reward_std": 0.1196857139468193, "rewards/get_intelligibility_reward": -5.675504660606384, "rewards/get_intelligibility_reward_std": 10.526002311706543, "rewards/get_target_len_reward": -0.020608733221888543, "rewards/get_target_len_reward_std": 0.05494309738278389, "step": 8720 }, { "advantages": -8.344649558011951e-08, "advantages_std": 1.6645886659622193, "clip_ratio": 0.0, "completion_length": 85.4029769897461, "epoch": 6.5661654135338345, "grad_norm": 9.4375, "kl": 0.36896526366472243, "learning_rate": 1.718045112781955e-06, "loss": 0.0415, "num_tokens": 114457071.0, "reward": -1.6527364611625672, "reward_std": 6.956710481643677, "rewards/get_chromagram_reward": 0.6127317428588868, "rewards/get_chromagram_reward_std": 0.12270680665969849, "rewards/get_intelligibility_reward": -5.546628451347351, "rewards/get_intelligibility_reward_std": 11.009428977966309, "rewards/get_target_len_reward": -0.02431240752339363, "rewards/get_target_len_reward_std": 0.07028789706528187, "step": 8730 }, { "advantages": -2.0960967361816074e-07, "advantages_std": 1.543196427822113, "clip_ratio": 0.0, "completion_length": 87.92678604125976, "epoch": 6.573684210526316, "grad_norm": 6.34375, "kl": 0.34959004521369935, "learning_rate": 1.7142857142857145e-06, "loss": 0.0343, "num_tokens": 114766945.0, "reward": -1.538115844130516, "reward_std": 7.157424592971802, "rewards/get_chromagram_reward": 0.6251393258571625, "rewards/get_chromagram_reward_std": 0.11151338070631027, "rewards/get_intelligibility_reward": -5.224161815643311, "rewards/get_intelligibility_reward_std": 11.43363332748413, "rewards/get_target_len_reward": -0.015324807818979025, "rewards/get_target_len_reward_std": 0.04183251298964023, "step": 8740 }, { "advantages": 2.884616570497656e-07, "advantages_std": 1.5197386741638184, "clip_ratio": 0.0, "completion_length": 87.06071548461914, "epoch": 6.581203007518797, "grad_norm": 7.625, "kl": 0.3010944381356239, "learning_rate": 1.710526315789474e-06, "loss": 0.0353, "num_tokens": 115074311.0, "reward": -1.662864577770233, "reward_std": 7.032498836517334, "rewards/get_chromagram_reward": 0.623716801404953, "rewards/get_chromagram_reward_std": 0.11941528245806694, "rewards/get_intelligibility_reward": -5.592788290977478, "rewards/get_intelligibility_reward_std": 11.116605138778686, "rewards/get_target_len_reward": -0.019521817099303007, "rewards/get_target_len_reward_std": 0.06179252229630947, "step": 8750 }, { "advantages": -3.750125898704937e-08, "advantages_std": 1.5628470301628112, "clip_ratio": 0.0, "completion_length": 86.24881057739258, "epoch": 6.5887218045112785, "grad_norm": 5.625, "kl": 0.520573103427887, "learning_rate": 1.7067669172932333e-06, "loss": 0.0546, "num_tokens": 115379268.0, "reward": -1.5334375344216824, "reward_std": 6.752659320831299, "rewards/get_chromagram_reward": 0.6204401135444642, "rewards/get_chromagram_reward_std": 0.1138177677989006, "rewards/get_intelligibility_reward": -5.2039868295192715, "rewards/get_intelligibility_reward_std": 10.579442930221557, "rewards/get_target_len_reward": -0.01676576565951109, "rewards/get_target_len_reward_std": 0.045275353640317914, "step": 8760 }, { "advantages": -3.0001005200119837e-07, "advantages_std": 1.6244164824485778, "clip_ratio": 0.0, "completion_length": 85.89107131958008, "epoch": 6.59624060150376, "grad_norm": 5.28125, "kl": 0.2966675475239754, "learning_rate": 1.7030075187969928e-06, "loss": 0.0297, "num_tokens": 115682975.0, "reward": -1.2809839069843292, "reward_std": 6.5348756313323975, "rewards/get_chromagram_reward": 0.6395108997821808, "rewards/get_chromagram_reward_std": 0.12465112805366516, "rewards/get_intelligibility_reward": -4.461061334609985, "rewards/get_intelligibility_reward_std": 10.533429145812988, "rewards/get_target_len_reward": -0.021401012036949395, "rewards/get_target_len_reward_std": 0.05138088017702103, "step": 8770 }, { "advantages": -2.5431316998947293e-07, "advantages_std": 1.5624045133590698, "clip_ratio": 0.0, "completion_length": 87.46309661865234, "epoch": 6.603759398496241, "grad_norm": 7.96875, "kl": 0.3417803421616554, "learning_rate": 1.6992481203007522e-06, "loss": 0.0377, "num_tokens": 115990689.0, "reward": -1.4857405304908753, "reward_std": 6.962762022018433, "rewards/get_chromagram_reward": 0.6201785683631897, "rewards/get_chromagram_reward_std": 0.12068104594945908, "rewards/get_intelligibility_reward": -5.0505283117294315, "rewards/get_intelligibility_reward_std": 11.170841598510743, "rewards/get_target_len_reward": -0.026871402747929096, "rewards/get_target_len_reward_std": 0.07678173333406449, "step": 8780 }, { "advantages": 2.59031862270831e-07, "advantages_std": 1.4772692441940307, "clip_ratio": 0.0, "completion_length": 88.14166870117188, "epoch": 6.611278195488722, "grad_norm": 4.9375, "kl": 0.2925388216972351, "learning_rate": 1.6954887218045112e-06, "loss": 0.0312, "num_tokens": 116302271.0, "reward": -1.315621554851532, "reward_std": 7.228752565383911, "rewards/get_chromagram_reward": 0.6382994651794434, "rewards/get_chromagram_reward_std": 0.1257988005876541, "rewards/get_intelligibility_reward": -4.557725477218628, "rewards/get_intelligibility_reward_std": 11.79575719833374, "rewards/get_target_len_reward": -0.027438334189355373, "rewards/get_target_len_reward_std": 0.06499854773283005, "step": 8790 }, { "advantages": -1.0716418898937264e-07, "advantages_std": 1.5314384937286376, "clip_ratio": 0.0, "completion_length": 87.74285888671875, "epoch": 6.618796992481203, "grad_norm": 7.6875, "kl": 0.3825727790594101, "learning_rate": 1.6917293233082707e-06, "loss": 0.0434, "num_tokens": 116611413.0, "reward": -1.272094513475895, "reward_std": 6.248283386230469, "rewards/get_chromagram_reward": 0.6156734883785248, "rewards/get_chromagram_reward_std": 0.11137211546301842, "rewards/get_intelligibility_reward": -4.4124711662530895, "rewards/get_intelligibility_reward_std": 9.994548034667968, "rewards/get_target_len_reward": -0.0194855909794569, "rewards/get_target_len_reward_std": 0.05853393040597439, "step": 8800 }, { "advantages": 3.141661484740155e-07, "advantages_std": 1.6251811265945435, "clip_ratio": 0.0, "completion_length": 87.82083435058594, "epoch": 6.626315789473685, "grad_norm": 114.5, "kl": 0.32971449494361876, "learning_rate": 1.68796992481203e-06, "loss": 0.0385, "num_tokens": 116920595.0, "reward": -1.8200352877378463, "reward_std": 7.04195556640625, "rewards/get_chromagram_reward": 0.6245281338691712, "rewards/get_chromagram_reward_std": 0.11340516358613968, "rewards/get_intelligibility_reward": -6.059786748886109, "rewards/get_intelligibility_reward_std": 10.940639019012451, "rewards/get_target_len_reward": -0.02484700605273247, "rewards/get_target_len_reward_std": 0.07456877678632737, "step": 8810 }, { "advantages": 9.561578622196975e-08, "advantages_std": 1.5531502604484557, "clip_ratio": 0.0, "completion_length": 85.92440795898438, "epoch": 6.633834586466166, "grad_norm": 6.21875, "kl": 0.30954319685697557, "learning_rate": 1.6842105263157895e-06, "loss": 0.0382, "num_tokens": 117225206.0, "reward": -1.457185184955597, "reward_std": 6.83298134803772, "rewards/get_chromagram_reward": 0.622676020860672, "rewards/get_chromagram_reward_std": 0.11204622760415077, "rewards/get_intelligibility_reward": -4.975749778747558, "rewards/get_intelligibility_reward_std": 10.919587230682373, "rewards/get_target_len_reward": -0.01848159311339259, "rewards/get_target_len_reward_std": 0.05746168848127127, "step": 8820 }, { "advantages": -1.3845661754885442e-07, "advantages_std": 1.5635218024253845, "clip_ratio": 0.0, "completion_length": 83.90654907226562, "epoch": 6.641353383458647, "grad_norm": 9.125, "kl": 0.30399465262889863, "learning_rate": 1.680451127819549e-06, "loss": 0.0355, "num_tokens": 117523272.0, "reward": -1.6465578913688659, "reward_std": 6.5834332466125485, "rewards/get_chromagram_reward": 0.6250576794147491, "rewards/get_chromagram_reward_std": 0.1151248849928379, "rewards/get_intelligibility_reward": -5.5405457496643065, "rewards/get_intelligibility_reward_std": 10.319419717788696, "rewards/get_target_len_reward": -0.024185398779809474, "rewards/get_target_len_reward_std": 0.08074920866638421, "step": 8830 }, { "advantages": -1.8986564214173996e-07, "advantages_std": 1.5144237875938416, "clip_ratio": 0.0, "completion_length": 83.20476379394532, "epoch": 6.648872180451128, "grad_norm": 164.0, "kl": 0.4179477095603943, "learning_rate": 1.6766917293233084e-06, "loss": 0.0442, "num_tokens": 117819871.0, "reward": -1.4703819632530213, "reward_std": 6.541961574554444, "rewards/get_chromagram_reward": 0.6243762791156768, "rewards/get_chromagram_reward_std": 0.11654146909713745, "rewards/get_intelligibility_reward": -5.014195156097412, "rewards/get_intelligibility_reward_std": 10.413293981552124, "rewards/get_target_len_reward": -0.02132681766524911, "rewards/get_target_len_reward_std": 0.0534250408411026, "step": 8840 }, { "advantages": -1.1647741686715562e-07, "advantages_std": 1.5453578233718872, "clip_ratio": 0.0, "completion_length": 86.41369247436523, "epoch": 6.656390977443609, "grad_norm": 32.75, "kl": 0.3617649167776108, "learning_rate": 1.6729323308270678e-06, "loss": 0.0439, "num_tokens": 118125840.0, "reward": -1.111614690721035, "reward_std": 6.658377170562744, "rewards/get_chromagram_reward": 0.6361361742019653, "rewards/get_chromagram_reward_std": 0.11931278705596923, "rewards/get_intelligibility_reward": -3.9476100608706473, "rewards/get_intelligibility_reward_std": 10.896197414398193, "rewards/get_target_len_reward": -0.02336995638906956, "rewards/get_target_len_reward_std": 0.06786302607506514, "step": 8850 }, { "advantages": 3.3862889949887174e-07, "advantages_std": 1.54275141954422, "clip_ratio": 0.0, "completion_length": 86.06428756713868, "epoch": 6.663909774436091, "grad_norm": 9.3125, "kl": 0.3179106771945953, "learning_rate": 1.6691729323308273e-06, "loss": 0.0318, "num_tokens": 118430577.0, "reward": -1.6130412936210632, "reward_std": 7.0431239128112795, "rewards/get_chromagram_reward": 0.6181690275669098, "rewards/get_chromagram_reward_std": 0.11314555704593658, "rewards/get_intelligibility_reward": -5.437689471244812, "rewards/get_intelligibility_reward_std": 11.172392845153809, "rewards/get_target_len_reward": -0.01960300365462899, "rewards/get_target_len_reward_std": 0.04749173801392317, "step": 8860 }, { "advantages": 5.066394344055425e-08, "advantages_std": 1.652000641822815, "clip_ratio": 0.0, "completion_length": 87.61964340209961, "epoch": 6.671428571428572, "grad_norm": 5.1875, "kl": 0.2891685277223587, "learning_rate": 1.6654135338345867e-06, "loss": 0.0312, "num_tokens": 118738849.0, "reward": -1.160328009724617, "reward_std": 6.376783657073974, "rewards/get_chromagram_reward": 0.6396682798862457, "rewards/get_chromagram_reward_std": 0.11093177422881126, "rewards/get_intelligibility_reward": -4.09717288017273, "rewards/get_intelligibility_reward_std": 10.407121753692627, "rewards/get_target_len_reward": -0.023479225765913724, "rewards/get_target_len_reward_std": 0.07763024345040322, "step": 8870 }, { "advantages": 2.1879872349472862e-07, "advantages_std": 1.5195560693740844, "clip_ratio": 0.0, "completion_length": 87.31547775268555, "epoch": 6.678947368421053, "grad_norm": 7.65625, "kl": 0.3144701659679413, "learning_rate": 1.6616541353383461e-06, "loss": 0.0347, "num_tokens": 119046694.0, "reward": -1.609968501329422, "reward_std": 6.308842182159424, "rewards/get_chromagram_reward": 0.6176996469497681, "rewards/get_chromagram_reward_std": 0.1137751154601574, "rewards/get_intelligibility_reward": -5.427005124092102, "rewards/get_intelligibility_reward_std": 9.80909767150879, "rewards/get_target_len_reward": -0.020599970314651727, "rewards/get_target_len_reward_std": 0.055216855742037295, "step": 8880 }, { "advantages": 5.116065864285701e-08, "advantages_std": 1.5125298976898194, "clip_ratio": 0.0, "completion_length": 86.89643020629883, "epoch": 6.686466165413534, "grad_norm": 5.9375, "kl": 0.31706361621618273, "learning_rate": 1.6578947368421053e-06, "loss": 0.038, "num_tokens": 119353470.0, "reward": -1.6176787674427033, "reward_std": 6.833755254745483, "rewards/get_chromagram_reward": 0.6201191484928131, "rewards/get_chromagram_reward_std": 0.11991398110985756, "rewards/get_intelligibility_reward": -5.450251662731171, "rewards/get_intelligibility_reward_std": 10.686319255828858, "rewards/get_target_len_reward": -0.02290344387292862, "rewards/get_target_len_reward_std": 0.06531772967427969, "step": 8890 }, { "advantages": -5.1657369048996316e-08, "advantages_std": 1.5619229674339294, "clip_ratio": 0.0, "completion_length": 90.20178833007813, "epoch": 6.693984962406015, "grad_norm": 7.03125, "kl": 0.33849948048591616, "learning_rate": 1.6541353383458648e-06, "loss": 0.0357, "num_tokens": 119669186.0, "reward": -1.3696957796812057, "reward_std": 6.409091806411743, "rewards/get_chromagram_reward": 0.6205049753189087, "rewards/get_chromagram_reward_std": 0.11114726960659027, "rewards/get_intelligibility_reward": -4.704723370075226, "rewards/get_intelligibility_reward_std": 10.246364498138428, "rewards/get_target_len_reward": -0.0248687282204628, "rewards/get_target_len_reward_std": 0.07564185373485088, "step": 8900 }, { "advantages": 4.21206185308165e-07, "advantages_std": 1.5767785668373109, "clip_ratio": 0.0, "completion_length": 87.05000152587891, "epoch": 6.701503759398497, "grad_norm": 5.78125, "kl": 1.301441177725792, "learning_rate": 1.6503759398496242e-06, "loss": 0.1376, "num_tokens": 119975757.0, "reward": -1.6824484169483185, "reward_std": 6.476547956466675, "rewards/get_chromagram_reward": 0.6236434578895569, "rewards/get_chromagram_reward_std": 0.12051276862621307, "rewards/get_intelligibility_reward": -5.6518255233764645, "rewards/get_intelligibility_reward_std": 9.937983512878418, "rewards/get_target_len_reward": -0.01916287373751402, "rewards/get_target_len_reward_std": 0.05058997441083193, "step": 8910 }, { "advantages": -2.3593503462038258e-08, "advantages_std": 1.636739730834961, "clip_ratio": 0.0, "completion_length": 83.20416793823242, "epoch": 6.709022556390978, "grad_norm": 6.71875, "kl": 0.34641715288162234, "learning_rate": 1.6466165413533836e-06, "loss": 0.0355, "num_tokens": 120272973.0, "reward": -1.6513566374778748, "reward_std": 7.347185850143433, "rewards/get_chromagram_reward": 0.6174445629119873, "rewards/get_chromagram_reward_std": 0.11873769238591195, "rewards/get_intelligibility_reward": -5.549295377731323, "rewards/get_intelligibility_reward_std": 11.645557308197022, "rewards/get_target_len_reward": -0.022218797355890274, "rewards/get_target_len_reward_std": 0.06003955211490393, "step": 8920 }, { "advantages": 7.276734050876144e-08, "advantages_std": 1.5402096390724183, "clip_ratio": 0.0, "completion_length": 88.8607177734375, "epoch": 6.716541353383459, "grad_norm": 7.40625, "kl": 0.32854454070329664, "learning_rate": 1.642857142857143e-06, "loss": 0.0394, "num_tokens": 120585221.0, "reward": -1.6409627513494343, "reward_std": 7.005185222625732, "rewards/get_chromagram_reward": 0.6250455319881439, "rewards/get_chromagram_reward_std": 0.12024707272648812, "rewards/get_intelligibility_reward": -5.524389547109604, "rewards/get_intelligibility_reward_std": 11.067870235443115, "rewards/get_target_len_reward": -0.023543883627280594, "rewards/get_target_len_reward_std": 0.05823461338877678, "step": 8930 }, { "advantages": 6.144245844552642e-07, "advantages_std": 1.5682287096977234, "clip_ratio": 0.0, "completion_length": 86.10595397949218, "epoch": 6.72406015037594, "grad_norm": 10.5, "kl": 0.3400934889912605, "learning_rate": 1.6390977443609025e-06, "loss": 0.038, "num_tokens": 120889444.0, "reward": -1.4442845374345779, "reward_std": 6.767345428466797, "rewards/get_chromagram_reward": 0.6401819050312042, "rewards/get_chromagram_reward_std": 0.12307184860110283, "rewards/get_intelligibility_reward": -4.949033164978028, "rewards/get_intelligibility_reward_std": 10.734640312194824, "rewards/get_target_len_reward": -0.02400211989879608, "rewards/get_target_len_reward_std": 0.06089174598455429, "step": 8940 }, { "advantages": -4.656613000975085e-07, "advantages_std": 1.56749826669693, "clip_ratio": 0.0, "completion_length": 85.18333435058594, "epoch": 6.731578947368421, "grad_norm": 7.65625, "kl": 0.39585251808166505, "learning_rate": 1.635338345864662e-06, "loss": 0.0463, "num_tokens": 121191124.0, "reward": -1.516963106393814, "reward_std": 6.727704811096191, "rewards/get_chromagram_reward": 0.6204110085964203, "rewards/get_chromagram_reward_std": 0.1156246043741703, "rewards/get_intelligibility_reward": -5.150197815895081, "rewards/get_intelligibility_reward_std": 10.64694356918335, "rewards/get_target_len_reward": -0.021102245897054672, "rewards/get_target_len_reward_std": 0.05872043874114752, "step": 8950 }, { "advantages": -2.510845703795894e-07, "advantages_std": 1.6418975114822387, "clip_ratio": 0.0, "completion_length": 82.9857162475586, "epoch": 6.739097744360902, "grad_norm": 6.75, "kl": 0.44420134127140043, "learning_rate": 1.6315789473684212e-06, "loss": 0.0469, "num_tokens": 121486792.0, "reward": -1.8509411275386811, "reward_std": 7.135700464248657, "rewards/get_chromagram_reward": 0.6175800204277039, "rewards/get_chromagram_reward_std": 0.10379650369286537, "rewards/get_intelligibility_reward": -6.14611246585846, "rewards/get_intelligibility_reward_std": 11.105937671661376, "rewards/get_target_len_reward": -0.024290661606937648, "rewards/get_target_len_reward_std": 0.0705668555572629, "step": 8960 }, { "advantages": 9.785095400616228e-08, "advantages_std": 1.5706232070922852, "clip_ratio": 0.0, "completion_length": 88.34821548461915, "epoch": 6.746616541353384, "grad_norm": 5.84375, "kl": 9.645041857659816, "learning_rate": 1.6278195488721806e-06, "loss": 0.9646, "num_tokens": 121798026.0, "reward": -1.499186635017395, "reward_std": 6.527169799804687, "rewards/get_chromagram_reward": 0.6356152951717376, "rewards/get_chromagram_reward_std": 0.114602829515934, "rewards/get_intelligibility_reward": -5.1114842891693115, "rewards/get_intelligibility_reward_std": 10.352026510238648, "rewards/get_target_len_reward": -0.021690726187080143, "rewards/get_target_len_reward_std": 0.05747928954660893, "step": 8970 }, { "advantages": -8.46137631071997e-07, "advantages_std": 1.6054083704948425, "clip_ratio": 0.0, "completion_length": 85.80952606201171, "epoch": 6.754135338345865, "grad_norm": 7.5625, "kl": 0.3294598564505577, "learning_rate": 1.62406015037594e-06, "loss": 0.0353, "num_tokens": 122101766.0, "reward": -1.4127657055854796, "reward_std": 6.778144502639771, "rewards/get_chromagram_reward": 0.6286609172821045, "rewards/get_chromagram_reward_std": 0.1091697208583355, "rewards/get_intelligibility_reward": -4.847491002082824, "rewards/get_intelligibility_reward_std": 10.899819278717041, "rewards/get_target_len_reward": -0.01946706180460751, "rewards/get_target_len_reward_std": 0.05438558142632246, "step": 8980 }, { "advantages": 3.8991357875772795e-08, "advantages_std": 1.491723620891571, "clip_ratio": 0.0, "completion_length": 84.7428581237793, "epoch": 6.761654135338346, "grad_norm": 8.875, "kl": 0.34233406037092207, "learning_rate": 1.6203007518796992e-06, "loss": 0.0357, "num_tokens": 122402651.0, "reward": -1.2671085568144917, "reward_std": 6.347675132751465, "rewards/get_chromagram_reward": 0.6184946298599243, "rewards/get_chromagram_reward_std": 0.11106136739253998, "rewards/get_intelligibility_reward": -4.399339348077774, "rewards/get_intelligibility_reward_std": 10.213108444213868, "rewards/get_target_len_reward": -0.020480694342404605, "rewards/get_target_len_reward_std": 0.06267086192965507, "step": 8990 }, { "advantages": -6.9538813818326165e-09, "advantages_std": 1.7154770612716674, "clip_ratio": 0.0, "completion_length": 87.94761962890625, "epoch": 6.769172932330827, "grad_norm": 11.5, "kl": 0.3620362639427185, "learning_rate": 1.6165413533834587e-06, "loss": 0.0425, "num_tokens": 122711859.0, "reward": -1.6059120416641235, "reward_std": 7.1333118915557865, "rewards/get_chromagram_reward": 0.6188837230205536, "rewards/get_chromagram_reward_std": 0.11426898092031479, "rewards/get_intelligibility_reward": -5.411304783821106, "rewards/get_intelligibility_reward_std": 11.316510009765626, "rewards/get_target_len_reward": -0.025314744096249342, "rewards/get_target_len_reward_std": 0.07575746681541204, "step": 9000 } ], "logging_steps": 10, "max_steps": 13300, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 24, "trial_name": null, "trial_params": null }