Instructions to use KEVIN04087/my-model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use KEVIN04087/my-model with PEFT:
Base model is not found.
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.3629489603024574, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.075, | |
| "epoch": 0.0023629489603024575, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.179437518119812, | |
| "image_reward": 0.292385521862242, | |
| "kl": 0.0005639283277560026, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0818, | |
| "reward": -1.718647839128971, | |
| "reward_std": 2.0869705460965635, | |
| "rewards/reward_func": -1.718647839128971, | |
| "step": 10, | |
| "toxic_reward": 3.753792663415273 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.35, | |
| "epoch": 0.004725897920604915, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.40540918707847595, | |
| "image_reward": 0.28610331267118455, | |
| "kl": 0.0006540146190673113, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0547, | |
| "reward": -0.9438592553138733, | |
| "reward_std": 3.9592867106199265, | |
| "rewards/reward_func": -0.9438592553138733, | |
| "step": 20, | |
| "toxic_reward": 3.622282150387764 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.35, | |
| "epoch": 0.007088846880907372, | |
| "format_reward": -2.5, | |
| "grad_norm": 0.3070058524608612, | |
| "image_reward": 0.29500325620174406, | |
| "kl": 0.0006122831255197525, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0196, | |
| "reward": -2.2396623373031614, | |
| "reward_std": 4.928562045097351, | |
| "rewards/reward_func": -2.2396623373031614, | |
| "step": 30, | |
| "toxic_reward": 3.3049886375665665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.475, | |
| "epoch": 0.00945179584120983, | |
| "format_reward": -2.25, | |
| "grad_norm": 0.30812421441078186, | |
| "image_reward": 0.2784423798322678, | |
| "kl": 0.0007215021323645487, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0374, | |
| "reward": -1.9919262409210206, | |
| "reward_std": 3.2468371063470842, | |
| "rewards/reward_func": -1.9919262409210206, | |
| "step": 40, | |
| "toxic_reward": 4.284190082550049 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.0, | |
| "epoch": 0.011814744801512287, | |
| "format_reward": -2.25, | |
| "grad_norm": 0.7593560814857483, | |
| "image_reward": 0.3014272041618824, | |
| "kl": 0.0006584389615454711, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2018, | |
| "reward": -2.2726588547229767, | |
| "reward_std": 3.8129764549434184, | |
| "rewards/reward_func": -2.2726588547229767, | |
| "step": 50, | |
| "toxic_reward": 3.694073647260666 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.475, | |
| "epoch": 0.014177693761814745, | |
| "format_reward": -3.75, | |
| "grad_norm": 0.46164554357528687, | |
| "image_reward": 0.24024454802274703, | |
| "kl": 0.000736865375074558, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1005, | |
| "reward": -3.6363322257995607, | |
| "reward_std": 5.34180793762207, | |
| "rewards/reward_func": -3.6363322257995607, | |
| "step": 60, | |
| "toxic_reward": 4.516654038429261 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.275, | |
| "epoch": 0.0165406427221172, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.34076398611068726, | |
| "image_reward": 0.2581237778067589, | |
| "kl": 0.0006409274850739166, | |
| "learning_rate": 5e-06, | |
| "loss": -0.048, | |
| "reward": -1.306644481420517, | |
| "reward_std": 3.5849914638325573, | |
| "rewards/reward_func": -1.306644481420517, | |
| "step": 70, | |
| "toxic_reward": 4.222395324707032 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.05, | |
| "epoch": 0.01890359168241966, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.2942235469818115, | |
| "image_reward": 0.2728251129388809, | |
| "kl": 0.00077872859837953, | |
| "learning_rate": 5e-06, | |
| "loss": -0.018, | |
| "reward": -1.0979918956756591, | |
| "reward_std": 3.421245375275612, | |
| "rewards/reward_func": -1.0979918956756591, | |
| "step": 80, | |
| "toxic_reward": 4.375229549407959 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.375, | |
| "epoch": 0.021266540642722116, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.4680553376674652, | |
| "image_reward": 0.27020376589563155, | |
| "kl": 0.0006814575113821775, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1758, | |
| "reward": -1.8029783844947815, | |
| "reward_std": 2.909199387952685, | |
| "rewards/reward_func": -1.8029783844947815, | |
| "step": 90, | |
| "toxic_reward": 3.5180059373378754 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.975, | |
| "epoch": 0.023629489603024575, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.5208232998847961, | |
| "image_reward": 0.28130289614200593, | |
| "kl": 0.0006654941505985334, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0732, | |
| "reward": -1.5811177730560302, | |
| "reward_std": 3.0347108453512193, | |
| "rewards/reward_func": -1.5811177730560302, | |
| "step": 100, | |
| "toxic_reward": 3.8031033158302305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 59.575, | |
| "epoch": 0.02599243856332703, | |
| "format_reward": -3.5, | |
| "grad_norm": 0.5875898003578186, | |
| "image_reward": 0.2767374664545059, | |
| "kl": 0.0009529282746370882, | |
| "learning_rate": 5e-06, | |
| "loss": 0.013, | |
| "reward": -3.43455148935318, | |
| "reward_std": 5.033185955882073, | |
| "rewards/reward_func": -3.43455148935318, | |
| "step": 110, | |
| "toxic_reward": 3.8197044640779496 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.825, | |
| "epoch": 0.02835538752362949, | |
| "format_reward": -2.5, | |
| "grad_norm": 0.9147374629974365, | |
| "image_reward": 0.298614501953125, | |
| "kl": 0.0007633624511072413, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1077, | |
| "reward": -2.407980114221573, | |
| "reward_std": 4.146487069129944, | |
| "rewards/reward_func": -2.407980114221573, | |
| "step": 120, | |
| "toxic_reward": 3.8069980409410267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.575, | |
| "epoch": 0.030718336483931945, | |
| "format_reward": -1.5, | |
| "grad_norm": 0.6123144626617432, | |
| "image_reward": 0.26710906128088635, | |
| "kl": 0.000945484999101609, | |
| "learning_rate": 5e-06, | |
| "loss": -0.033, | |
| "reward": -1.4210234582424164, | |
| "reward_std": 2.5487833991646767, | |
| "rewards/reward_func": -1.4210234582424164, | |
| "step": 130, | |
| "toxic_reward": 3.9784648021062217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.325, | |
| "epoch": 0.0330812854442344, | |
| "format_reward": -1.5, | |
| "grad_norm": 0.35265249013900757, | |
| "image_reward": 0.2955657958984375, | |
| "kl": 0.001620796724455431, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0518, | |
| "reward": -1.1245046585798264, | |
| "reward_std": 3.641619694232941, | |
| "rewards/reward_func": -1.1245046585798264, | |
| "step": 140, | |
| "toxic_reward": 3.7418821096420287 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.325, | |
| "epoch": 0.03544423440453686, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.6911599040031433, | |
| "image_reward": 0.301416015625, | |
| "kl": 0.0009025269537232816, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3208, | |
| "reward": -1.707236361503601, | |
| "reward_std": 3.211209188401699, | |
| "rewards/reward_func": -1.707236361503601, | |
| "step": 150, | |
| "toxic_reward": 3.413761219382286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.6, | |
| "epoch": 0.03780718336483932, | |
| "format_reward": -2.5, | |
| "grad_norm": 0.6072728037834167, | |
| "image_reward": 0.28253965079784393, | |
| "kl": 0.0016979283303953708, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0538, | |
| "reward": -1.9519330382347106, | |
| "reward_std": 3.5465006709098814, | |
| "rewards/reward_func": -1.9519330382347106, | |
| "step": 160, | |
| "toxic_reward": 4.008814732233684 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.7, | |
| "epoch": 0.04017013232514178, | |
| "format_reward": -2.5, | |
| "grad_norm": 0.9174755811691284, | |
| "image_reward": 0.2571976251072354, | |
| "kl": 0.002261338901007548, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1112, | |
| "reward": -2.123195892572403, | |
| "reward_std": 4.526358595490455, | |
| "rewards/reward_func": -2.123195892572403, | |
| "step": 170, | |
| "toxic_reward": 3.4624782469537525 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.25, | |
| "epoch": 0.04253308128544423, | |
| "format_reward": -3.5, | |
| "grad_norm": 0.6067785024642944, | |
| "image_reward": 0.26939900666475297, | |
| "kl": 0.0012992891133762896, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2051, | |
| "reward": -3.432029390335083, | |
| "reward_std": 5.464101791381836, | |
| "rewards/reward_func": -3.432029390335083, | |
| "step": 180, | |
| "toxic_reward": 3.7212570786476133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.175, | |
| "epoch": 0.04489603024574669, | |
| "format_reward": -1.5, | |
| "grad_norm": 0.32170766592025757, | |
| "image_reward": 0.2965630425347222, | |
| "kl": 0.00527564455405809, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1016, | |
| "reward": -1.44393031001091, | |
| "reward_std": 3.0585690192878245, | |
| "rewards/reward_func": -1.44393031001091, | |
| "step": 190, | |
| "toxic_reward": 4.017925447887844 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.775, | |
| "epoch": 0.04725897920604915, | |
| "format_reward": -2.0, | |
| "grad_norm": 0.5049771070480347, | |
| "image_reward": 0.28580220490694047, | |
| "kl": 0.003515976545168087, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0836, | |
| "reward": -1.8655982911586761, | |
| "reward_std": 3.0476409645751117, | |
| "rewards/reward_func": -1.8655982911586761, | |
| "step": 200, | |
| "toxic_reward": 3.8450406193733215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.05, | |
| "epoch": 0.04962192816635161, | |
| "format_reward": -2.0, | |
| "grad_norm": 0.38584810495376587, | |
| "image_reward": 0.2693684895833333, | |
| "kl": 0.0034694685833528637, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2267, | |
| "reward": -2.040862238407135, | |
| "reward_std": 3.0185029461979864, | |
| "rewards/reward_func": -2.040862238407135, | |
| "step": 210, | |
| "toxic_reward": 4.567277669906616 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.325, | |
| "epoch": 0.05198487712665406, | |
| "format_reward": -2.25, | |
| "grad_norm": 0.7845410108566284, | |
| "image_reward": 0.27211100459098814, | |
| "kl": 0.0027843258751090614, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0217, | |
| "reward": -1.9688808619976044, | |
| "reward_std": 4.326950389891863, | |
| "rewards/reward_func": -1.9688808619976044, | |
| "step": 220, | |
| "toxic_reward": 3.998746132850647 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.475, | |
| "epoch": 0.05434782608695652, | |
| "format_reward": -1.5, | |
| "grad_norm": 0.28465747833251953, | |
| "image_reward": 0.28180135041475296, | |
| "kl": 0.0029356992337852715, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1949, | |
| "reward": -1.8664621770381928, | |
| "reward_std": 3.3784094207920132, | |
| "rewards/reward_func": -1.8664621770381928, | |
| "step": 230, | |
| "toxic_reward": 3.4729531943798064 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 61.9, | |
| "epoch": 0.05671077504725898, | |
| "format_reward": -3.25, | |
| "grad_norm": 0.42949026823043823, | |
| "image_reward": 0.27780679166316985, | |
| "kl": 0.00391890910686925, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0808, | |
| "reward": -2.961669445037842, | |
| "reward_std": 4.58679872751236, | |
| "rewards/reward_func": -2.961669445037842, | |
| "step": 240, | |
| "toxic_reward": 3.08537415266037 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.8, | |
| "epoch": 0.05907372400756144, | |
| "format_reward": -2.25, | |
| "grad_norm": 0.9381951093673706, | |
| "image_reward": 0.276055908203125, | |
| "kl": 0.017148628836730496, | |
| "learning_rate": 5e-06, | |
| "loss": -0.2303, | |
| "reward": -2.1199170768260958, | |
| "reward_std": 3.3072034239768984, | |
| "rewards/reward_func": -2.1199170768260958, | |
| "step": 250, | |
| "toxic_reward": 3.4872416734695433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.7, | |
| "epoch": 0.06143667296786389, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.0572214126586914, | |
| "image_reward": 0.26918131560087205, | |
| "kl": 0.0040537358960136775, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1228, | |
| "reward": -0.41667274236679075, | |
| "reward_std": 1.9968392252922058, | |
| "rewards/reward_func": -0.41667274236679075, | |
| "step": 260, | |
| "toxic_reward": 4.103001546859741 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 31.325, | |
| "epoch": 0.06379962192816635, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.33211401104927063, | |
| "image_reward": 0.26322936862707136, | |
| "kl": 0.010905979719245807, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1079, | |
| "reward": -1.2869422495365144, | |
| "reward_std": 3.4389497309923174, | |
| "rewards/reward_func": -1.2869422495365144, | |
| "step": 270, | |
| "toxic_reward": 4.132273650169372 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.725, | |
| "epoch": 0.0661625708884688, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.1315058469772339, | |
| "image_reward": 0.28337690565321183, | |
| "kl": 0.003939477750100196, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3379, | |
| "reward": -1.081434178352356, | |
| "reward_std": 1.8980566158890724, | |
| "rewards/reward_func": -1.081434178352356, | |
| "step": 280, | |
| "toxic_reward": 4.38276841905382 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 31.3, | |
| "epoch": 0.06852551984877127, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.35049131512641907, | |
| "image_reward": 0.27892303466796875, | |
| "kl": 0.04790264330804348, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1138, | |
| "reward": -1.20261852145195, | |
| "reward_std": 3.7090243451297282, | |
| "rewards/reward_func": -1.20261852145195, | |
| "step": 290, | |
| "toxic_reward": 3.994084894657135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.925, | |
| "epoch": 0.07088846880907372, | |
| "format_reward": -2.0, | |
| "grad_norm": 0.5147161483764648, | |
| "image_reward": 0.290887451171875, | |
| "kl": 0.008263002592138946, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0859, | |
| "reward": -1.9743857204914093, | |
| "reward_std": 3.558365413546562, | |
| "rewards/reward_func": -1.9743857204914093, | |
| "step": 300, | |
| "toxic_reward": 3.2862678617239 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.4, | |
| "epoch": 0.07325141776937619, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.4911198616027832, | |
| "image_reward": 0.27863413393497466, | |
| "kl": 0.0026858947356231512, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1385, | |
| "reward": -0.005173623561859131, | |
| "reward_std": 1.2043775863945485, | |
| "rewards/reward_func": -0.005173623561859131, | |
| "step": 310, | |
| "toxic_reward": 4.392678713798523 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.925, | |
| "epoch": 0.07561436672967864, | |
| "format_reward": -2.5, | |
| "grad_norm": 0.7166872024536133, | |
| "image_reward": 0.2722563561466005, | |
| "kl": 0.0117031121510081, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1367, | |
| "reward": -2.320690667629242, | |
| "reward_std": 3.535179616510868, | |
| "rewards/reward_func": -2.320690667629242, | |
| "step": 320, | |
| "toxic_reward": 3.1776589486334057 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.3, | |
| "epoch": 0.07797731568998109, | |
| "format_reward": -0.75, | |
| "grad_norm": 0.2700420618057251, | |
| "image_reward": 0.29552409052848816, | |
| "kl": 0.017672599526122212, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1148, | |
| "reward": -0.291591414809227, | |
| "reward_std": 2.016152049601078, | |
| "rewards/reward_func": -0.291591414809227, | |
| "step": 330, | |
| "toxic_reward": 3.240800154209137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.525, | |
| "epoch": 0.08034026465028356, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.4268760085105896, | |
| "image_reward": 0.29228312373161314, | |
| "kl": 0.0068239012965932485, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1467, | |
| "reward": -1.9235587894916535, | |
| "reward_std": 2.7463727177120747, | |
| "rewards/reward_func": -1.9235587894916535, | |
| "step": 340, | |
| "toxic_reward": 3.535431480407715 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 64.05, | |
| "epoch": 0.08270321361058601, | |
| "format_reward": -1.5, | |
| "grad_norm": 0.6083372235298157, | |
| "image_reward": 0.2844095855951309, | |
| "kl": 0.014765451126731933, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0278, | |
| "reward": -1.293799924850464, | |
| "reward_std": 3.273481422662735, | |
| "rewards/reward_func": -1.293799924850464, | |
| "step": 350, | |
| "toxic_reward": 3.966331052780151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.65, | |
| "epoch": 0.08506616257088846, | |
| "format_reward": -1.25, | |
| "grad_norm": 0.72890704870224, | |
| "image_reward": 0.2809214279055595, | |
| "kl": 0.013393631461076439, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0463, | |
| "reward": -1.2338840126991273, | |
| "reward_std": 2.9229114189743997, | |
| "rewards/reward_func": -1.2338840126991273, | |
| "step": 360, | |
| "toxic_reward": 3.495128685235977 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.5, | |
| "epoch": 0.08742911153119093, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.19754794239997864, | |
| "image_reward": 0.26820373386144636, | |
| "kl": 0.009714199486188591, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0682, | |
| "reward": -0.5900760173797608, | |
| "reward_std": 2.301849504513666, | |
| "rewards/reward_func": -0.5900760173797608, | |
| "step": 370, | |
| "toxic_reward": 4.155627632141114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.325, | |
| "epoch": 0.08979206049149338, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.7548431158065796, | |
| "image_reward": 0.2744639068841934, | |
| "kl": 0.02220306231174618, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2135, | |
| "reward": -1.0681762412190436, | |
| "reward_std": 2.1873259781859815, | |
| "rewards/reward_func": -1.0681762412190436, | |
| "step": 380, | |
| "toxic_reward": 3.818178777396679 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.75, | |
| "epoch": 0.09215500945179585, | |
| "format_reward": -1.5, | |
| "grad_norm": 1.6385910511016846, | |
| "image_reward": 0.28105672299861906, | |
| "kl": 0.010323460912331939, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1059, | |
| "reward": -1.3754307508468628, | |
| "reward_std": 3.023836246691644, | |
| "rewards/reward_func": -1.3754307508468628, | |
| "step": 390, | |
| "toxic_reward": 4.011180245876313 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.55, | |
| "epoch": 0.0945179584120983, | |
| "format_reward": -2.0, | |
| "grad_norm": 0.8115288615226746, | |
| "image_reward": 0.28032633662223816, | |
| "kl": 0.05955924341687933, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0819, | |
| "reward": -1.5853845477104187, | |
| "reward_std": 4.042920933663845, | |
| "rewards/reward_func": -1.5853845477104187, | |
| "step": 400, | |
| "toxic_reward": 3.5872471928596497 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.675, | |
| "epoch": 0.09688090737240075, | |
| "format_reward": -0.75, | |
| "grad_norm": 0.5388673543930054, | |
| "image_reward": 0.2691446923547321, | |
| "kl": 0.007679732237011194, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0322, | |
| "reward": -0.41340800523757937, | |
| "reward_std": 2.081881234049797, | |
| "rewards/reward_func": -0.41340800523757937, | |
| "step": 410, | |
| "toxic_reward": 3.857707765367296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.125, | |
| "epoch": 0.09924385633270322, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.2760399281978607, | |
| "image_reward": 0.2856099456548691, | |
| "kl": 0.06441240075509995, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1586, | |
| "reward": -1.576817613840103, | |
| "reward_std": 3.0979749940335752, | |
| "rewards/reward_func": -1.576817613840103, | |
| "step": 420, | |
| "toxic_reward": 3.6233551859855653 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.9, | |
| "epoch": 0.10160680529300567, | |
| "format_reward": -2.25, | |
| "grad_norm": 0.8334791660308838, | |
| "image_reward": 0.314910888671875, | |
| "kl": 0.13873190036974847, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0671, | |
| "reward": -1.9813659265637398, | |
| "reward_std": 3.193006566166878, | |
| "rewards/reward_func": -1.9813659265637398, | |
| "step": 430, | |
| "toxic_reward": 3.297185143828392 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.6, | |
| "epoch": 0.10396975425330812, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.8734163045883179, | |
| "image_reward": 0.2913574203848839, | |
| "kl": 0.01563742496073246, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0094, | |
| "reward": 0.012267284095287323, | |
| "reward_std": 1.1554903835058212, | |
| "rewards/reward_func": 0.012267284095287323, | |
| "step": 440, | |
| "toxic_reward": 2.416472536325455 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.825, | |
| "epoch": 0.10633270321361059, | |
| "format_reward": -1.5, | |
| "grad_norm": 1.1138451099395752, | |
| "image_reward": 0.28591206669807434, | |
| "kl": 0.022654308984056116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0957, | |
| "reward": -1.2695215404033662, | |
| "reward_std": 2.8351699322462083, | |
| "rewards/reward_func": -1.2695215404033662, | |
| "step": 450, | |
| "toxic_reward": 3.3260442495346068 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.475, | |
| "epoch": 0.10869565217391304, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6227550506591797, | |
| "image_reward": 0.2807729095220566, | |
| "kl": 0.007865939987823367, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0179, | |
| "reward": 1.1717996835708617, | |
| "reward_std": 1.5977750271558762, | |
| "rewards/reward_func": 1.1717996835708617, | |
| "step": 460, | |
| "toxic_reward": 3.309050977230072 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.275, | |
| "epoch": 0.1110586011342155, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.3605867624282837, | |
| "image_reward": 0.3162755310535431, | |
| "kl": 0.02495001317001879, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0852, | |
| "reward": -1.0550554990768433, | |
| "reward_std": 2.7155043721199035, | |
| "rewards/reward_func": -1.0550554990768433, | |
| "step": 470, | |
| "toxic_reward": 3.4480915129184724 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.0, | |
| "epoch": 0.11342155009451796, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.3204725980758667, | |
| "image_reward": 0.32206115424633025, | |
| "kl": 0.011832635500468314, | |
| "learning_rate": 5e-06, | |
| "loss": 0.032, | |
| "reward": -0.29420808106660845, | |
| "reward_std": 1.479024769924581, | |
| "rewards/reward_func": -0.29420808106660845, | |
| "step": 480, | |
| "toxic_reward": 3.5640476822853087 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.2, | |
| "epoch": 0.11578449905482041, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.6204938888549805, | |
| "image_reward": 0.26227518618106843, | |
| "kl": 0.03589183106087148, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1186, | |
| "reward": -1.7148385405540467, | |
| "reward_std": 3.0656426630914213, | |
| "rewards/reward_func": -1.7148385405540467, | |
| "step": 490, | |
| "toxic_reward": 4.371080112457276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.925, | |
| "epoch": 0.11814744801512288, | |
| "format_reward": -1.75, | |
| "grad_norm": 0.7287388443946838, | |
| "image_reward": 0.2700037628412247, | |
| "kl": 0.045285335322842, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0265, | |
| "reward": -1.4807079195976258, | |
| "reward_std": 3.789501038193703, | |
| "rewards/reward_func": -1.4807079195976258, | |
| "step": 500, | |
| "toxic_reward": 3.9034363865852355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.75, | |
| "epoch": 0.12051039697542533, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.8750075697898865, | |
| "image_reward": 0.2932400173611111, | |
| "kl": 0.15442988513968886, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0218, | |
| "reward": -0.7209997951984406, | |
| "reward_std": 1.843582271039486, | |
| "rewards/reward_func": -0.7209997951984406, | |
| "step": 510, | |
| "toxic_reward": 3.8813175095452204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.125, | |
| "epoch": 0.12287334593572778, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.4498269259929657, | |
| "image_reward": 0.29094645082950593, | |
| "kl": 0.01979847764596343, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0324, | |
| "reward": -0.40604341179132464, | |
| "reward_std": 1.4113173604011535, | |
| "rewards/reward_func": -0.40604341179132464, | |
| "step": 520, | |
| "toxic_reward": 4.314427596330643 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.025, | |
| "epoch": 0.12523629489603025, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.7480149269104004, | |
| "image_reward": 0.2472829192876816, | |
| "kl": 0.0556537595577538, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1291, | |
| "reward": -1.0481307327747345, | |
| "reward_std": 2.317431343346834, | |
| "rewards/reward_func": -1.0481307327747345, | |
| "step": 530, | |
| "toxic_reward": 4.515345811843872 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.2, | |
| "epoch": 0.1275992438563327, | |
| "format_reward": -1.25, | |
| "grad_norm": 0.39433184266090393, | |
| "image_reward": 0.2873850494623184, | |
| "kl": 0.039984302362427115, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0301, | |
| "reward": -0.2359391689300537, | |
| "reward_std": 2.863342150300741, | |
| "rewards/reward_func": -0.2359391689300537, | |
| "step": 540, | |
| "toxic_reward": 3.940987694263458 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.15, | |
| "epoch": 0.12996219281663515, | |
| "format_reward": -1.25, | |
| "grad_norm": 2.7985472679138184, | |
| "image_reward": 0.30071309208869934, | |
| "kl": 0.0283741801045835, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0278, | |
| "reward": -0.4430400252342224, | |
| "reward_std": 2.761640505492687, | |
| "rewards/reward_func": -0.4430400252342224, | |
| "step": 550, | |
| "toxic_reward": 3.235564041137695 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.525, | |
| "epoch": 0.1323251417769376, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.208016037940979, | |
| "image_reward": 0.29123942106962203, | |
| "kl": 0.03811377864331007, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2144, | |
| "reward": -1.2057244956493378, | |
| "reward_std": 2.0336616799235343, | |
| "rewards/reward_func": -1.2057244956493378, | |
| "step": 560, | |
| "toxic_reward": 3.977510142326355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 27.55, | |
| "epoch": 0.13468809073724008, | |
| "format_reward": -1.5, | |
| "grad_norm": 0.8842714428901672, | |
| "image_reward": 0.2724670395255089, | |
| "kl": 0.07012159014120697, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1684, | |
| "reward": -1.302715817093849, | |
| "reward_std": 3.6691504657268523, | |
| "rewards/reward_func": -1.302715817093849, | |
| "step": 570, | |
| "toxic_reward": 3.246229815483093 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.6, | |
| "epoch": 0.13705103969754254, | |
| "format_reward": -0.75, | |
| "grad_norm": 0.7157159447669983, | |
| "image_reward": 0.299871826171875, | |
| "kl": 0.028781934920698405, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0308, | |
| "reward": 0.11744136810302734, | |
| "reward_std": 2.1306695722043516, | |
| "rewards/reward_func": 0.11744136810302734, | |
| "step": 580, | |
| "toxic_reward": 3.35184041261673 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.475, | |
| "epoch": 0.139413988657845, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.4593754708766937, | |
| "image_reward": 0.2574858499897851, | |
| "kl": 0.05173348039388657, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1241, | |
| "reward": 0.452265202999115, | |
| "reward_std": 1.2885668274015187, | |
| "rewards/reward_func": 0.452265202999115, | |
| "step": 590, | |
| "toxic_reward": 3.4634872145122952 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.325, | |
| "epoch": 0.14177693761814744, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.5869470834732056, | |
| "image_reward": 0.26802266389131546, | |
| "kl": 0.2022853755392134, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1205, | |
| "reward": -0.9757636785507202, | |
| "reward_std": 2.408064843714237, | |
| "rewards/reward_func": -0.9757636785507202, | |
| "step": 600, | |
| "toxic_reward": 4.45868456363678 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.45, | |
| "epoch": 0.1441398865784499, | |
| "format_reward": -2.75, | |
| "grad_norm": 1.1131778955459595, | |
| "image_reward": 0.26167353987693787, | |
| "kl": 0.16366879558190703, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0012, | |
| "reward": -2.7253461956977842, | |
| "reward_std": 4.713953969441354, | |
| "rewards/reward_func": -2.7253461956977842, | |
| "step": 610, | |
| "toxic_reward": 3.5585821866989136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.65, | |
| "epoch": 0.14650283553875237, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.6662554740905762, | |
| "image_reward": 0.2821828216314316, | |
| "kl": 0.1423144882544875, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0443, | |
| "reward": -0.9905034899711609, | |
| "reward_std": 2.6423311533406375, | |
| "rewards/reward_func": -0.9905034899711609, | |
| "step": 620, | |
| "toxic_reward": 4.095821046829224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.85, | |
| "epoch": 0.14886578449905483, | |
| "format_reward": -1.0, | |
| "grad_norm": 18.956981658935547, | |
| "image_reward": 0.28932088166475295, | |
| "kl": 0.41657317453064024, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0324, | |
| "reward": -0.8243820607662201, | |
| "reward_std": 2.0909267283976076, | |
| "rewards/reward_func": -0.8243820607662201, | |
| "step": 630, | |
| "toxic_reward": 3.2601676136255264 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 29.875, | |
| "epoch": 0.15122873345935728, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.4686508178710938, | |
| "image_reward": 0.29945882111787797, | |
| "kl": 0.28281182143837214, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0769, | |
| "reward": -0.4713120386004448, | |
| "reward_std": 1.791446179151535, | |
| "rewards/reward_func": -0.4713120386004448, | |
| "step": 640, | |
| "toxic_reward": 3.3351209998130797 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.725, | |
| "epoch": 0.15359168241965973, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.9935286045074463, | |
| "image_reward": 0.2910970068640179, | |
| "kl": 1.0141649260884151, | |
| "learning_rate": 5e-06, | |
| "loss": -0.2174, | |
| "reward": -0.5139556050300598, | |
| "reward_std": 1.0858815148472787, | |
| "rewards/reward_func": -0.5139556050300598, | |
| "step": 650, | |
| "toxic_reward": 4.207416137059529 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.2, | |
| "epoch": 0.15595463137996218, | |
| "format_reward": -0.75, | |
| "grad_norm": 3.4974160194396973, | |
| "image_reward": 0.29859237670898436, | |
| "kl": 0.03742524515837431, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0491, | |
| "reward": -0.8688022553920746, | |
| "reward_std": 1.9190378237515688, | |
| "rewards/reward_func": -0.8688022553920746, | |
| "step": 660, | |
| "toxic_reward": 3.639171451330185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.625, | |
| "epoch": 0.15831758034026466, | |
| "format_reward": -0.75, | |
| "grad_norm": 0.6731751561164856, | |
| "image_reward": 0.2705291733145714, | |
| "kl": 0.1289379763416946, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0339, | |
| "reward": -0.5425865709781647, | |
| "reward_std": 2.217602302134037, | |
| "rewards/reward_func": -0.5425865709781647, | |
| "step": 670, | |
| "toxic_reward": 3.7739344239234924 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.9, | |
| "epoch": 0.16068052930056712, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.6705069541931152, | |
| "image_reward": 0.2828119918704033, | |
| "kl": 0.09238320724107325, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0765, | |
| "reward": -0.3722410202026367, | |
| "reward_std": 1.9134121721610426, | |
| "rewards/reward_func": -0.3722410202026367, | |
| "step": 680, | |
| "toxic_reward": 4.390137553215027 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.225, | |
| "epoch": 0.16304347826086957, | |
| "format_reward": -1.0, | |
| "grad_norm": 2.7068045139312744, | |
| "image_reward": 0.27732340693473817, | |
| "kl": 0.06089744158089161, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0203, | |
| "reward": -0.6177265048027039, | |
| "reward_std": 2.210049830470234, | |
| "rewards/reward_func": -0.6177265048027039, | |
| "step": 690, | |
| "toxic_reward": 3.5699973523616793 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.125, | |
| "epoch": 0.16540642722117202, | |
| "format_reward": -1.25, | |
| "grad_norm": 3.031416654586792, | |
| "image_reward": 0.2965891510248184, | |
| "kl": 0.8002684944309294, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0719, | |
| "reward": -0.29744131565093995, | |
| "reward_std": 2.741807485371828, | |
| "rewards/reward_func": -0.29744131565093995, | |
| "step": 700, | |
| "toxic_reward": 3.4483383893966675 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.975, | |
| "epoch": 0.16776937618147447, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.4755773544311523, | |
| "image_reward": 0.2723083525896072, | |
| "kl": 0.24097473481670023, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0494, | |
| "reward": -0.21520038843154907, | |
| "reward_std": 0.7798372395336628, | |
| "rewards/reward_func": -0.21520038843154907, | |
| "step": 710, | |
| "toxic_reward": 4.5303761720657345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.725, | |
| "epoch": 0.17013232514177692, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.2503156661987305, | |
| "image_reward": 0.27466329038143156, | |
| "kl": 0.2257185777183622, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0733, | |
| "reward": 0.11292819976806641, | |
| "reward_std": 1.212121632695198, | |
| "rewards/reward_func": 0.11292819976806641, | |
| "step": 720, | |
| "toxic_reward": 4.0655577898025514 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.225, | |
| "epoch": 0.1724952741020794, | |
| "format_reward": -1.5, | |
| "grad_norm": 7.7392988204956055, | |
| "image_reward": 0.2492055267095566, | |
| "kl": 0.37416572365909817, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0225, | |
| "reward": -1.0509216010570526, | |
| "reward_std": 3.409189415350556, | |
| "rewards/reward_func": -1.0509216010570526, | |
| "step": 730, | |
| "toxic_reward": 4.022808003425598 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 28.525, | |
| "epoch": 0.17485822306238186, | |
| "format_reward": -0.5, | |
| "grad_norm": 4.889242172241211, | |
| "image_reward": 0.30042317807674407, | |
| "kl": 0.22789150793105364, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0569, | |
| "reward": -0.2479497730731964, | |
| "reward_std": 1.3530383894219995, | |
| "rewards/reward_func": -0.2479497730731964, | |
| "step": 740, | |
| "toxic_reward": 3.774165117740631 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.55, | |
| "epoch": 0.1772211720226843, | |
| "format_reward": -1.5, | |
| "grad_norm": 16.729528427124023, | |
| "image_reward": 0.273948161303997, | |
| "kl": 0.43975371681153774, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1103, | |
| "reward": -1.793390053510666, | |
| "reward_std": 3.0602585028856994, | |
| "rewards/reward_func": -1.793390053510666, | |
| "step": 750, | |
| "toxic_reward": 3.111769822239876 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.675, | |
| "epoch": 0.17958412098298676, | |
| "format_reward": -0.25, | |
| "grad_norm": 10.731781005859375, | |
| "image_reward": 0.26650288701057434, | |
| "kl": 0.6582286342978477, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1081, | |
| "reward": 0.10775105953216553, | |
| "reward_std": 1.3219802690669895, | |
| "rewards/reward_func": 0.10775105953216553, | |
| "step": 760, | |
| "toxic_reward": 4.1322005033493046 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.0, | |
| "epoch": 0.1819470699432892, | |
| "format_reward": -0.75, | |
| "grad_norm": 4.2282633781433105, | |
| "image_reward": 0.28914388120174406, | |
| "kl": 0.7939867446199059, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0704, | |
| "reward": -0.24524924755096436, | |
| "reward_std": 2.0771213214844466, | |
| "rewards/reward_func": -0.24524924755096436, | |
| "step": 770, | |
| "toxic_reward": 3.9121114134788515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.975, | |
| "epoch": 0.1843100189035917, | |
| "format_reward": -0.5, | |
| "grad_norm": 8.486693382263184, | |
| "image_reward": 0.246868896484375, | |
| "kl": 1.14481502994895, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0032, | |
| "reward": 0.28170942068099974, | |
| "reward_std": 2.0574716079980133, | |
| "rewards/reward_func": 0.28170942068099974, | |
| "step": 780, | |
| "toxic_reward": 3.4702104151248934 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.15, | |
| "epoch": 0.18667296786389415, | |
| "format_reward": -0.75, | |
| "grad_norm": 27.51862907409668, | |
| "image_reward": 0.26758320927619933, | |
| "kl": 1.0921552445739509, | |
| "learning_rate": 5e-06, | |
| "loss": -0.3259, | |
| "reward": -0.5566600695252418, | |
| "reward_std": 1.7622592605650425, | |
| "rewards/reward_func": -0.5566600695252418, | |
| "step": 790, | |
| "toxic_reward": 3.4233752876520156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.45, | |
| "epoch": 0.1890359168241966, | |
| "format_reward": -0.5, | |
| "grad_norm": 4.040957927703857, | |
| "image_reward": 0.3153462767601013, | |
| "kl": 1.9678303502500056, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1665, | |
| "reward": -0.010482311248779297, | |
| "reward_std": 1.1518827967345715, | |
| "rewards/reward_func": -0.010482311248779297, | |
| "step": 800, | |
| "toxic_reward": 3.6056110084056856 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.5, | |
| "epoch": 0.19139886578449905, | |
| "format_reward": -0.25, | |
| "grad_norm": 12.718086242675781, | |
| "image_reward": 0.27923176884651185, | |
| "kl": 0.9990547701716423, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0447, | |
| "reward": 0.1995850086212158, | |
| "reward_std": 1.246943424642086, | |
| "rewards/reward_func": 0.1995850086212158, | |
| "step": 810, | |
| "toxic_reward": 3.635990482568741 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.525, | |
| "epoch": 0.1937618147448015, | |
| "format_reward": -1.25, | |
| "grad_norm": 5.244020938873291, | |
| "image_reward": 0.27026468962430955, | |
| "kl": 2.5741087660193442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0569, | |
| "reward": -1.3374125480651855, | |
| "reward_std": 2.818611039035022, | |
| "rewards/reward_func": -1.3374125480651855, | |
| "step": 820, | |
| "toxic_reward": 4.255197846889496 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.25, | |
| "epoch": 0.19612476370510398, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.3633440732955933, | |
| "image_reward": 0.29616292417049406, | |
| "kl": 0.48451304286718366, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1053, | |
| "reward": -0.34738388657569885, | |
| "reward_std": 0.9195286151021719, | |
| "rewards/reward_func": -0.34738388657569885, | |
| "step": 830, | |
| "toxic_reward": 4.384462606906891 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.75, | |
| "epoch": 0.19848771266540643, | |
| "format_reward": -0.75, | |
| "grad_norm": 6.93122673034668, | |
| "image_reward": 0.2948842361569405, | |
| "kl": 0.3984289012849331, | |
| "learning_rate": 5e-06, | |
| "loss": 0.007, | |
| "reward": -0.4061413824558258, | |
| "reward_std": 2.115474058687687, | |
| "rewards/reward_func": -0.4061413824558258, | |
| "step": 840, | |
| "toxic_reward": 2.784619116783142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.85, | |
| "epoch": 0.2008506616257089, | |
| "format_reward": -1.0, | |
| "grad_norm": 11.167367935180664, | |
| "image_reward": 0.2535125732421875, | |
| "kl": 0.7260896906256675, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0252, | |
| "reward": -0.6900001287460327, | |
| "reward_std": 2.5411489391699433, | |
| "rewards/reward_func": -0.6900001287460327, | |
| "step": 850, | |
| "toxic_reward": 3.902221655845642 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.35, | |
| "epoch": 0.20321361058601134, | |
| "format_reward": -0.25, | |
| "grad_norm": 12.129627227783203, | |
| "image_reward": 0.25641682744026184, | |
| "kl": 0.5523816287517548, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0869, | |
| "reward": 0.027270352840423583, | |
| "reward_std": 1.1594479020684958, | |
| "rewards/reward_func": 0.027270352840423583, | |
| "step": 860, | |
| "toxic_reward": 4.19142780303955 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.75, | |
| "epoch": 0.2055765595463138, | |
| "format_reward": -1.0, | |
| "grad_norm": 25.523523330688477, | |
| "image_reward": 0.28674203488561845, | |
| "kl": 1.1298049300909043, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0639, | |
| "reward": -1.0763263344764709, | |
| "reward_std": 1.7480091962963342, | |
| "rewards/reward_func": -1.0763263344764709, | |
| "step": 870, | |
| "toxic_reward": 4.468152364095052 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.575, | |
| "epoch": 0.20793950850661624, | |
| "format_reward": -1.0, | |
| "grad_norm": 3.8387675285339355, | |
| "image_reward": 0.26868184506893156, | |
| "kl": 0.9680751413106918, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0833, | |
| "reward": -0.8666846975684166, | |
| "reward_std": 2.079224378615618, | |
| "rewards/reward_func": -0.8666846975684166, | |
| "step": 880, | |
| "toxic_reward": 3.481996048986912 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.825, | |
| "epoch": 0.21030245746691872, | |
| "format_reward": -0.5, | |
| "grad_norm": 15.843626022338867, | |
| "image_reward": 0.2802464798092842, | |
| "kl": 0.49419727362692356, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0241, | |
| "reward": 0.11158292293548584, | |
| "reward_std": 1.7106264479458333, | |
| "rewards/reward_func": 0.11158292293548584, | |
| "step": 890, | |
| "toxic_reward": 3.7324341177940368 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.3, | |
| "epoch": 0.21266540642722118, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.770407199859619, | |
| "image_reward": 0.27023824155330656, | |
| "kl": 0.2871086034923792, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1861, | |
| "reward": -0.27072116136550906, | |
| "reward_std": 1.447587224841118, | |
| "rewards/reward_func": -0.27072116136550906, | |
| "step": 900, | |
| "toxic_reward": 3.426037532091141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.0, | |
| "epoch": 0.21502835538752363, | |
| "format_reward": -0.5, | |
| "grad_norm": 6.4211225509643555, | |
| "image_reward": 0.2804026290774345, | |
| "kl": 1.5080223519355058, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0382, | |
| "reward": -0.10845602005720138, | |
| "reward_std": 1.7854840472340583, | |
| "rewards/reward_func": -0.10845602005720138, | |
| "step": 910, | |
| "toxic_reward": 3.3229601860046385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.4, | |
| "epoch": 0.21739130434782608, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.846864938735962, | |
| "image_reward": 0.29064489238791996, | |
| "kl": 0.8340548906475306, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0872, | |
| "reward": -1.217875736951828, | |
| "reward_std": 1.4547557694837452, | |
| "rewards/reward_func": -1.217875736951828, | |
| "step": 920, | |
| "toxic_reward": 4.098645766576131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.3, | |
| "epoch": 0.21975425330812853, | |
| "format_reward": -0.5, | |
| "grad_norm": 14.329817771911621, | |
| "image_reward": 0.28984171748161314, | |
| "kl": 0.3335365690290928, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0341, | |
| "reward": -0.14692462086677552, | |
| "reward_std": 1.6654048651456832, | |
| "rewards/reward_func": -0.14692462086677552, | |
| "step": 930, | |
| "toxic_reward": 3.8828285098075868 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.15, | |
| "epoch": 0.222117202268431, | |
| "format_reward": -1.0, | |
| "grad_norm": 13.11744499206543, | |
| "image_reward": 0.2768778458237648, | |
| "kl": 0.7420168094336986, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1362, | |
| "reward": -0.5828769445419312, | |
| "reward_std": 2.509597599506378, | |
| "rewards/reward_func": -0.5828769445419312, | |
| "step": 940, | |
| "toxic_reward": 3.994591364264488 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.9, | |
| "epoch": 0.22448015122873347, | |
| "format_reward": -1.25, | |
| "grad_norm": 1.4235849380493164, | |
| "image_reward": 0.2599512729793787, | |
| "kl": 0.23791442420333625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2081, | |
| "reward": -0.7265825271606445, | |
| "reward_std": 2.4457253187894823, | |
| "rewards/reward_func": -0.7265825271606445, | |
| "step": 950, | |
| "toxic_reward": 4.328470140695572 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.05, | |
| "epoch": 0.22684310018903592, | |
| "format_reward": -1.0, | |
| "grad_norm": 10.51688003540039, | |
| "image_reward": 0.29052734225988386, | |
| "kl": 1.1104660354554654, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1948, | |
| "reward": -0.3963636875152588, | |
| "reward_std": 2.6071507059037686, | |
| "rewards/reward_func": -0.3963636875152588, | |
| "step": 960, | |
| "toxic_reward": 3.5060137271881104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.075, | |
| "epoch": 0.22920604914933837, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.5477933287620544, | |
| "image_reward": 0.2825276702642441, | |
| "kl": 0.24828157052397729, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2364, | |
| "reward": -0.024850471317768096, | |
| "reward_std": 0.7480767840519548, | |
| "rewards/reward_func": -0.024850471317768096, | |
| "step": 970, | |
| "toxic_reward": 3.07701745480299 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.925, | |
| "epoch": 0.23156899810964082, | |
| "format_reward": -1.25, | |
| "grad_norm": 6.296302318572998, | |
| "image_reward": 0.26927467518382603, | |
| "kl": 3.1552879590541125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0241, | |
| "reward": -0.648174649477005, | |
| "reward_std": 2.8984405621886253, | |
| "rewards/reward_func": -0.648174649477005, | |
| "step": 980, | |
| "toxic_reward": 3.881988432672289 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.475, | |
| "epoch": 0.2339319470699433, | |
| "format_reward": -1.0, | |
| "grad_norm": 2.797386646270752, | |
| "image_reward": 0.2668904632329941, | |
| "kl": 1.7048991359770298, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0828, | |
| "reward": -1.1502302587032318, | |
| "reward_std": 2.383236999064684, | |
| "rewards/reward_func": -1.1502302587032318, | |
| "step": 990, | |
| "toxic_reward": 4.231578087806701 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.425, | |
| "epoch": 0.23629489603024575, | |
| "format_reward": -0.75, | |
| "grad_norm": 13.208063125610352, | |
| "image_reward": 0.2917307555675507, | |
| "kl": 0.7445122614502907, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1073, | |
| "reward": -0.7605196535587311, | |
| "reward_std": 2.2064386613667013, | |
| "rewards/reward_func": -0.7605196535587311, | |
| "step": 1000, | |
| "toxic_reward": 3.5633171044290064 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.875, | |
| "epoch": 0.2386578449905482, | |
| "format_reward": -1.0, | |
| "grad_norm": 10.358668327331543, | |
| "image_reward": 0.26257934868335725, | |
| "kl": 0.35015557184815405, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0206, | |
| "reward": -0.38898804783821106, | |
| "reward_std": 2.7123206526041033, | |
| "rewards/reward_func": -0.38898804783821106, | |
| "step": 1010, | |
| "toxic_reward": 3.609158730506897 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.95, | |
| "epoch": 0.24102079395085066, | |
| "format_reward": -1.0, | |
| "grad_norm": 9.602174758911133, | |
| "image_reward": 0.289794921875, | |
| "kl": 0.2867487147450447, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0269, | |
| "reward": -0.4154239475727081, | |
| "reward_std": 2.4513496346771717, | |
| "rewards/reward_func": -0.4154239475727081, | |
| "step": 1020, | |
| "toxic_reward": 4.2405922412872314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.5, | |
| "epoch": 0.2433837429111531, | |
| "format_reward": -1.0, | |
| "grad_norm": 6.7750630378723145, | |
| "image_reward": 0.2876515701413155, | |
| "kl": 0.8189243379980325, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0184, | |
| "reward": -0.9024024844169617, | |
| "reward_std": 2.123489296063781, | |
| "rewards/reward_func": -0.9024024844169617, | |
| "step": 1030, | |
| "toxic_reward": 3.870901381969452 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.1, | |
| "epoch": 0.24574669187145556, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.4051434993743896, | |
| "image_reward": 0.2766723616255654, | |
| "kl": 0.7713468134403229, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1005, | |
| "reward": 0.42890325784683225, | |
| "reward_std": 1.7344073422253132, | |
| "rewards/reward_func": 0.42890325784683225, | |
| "step": 1040, | |
| "toxic_reward": 3.850937591658698 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.6, | |
| "epoch": 0.24810964083175804, | |
| "format_reward": -0.5, | |
| "grad_norm": 10.04930591583252, | |
| "image_reward": 0.2845031708478928, | |
| "kl": 0.21945146545767785, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0794, | |
| "reward": -0.29822829365730286, | |
| "reward_std": 2.0626097127795218, | |
| "rewards/reward_func": -0.29822829365730286, | |
| "step": 1050, | |
| "toxic_reward": 3.3056647762656213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.675, | |
| "epoch": 0.2504725897920605, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.4483786821365356, | |
| "image_reward": 0.2949198380112648, | |
| "kl": 0.5147463826462626, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0472, | |
| "reward": -0.4302744150161743, | |
| "reward_std": 0.9093868482857943, | |
| "rewards/reward_func": -0.4302744150161743, | |
| "step": 1060, | |
| "toxic_reward": 4.118269920349121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.125, | |
| "epoch": 0.252835538752363, | |
| "format_reward": -0.75, | |
| "grad_norm": 5.471806526184082, | |
| "image_reward": 0.3024444580078125, | |
| "kl": 0.924912228435278, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0653, | |
| "reward": -0.9226927876472473, | |
| "reward_std": 1.8348794005811215, | |
| "rewards/reward_func": -0.9226927876472473, | |
| "step": 1070, | |
| "toxic_reward": 3.55495400428772 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.225, | |
| "epoch": 0.2551984877126654, | |
| "format_reward": -0.5, | |
| "grad_norm": 6.291661739349365, | |
| "image_reward": 0.30248311161994934, | |
| "kl": 0.14056268222630025, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1172, | |
| "reward": -0.07832016348838806, | |
| "reward_std": 1.6703550808131695, | |
| "rewards/reward_func": -0.07832016348838806, | |
| "step": 1080, | |
| "toxic_reward": 3.876679849624634 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.575, | |
| "epoch": 0.2575614366729679, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.747459411621094, | |
| "image_reward": 0.268257649242878, | |
| "kl": 0.20501487758010625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1961, | |
| "reward": 0.8156829088926315, | |
| "reward_std": 0.6415594108402729, | |
| "rewards/reward_func": 0.8156829088926315, | |
| "step": 1090, | |
| "toxic_reward": 4.041116189956665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.5, | |
| "epoch": 0.2599243856332703, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.5391029715538025, | |
| "image_reward": 0.27643330842256547, | |
| "kl": 0.27743567544966935, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0042, | |
| "reward": 0.06835275292396545, | |
| "reward_std": 1.1296793665736913, | |
| "rewards/reward_func": 0.06835275292396545, | |
| "step": 1100, | |
| "toxic_reward": 3.7508057713508607 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.475, | |
| "epoch": 0.2622873345935728, | |
| "format_reward": -0.75, | |
| "grad_norm": 5.044631004333496, | |
| "image_reward": 0.2711191803216934, | |
| "kl": 0.08945430461317301, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1402, | |
| "reward": -0.8795787930488587, | |
| "reward_std": 1.802781331539154, | |
| "rewards/reward_func": -0.8795787930488587, | |
| "step": 1110, | |
| "toxic_reward": 3.978659760951996 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.0, | |
| "epoch": 0.2646502835538752, | |
| "format_reward": -1.25, | |
| "grad_norm": 10.223982810974121, | |
| "image_reward": 0.2896250396966934, | |
| "kl": 0.5244473532773555, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1933, | |
| "reward": -0.48248053193092344, | |
| "reward_std": 2.971283960342407, | |
| "rewards/reward_func": -0.48248053193092344, | |
| "step": 1120, | |
| "toxic_reward": 3.2150497317314146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.8, | |
| "epoch": 0.2670132325141777, | |
| "format_reward": -0.5, | |
| "grad_norm": 3.6621553897857666, | |
| "image_reward": 0.2852656051516533, | |
| "kl": 0.5911644924432039, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0576, | |
| "reward": -0.3013936847448349, | |
| "reward_std": 1.430125593394041, | |
| "rewards/reward_func": -0.3013936847448349, | |
| "step": 1130, | |
| "toxic_reward": 4.058745819330215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.95, | |
| "epoch": 0.26937618147448017, | |
| "format_reward": -0.5, | |
| "grad_norm": 24.121688842773438, | |
| "image_reward": 0.2795908600091934, | |
| "kl": 0.4301185546442866, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0126, | |
| "reward": -0.10317457914352417, | |
| "reward_std": 1.667516409419477, | |
| "rewards/reward_func": -0.10317457914352417, | |
| "step": 1140, | |
| "toxic_reward": 4.072073769569397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.025, | |
| "epoch": 0.2717391304347826, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.7166000604629517, | |
| "image_reward": 0.2804423004388809, | |
| "kl": 0.675014778599143, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0049, | |
| "reward": 0.4254330635070801, | |
| "reward_std": 0.9621219031512738, | |
| "rewards/reward_func": 0.4254330635070801, | |
| "step": 1150, | |
| "toxic_reward": 3.471704053878784 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.175, | |
| "epoch": 0.2741020793950851, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.803680658340454, | |
| "image_reward": 0.31466064155101775, | |
| "kl": 0.344609697163105, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1152, | |
| "reward": -0.5670508742332458, | |
| "reward_std": 2.301799529790878, | |
| "rewards/reward_func": -0.5670508742332458, | |
| "step": 1160, | |
| "toxic_reward": 3.554426383972168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 31.475, | |
| "epoch": 0.2764650283553875, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.919179439544678, | |
| "image_reward": 0.26389770656824113, | |
| "kl": 0.8297407850623131, | |
| "learning_rate": 5e-06, | |
| "loss": -0.28, | |
| "reward": 0.23291709423065185, | |
| "reward_std": 0.47383863255381586, | |
| "rewards/reward_func": 0.23291709423065185, | |
| "step": 1170, | |
| "toxic_reward": 4.360145711898804 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.0, | |
| "epoch": 0.27882797731569, | |
| "format_reward": -0.5, | |
| "grad_norm": 294.2972106933594, | |
| "image_reward": 0.2640360534191132, | |
| "kl": 0.9242212943732738, | |
| "learning_rate": 5e-06, | |
| "loss": 0.017, | |
| "reward": -0.04461590349674225, | |
| "reward_std": 1.7138214907608926, | |
| "rewards/reward_func": -0.04461590349674225, | |
| "step": 1180, | |
| "toxic_reward": 3.5669440746307375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 60.525, | |
| "epoch": 0.28119092627599246, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6788994669914246, | |
| "image_reward": 0.2832122802734375, | |
| "kl": 6.060492021404206, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1039, | |
| "reward": 0.30282129645347594, | |
| "reward_std": 1.3184241026639938, | |
| "rewards/reward_func": 0.30282129645347594, | |
| "step": 1190, | |
| "toxic_reward": 3.858977997303009 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.5, | |
| "epoch": 0.2835538752362949, | |
| "format_reward": -1.0, | |
| "grad_norm": 2.821944236755371, | |
| "image_reward": 0.292755126953125, | |
| "kl": 0.2833241932094097, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0765, | |
| "reward": -0.8336254239082337, | |
| "reward_std": 2.1170720741152764, | |
| "rewards/reward_func": -0.8336254239082337, | |
| "step": 1200, | |
| "toxic_reward": 4.131281018257141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.975, | |
| "epoch": 0.28591682419659736, | |
| "format_reward": -0.75, | |
| "grad_norm": 5.20048189163208, | |
| "image_reward": 0.3018681839108467, | |
| "kl": 0.26484427275136113, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0037, | |
| "reward": -0.23466770052909852, | |
| "reward_std": 2.3572978913784026, | |
| "rewards/reward_func": -0.23466770052909852, | |
| "step": 1210, | |
| "toxic_reward": 3.701621878147125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.95, | |
| "epoch": 0.2882797731568998, | |
| "format_reward": -0.75, | |
| "grad_norm": 2.5671803951263428, | |
| "image_reward": 0.2591837555170059, | |
| "kl": 0.27887978348881004, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1531, | |
| "reward": -0.5629445493221283, | |
| "reward_std": 2.2025086715817452, | |
| "rewards/reward_func": -0.5629445493221283, | |
| "step": 1220, | |
| "toxic_reward": 3.878066289424896 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.975, | |
| "epoch": 0.29064272211720227, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.3592997789382935, | |
| "image_reward": 0.2804290771484375, | |
| "kl": 0.7250507925637066, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1268, | |
| "reward": 0.029623252153396607, | |
| "reward_std": 1.3399539720267057, | |
| "rewards/reward_func": 0.029623252153396607, | |
| "step": 1230, | |
| "toxic_reward": 3.5630233764648436 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.675, | |
| "epoch": 0.29300567107750475, | |
| "format_reward": -1.25, | |
| "grad_norm": 6.867509365081787, | |
| "image_reward": 0.2880493178963661, | |
| "kl": 0.46422886326909063, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0122, | |
| "reward": -1.0097105741500854, | |
| "reward_std": 2.696252405457199, | |
| "rewards/reward_func": -1.0097105741500854, | |
| "step": 1240, | |
| "toxic_reward": 4.076703870296479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.4, | |
| "epoch": 0.2953686200378072, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.707825183868408, | |
| "image_reward": 0.256890869140625, | |
| "kl": 0.1788209406659007, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0197, | |
| "reward": 0.38095744252204894, | |
| "reward_std": 1.2988073959946633, | |
| "rewards/reward_func": 0.38095744252204894, | |
| "step": 1250, | |
| "toxic_reward": 3.8400187373161314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.625, | |
| "epoch": 0.29773156899810965, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.229298710823059, | |
| "image_reward": 0.313336181640625, | |
| "kl": 0.33243545759469273, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1516, | |
| "reward": -0.5754710257053375, | |
| "reward_std": 1.8287720288150013, | |
| "rewards/reward_func": -0.5754710257053375, | |
| "step": 1260, | |
| "toxic_reward": 4.415339708328247 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 60.25, | |
| "epoch": 0.3000945179584121, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.5034794807434082, | |
| "image_reward": 0.27869771271944044, | |
| "kl": 0.38923515090718863, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0327, | |
| "reward": -0.4456570327281952, | |
| "reward_std": 1.5328068390488625, | |
| "rewards/reward_func": -0.4456570327281952, | |
| "step": 1270, | |
| "toxic_reward": 3.8723622620105744 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.425, | |
| "epoch": 0.30245746691871456, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.2214823961257935, | |
| "image_reward": 0.2668467193841934, | |
| "kl": 1.2360946209169925, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1227, | |
| "reward": -0.9184286594390869, | |
| "reward_std": 2.3616207716986537, | |
| "rewards/reward_func": -0.9184286594390869, | |
| "step": 1280, | |
| "toxic_reward": 4.0201707005500795 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.525, | |
| "epoch": 0.30482041587901704, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.6785597205162048, | |
| "image_reward": 0.27662353664636613, | |
| "kl": 0.6153190754354, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0909, | |
| "reward": -0.025622844696044922, | |
| "reward_std": 1.7058033104985952, | |
| "rewards/reward_func": -0.025622844696044922, | |
| "step": 1290, | |
| "toxic_reward": 3.283605984598398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.475, | |
| "epoch": 0.30718336483931946, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.5470991134643555, | |
| "image_reward": 0.28620096743106843, | |
| "kl": 1.3450787207111716, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0773, | |
| "reward": 0.39500882625579836, | |
| "reward_std": 1.9240341871976852, | |
| "rewards/reward_func": 0.39500882625579836, | |
| "step": 1300, | |
| "toxic_reward": 3.8390918374061584 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 74.6, | |
| "epoch": 0.30954631379962194, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.827681541442871, | |
| "image_reward": 0.2871856689453125, | |
| "kl": 0.2589964304119349, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0753, | |
| "reward": -0.08085522651672364, | |
| "reward_std": 0.7007970325648785, | |
| "rewards/reward_func": -0.08085522651672364, | |
| "step": 1310, | |
| "toxic_reward": 4.15708065032959 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.175, | |
| "epoch": 0.31190926275992437, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.559379816055298, | |
| "image_reward": 0.28839518427848815, | |
| "kl": 1.160063625872135, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1169, | |
| "reward": 0.457793202996254, | |
| "reward_std": 0.8301180111244321, | |
| "rewards/reward_func": 0.457793202996254, | |
| "step": 1320, | |
| "toxic_reward": 3.847675251960754 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.05, | |
| "epoch": 0.31427221172022685, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.0227330923080444, | |
| "image_reward": 0.25479024201631545, | |
| "kl": 5.228898542746902, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1453, | |
| "reward": -0.19808580130338668, | |
| "reward_std": 1.237728140875697, | |
| "rewards/reward_func": -0.19808580130338668, | |
| "step": 1330, | |
| "toxic_reward": 3.487361752986908 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.375, | |
| "epoch": 0.3166351606805293, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.158604383468628, | |
| "image_reward": 0.27274220883846284, | |
| "kl": 5.145803064666689, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0016, | |
| "reward": 0.5905790150165557, | |
| "reward_std": 1.0763475911691784, | |
| "rewards/reward_func": 0.5905790150165557, | |
| "step": 1340, | |
| "toxic_reward": 3.561137008666992 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.9, | |
| "epoch": 0.31899810964083175, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.078782081604004, | |
| "image_reward": 0.27456156313419344, | |
| "kl": 4.645642199181021, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0272, | |
| "reward": 0.0937275767326355, | |
| "reward_std": 1.5942428700625897, | |
| "rewards/reward_func": 0.0937275767326355, | |
| "step": 1350, | |
| "toxic_reward": 3.385586667060852 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.05, | |
| "epoch": 0.32136105860113423, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.4886958599090576, | |
| "image_reward": 0.27929331362247467, | |
| "kl": 0.6772738939616829, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1689, | |
| "reward": 0.10146453976631165, | |
| "reward_std": 1.4149208962917328, | |
| "rewards/reward_func": 0.10146453976631165, | |
| "step": 1360, | |
| "toxic_reward": 4.062562417984009 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.025, | |
| "epoch": 0.32372400756143666, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.45091304183006287, | |
| "image_reward": 0.26109618991613387, | |
| "kl": 1.1132759511470796, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0773, | |
| "reward": 0.4344749391078949, | |
| "reward_std": 0.6906750492751599, | |
| "rewards/reward_func": 0.4344749391078949, | |
| "step": 1370, | |
| "toxic_reward": 3.89659765958786 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.1, | |
| "epoch": 0.32608695652173914, | |
| "format_reward": -1.0, | |
| "grad_norm": 2.2919623851776123, | |
| "image_reward": 0.2507191985845566, | |
| "kl": 2.863751105964184, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0426, | |
| "reward": -0.3381307005882263, | |
| "reward_std": 1.9777413787320257, | |
| "rewards/reward_func": -0.3381307005882263, | |
| "step": 1380, | |
| "toxic_reward": 4.168315529823303 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 59.7, | |
| "epoch": 0.3284499054820416, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.546894073486328, | |
| "image_reward": 0.2879852294921875, | |
| "kl": 1.016882681287825, | |
| "learning_rate": 5e-06, | |
| "loss": 0.065, | |
| "reward": -0.029438415169715883, | |
| "reward_std": 0.3044209867715836, | |
| "rewards/reward_func": -0.029438415169715883, | |
| "step": 1390, | |
| "toxic_reward": 3.8181951224803923 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.425, | |
| "epoch": 0.33081285444234404, | |
| "format_reward": -0.75, | |
| "grad_norm": 3.9508233070373535, | |
| "image_reward": 0.3041224151849747, | |
| "kl": 1.2148886673152446, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0983, | |
| "reward": -0.08471554517745972, | |
| "reward_std": 2.0540446445345877, | |
| "rewards/reward_func": -0.08471554517745972, | |
| "step": 1400, | |
| "toxic_reward": 4.20858781337738 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.5, | |
| "epoch": 0.3331758034026465, | |
| "format_reward": -0.75, | |
| "grad_norm": 23.3671817779541, | |
| "image_reward": 0.2869578033685684, | |
| "kl": 9.38541857972741, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0633, | |
| "reward": -0.21220148205757142, | |
| "reward_std": 2.147160884644836, | |
| "rewards/reward_func": -0.21220148205757142, | |
| "step": 1410, | |
| "toxic_reward": 3.646671336889267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.5, | |
| "epoch": 0.33553875236294894, | |
| "format_reward": -0.5, | |
| "grad_norm": 5.768739223480225, | |
| "image_reward": 0.29927419126033783, | |
| "kl": 3.124450533092022, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0205, | |
| "reward": -0.20792179703712463, | |
| "reward_std": 1.7920773405581714, | |
| "rewards/reward_func": -0.20792179703712463, | |
| "step": 1420, | |
| "toxic_reward": 3.938745903968811 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.3, | |
| "epoch": 0.3379017013232514, | |
| "format_reward": -0.75, | |
| "grad_norm": 7.805192947387695, | |
| "image_reward": 0.2781646728515625, | |
| "kl": 9.086061615869403, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1052, | |
| "reward": 0.26188963651657104, | |
| "reward_std": 1.916423682682216, | |
| "rewards/reward_func": 0.26188963651657104, | |
| "step": 1430, | |
| "toxic_reward": 3.8569429397583006 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.075, | |
| "epoch": 0.34026465028355385, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.398307800292969, | |
| "image_reward": 0.28298187255859375, | |
| "kl": 3.477000297047198, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1652, | |
| "reward": 0.2245475471019745, | |
| "reward_std": 0.7394228018820286, | |
| "rewards/reward_func": 0.2245475471019745, | |
| "step": 1440, | |
| "toxic_reward": 3.977894365787506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.375, | |
| "epoch": 0.34262759924385633, | |
| "format_reward": -0.25, | |
| "grad_norm": 15.553762435913086, | |
| "image_reward": 0.26566060483455656, | |
| "kl": 17.512660111114382, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0446, | |
| "reward": 0.14810482859611512, | |
| "reward_std": 1.504632395505905, | |
| "rewards/reward_func": 0.14810482859611512, | |
| "step": 1450, | |
| "toxic_reward": 3.5254761219024657 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.475, | |
| "epoch": 0.3449905482041588, | |
| "format_reward": -1.0, | |
| "grad_norm": 2.524869918823242, | |
| "image_reward": 0.2846649169921875, | |
| "kl": 1.9967870802618564, | |
| "learning_rate": 5e-06, | |
| "loss": -0.002, | |
| "reward": -1.1410660862922668, | |
| "reward_std": 2.0114028319716453, | |
| "rewards/reward_func": -1.1410660862922668, | |
| "step": 1460, | |
| "toxic_reward": 4.1038308382034305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.125, | |
| "epoch": 0.34735349716446123, | |
| "format_reward": -1.0, | |
| "grad_norm": 5.871716499328613, | |
| "image_reward": 0.2953603118658066, | |
| "kl": 1.2091532168909906, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1278, | |
| "reward": -0.8882034704089165, | |
| "reward_std": 2.2325065452605486, | |
| "rewards/reward_func": -0.8882034704089165, | |
| "step": 1470, | |
| "toxic_reward": 2.88705118894577 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.5, | |
| "epoch": 0.3497164461247637, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.483914852142334, | |
| "image_reward": 0.2924133285880089, | |
| "kl": 20.523441922478376, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0722, | |
| "reward": 0.036271828413009646, | |
| "reward_std": 1.0079955972731114, | |
| "rewards/reward_func": 0.036271828413009646, | |
| "step": 1480, | |
| "toxic_reward": 3.1371969431638718 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.975, | |
| "epoch": 0.35207939508506614, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.4849286079406738, | |
| "image_reward": 0.2851186111569405, | |
| "kl": 2.15047435965389, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0946, | |
| "reward": 0.441963791847229, | |
| "reward_std": 0.4248314931988716, | |
| "rewards/reward_func": 0.441963791847229, | |
| "step": 1490, | |
| "toxic_reward": 3.752102476358414 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.0, | |
| "epoch": 0.3544423440453686, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.0869834423065186, | |
| "image_reward": 0.2911224365234375, | |
| "kl": 2.3197390008717775, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0123, | |
| "reward": 0.13550712168216705, | |
| "reward_std": 1.141077246889472, | |
| "rewards/reward_func": 0.13550712168216705, | |
| "step": 1500, | |
| "toxic_reward": 3.36595538854599 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.6, | |
| "epoch": 0.3568052930056711, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.7763924598693848, | |
| "image_reward": 0.28095703125, | |
| "kl": 0.8447903416119515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1001, | |
| "reward": 0.07771911025047303, | |
| "reward_std": 1.3111265070736409, | |
| "rewards/reward_func": 0.07771911025047303, | |
| "step": 1510, | |
| "toxic_reward": 4.060403060913086 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.575, | |
| "epoch": 0.3591682419659735, | |
| "format_reward": -0.25, | |
| "grad_norm": 11.143818855285645, | |
| "image_reward": 0.2903269439935684, | |
| "kl": 1.106547536328435, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0008, | |
| "reward": 0.7432255536317826, | |
| "reward_std": 1.0503722863271832, | |
| "rewards/reward_func": 0.7432255536317826, | |
| "step": 1520, | |
| "toxic_reward": 3.4027091443538664 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.825, | |
| "epoch": 0.361531190926276, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.157534599304199, | |
| "image_reward": 0.2823811858892441, | |
| "kl": 0.7211934769526124, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0288, | |
| "reward": 0.11932253241539001, | |
| "reward_std": 1.307121137715876, | |
| "rewards/reward_func": 0.11932253241539001, | |
| "step": 1530, | |
| "toxic_reward": 3.8783162236213684 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.075, | |
| "epoch": 0.3638941398865784, | |
| "format_reward": -1.0, | |
| "grad_norm": 2.383302688598633, | |
| "image_reward": 0.28258056491613387, | |
| "kl": 3.830422883108258, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0149, | |
| "reward": -0.39380887150764465, | |
| "reward_std": 2.854560297727585, | |
| "rewards/reward_func": -0.39380887150764465, | |
| "step": 1540, | |
| "toxic_reward": 3.2431194216012953 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.55, | |
| "epoch": 0.3662570888468809, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.1643450260162354, | |
| "image_reward": 0.2872863754630089, | |
| "kl": 0.3903345447033644, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2399, | |
| "reward": 0.23153584003448485, | |
| "reward_std": 1.3368525609374047, | |
| "rewards/reward_func": 0.23153584003448485, | |
| "step": 1550, | |
| "toxic_reward": 3.452616012096405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.625, | |
| "epoch": 0.3686200378071834, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.922444224357605, | |
| "image_reward": 0.29551798701286314, | |
| "kl": 0.9415501815266907, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0285, | |
| "reward": 0.2152680218219757, | |
| "reward_std": 1.0939797786995769, | |
| "rewards/reward_func": 0.2152680218219757, | |
| "step": 1560, | |
| "toxic_reward": 4.278083491325378 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.65, | |
| "epoch": 0.3709829867674858, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.9485223293304443, | |
| "image_reward": 0.289756266772747, | |
| "kl": 0.52877401644364, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0562, | |
| "reward": -0.7691292554140091, | |
| "reward_std": 2.194984516873956, | |
| "rewards/reward_func": -0.7691292554140091, | |
| "step": 1570, | |
| "toxic_reward": 3.7907654672861097 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.35, | |
| "epoch": 0.3733459357277883, | |
| "format_reward": -0.75, | |
| "grad_norm": 4.795892238616943, | |
| "image_reward": 0.29136555939912795, | |
| "kl": 1.7273975620046258, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0006, | |
| "reward": -0.2548545479774475, | |
| "reward_std": 2.3145264372229577, | |
| "rewards/reward_func": -0.2548545479774475, | |
| "step": 1580, | |
| "toxic_reward": 3.206251806020737 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.15, | |
| "epoch": 0.3757088846880907, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.6828984022140503, | |
| "image_reward": 0.29258016049861907, | |
| "kl": 0.27110366327688096, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0295, | |
| "reward": 0.2889214813709259, | |
| "reward_std": 1.4156969770789147, | |
| "rewards/reward_func": 0.2889214813709259, | |
| "step": 1590, | |
| "toxic_reward": 3.8408302307128905 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.075, | |
| "epoch": 0.3780718336483932, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.8415489196777344, | |
| "image_reward": 0.2738067626953125, | |
| "kl": 1.5718746781349182, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0332, | |
| "reward": -0.4795783460140228, | |
| "reward_std": 1.1532321106642485, | |
| "rewards/reward_func": -0.4795783460140228, | |
| "step": 1600, | |
| "toxic_reward": 3.8701359391212464 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.3, | |
| "epoch": 0.3804347826086957, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.4898248612880707, | |
| "image_reward": 0.28651835173368456, | |
| "kl": 0.5627498641610146, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1267, | |
| "reward": -0.35464051365852356, | |
| "reward_std": 1.5732567172497511, | |
| "rewards/reward_func": -0.35464051365852356, | |
| "step": 1610, | |
| "toxic_reward": 4.116016793251037 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.0, | |
| "epoch": 0.3827977315689981, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.5352033376693726, | |
| "image_reward": 0.29410196989774706, | |
| "kl": 1.2344657305628062, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1575, | |
| "reward": -0.4094507694244385, | |
| "reward_std": 1.245941134635359, | |
| "rewards/reward_func": -0.4094507694244385, | |
| "step": 1620, | |
| "toxic_reward": 4.569849014282227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 75.625, | |
| "epoch": 0.3851606805293006, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.5829593539237976, | |
| "image_reward": 0.280389404296875, | |
| "kl": 1.36093844124116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1245, | |
| "reward": -0.31835838556289675, | |
| "reward_std": 2.00613936111331, | |
| "rewards/reward_func": -0.31835838556289675, | |
| "step": 1630, | |
| "toxic_reward": 4.125273871421814 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.425, | |
| "epoch": 0.387523629489603, | |
| "format_reward": -0.75, | |
| "grad_norm": 0.8723268508911133, | |
| "image_reward": 0.27287851870059965, | |
| "kl": 0.15645003337413071, | |
| "learning_rate": 5e-06, | |
| "loss": -0.039, | |
| "reward": -0.8851189732551574, | |
| "reward_std": 2.1296220384538174, | |
| "rewards/reward_func": -0.8851189732551574, | |
| "step": 1640, | |
| "toxic_reward": 3.323053848743439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.35, | |
| "epoch": 0.3898865784499055, | |
| "format_reward": -0.75, | |
| "grad_norm": 0.14725980162620544, | |
| "image_reward": 0.28720601350069047, | |
| "kl": 1.1328919077292086, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0133, | |
| "reward": -0.12160237431526184, | |
| "reward_std": 1.725741315446794, | |
| "rewards/reward_func": -0.12160237431526184, | |
| "step": 1650, | |
| "toxic_reward": 3.924569344520569 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.05, | |
| "epoch": 0.39224952741020797, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.200639009475708, | |
| "image_reward": 0.2846842437982559, | |
| "kl": 0.11551734725944698, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0781, | |
| "reward": 0.11074192523956299, | |
| "reward_std": 1.8953823536634444, | |
| "rewards/reward_func": 0.11074192523956299, | |
| "step": 1660, | |
| "toxic_reward": 3.5436886310577393 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 30.925, | |
| "epoch": 0.3946124763705104, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.0496935844421387, | |
| "image_reward": 0.2790842682123184, | |
| "kl": 2.538264278974384, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1096, | |
| "reward": 0.14284086227416992, | |
| "reward_std": 0.8084073163568973, | |
| "rewards/reward_func": 0.14284086227416992, | |
| "step": 1670, | |
| "toxic_reward": 4.144779133796692 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.675, | |
| "epoch": 0.39697542533081287, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.9690385460853577, | |
| "image_reward": 0.2903676345944405, | |
| "kl": 3.7070351759903133, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1427, | |
| "reward": 0.008394747972488403, | |
| "reward_std": 1.8407307181507349, | |
| "rewards/reward_func": 0.008394747972488403, | |
| "step": 1680, | |
| "toxic_reward": 3.498854029178619 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.875, | |
| "epoch": 0.3993383742911153, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.6957125067710876, | |
| "image_reward": 0.2657012939453125, | |
| "kl": 0.42172617875039575, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1448, | |
| "reward": -0.40106786489486695, | |
| "reward_std": 1.718069277703762, | |
| "rewards/reward_func": -0.40106786489486695, | |
| "step": 1690, | |
| "toxic_reward": 3.609626793861389 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.375, | |
| "epoch": 0.4017013232514178, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.07503342628479, | |
| "image_reward": 0.2696156814694405, | |
| "kl": 1.291714602895081, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0722, | |
| "reward": -0.014362984895706176, | |
| "reward_std": 1.5762588312849402, | |
| "rewards/reward_func": -0.014362984895706176, | |
| "step": 1700, | |
| "toxic_reward": 4.394974184036255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.125, | |
| "epoch": 0.40406427221172025, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.1231868267059326, | |
| "image_reward": 0.290789794921875, | |
| "kl": 0.21602323912084104, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0932, | |
| "reward": 0.4133676677942276, | |
| "reward_std": 0.8327854365110398, | |
| "rewards/reward_func": 0.4133676677942276, | |
| "step": 1710, | |
| "toxic_reward": 3.955091452598572 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.45, | |
| "epoch": 0.4064272211720227, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.602283000946045, | |
| "image_reward": 0.2754241943359375, | |
| "kl": 2.6595573978964238, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1005, | |
| "reward": 0.07846117615699769, | |
| "reward_std": 1.170348797738552, | |
| "rewards/reward_func": 0.07846117615699769, | |
| "step": 1720, | |
| "toxic_reward": 4.142733359336853 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 65.9, | |
| "epoch": 0.40879017013232516, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.5282357335090637, | |
| "image_reward": 0.26338195651769636, | |
| "kl": 0.2848859841004014, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0035, | |
| "reward": -0.5072973608970642, | |
| "reward_std": 2.7491880640387536, | |
| "rewards/reward_func": -0.5072973608970642, | |
| "step": 1730, | |
| "toxic_reward": 4.047195649147033 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.375, | |
| "epoch": 0.4111531190926276, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.5527747869491577, | |
| "image_reward": 0.2691065490245819, | |
| "kl": 1.2007373101077974, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0239, | |
| "reward": -0.045976501703262326, | |
| "reward_std": 0.8193172802217304, | |
| "rewards/reward_func": -0.045976501703262326, | |
| "step": 1740, | |
| "toxic_reward": 3.446149069070816 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.725, | |
| "epoch": 0.41351606805293006, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.5118568539619446, | |
| "image_reward": 0.27915140688419343, | |
| "kl": 0.9548864349722862, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1013, | |
| "reward": -0.10445084571838378, | |
| "reward_std": 0.730734084546566, | |
| "rewards/reward_func": -0.10445084571838378, | |
| "step": 1750, | |
| "toxic_reward": 4.5370954990386965 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.25, | |
| "epoch": 0.4158790170132325, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.8082605600357056, | |
| "image_reward": 0.264396159350872, | |
| "kl": 1.575367003493011, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0632, | |
| "reward": 0.14499086737632752, | |
| "reward_std": 0.663521677441895, | |
| "rewards/reward_func": 0.14499086737632752, | |
| "step": 1760, | |
| "toxic_reward": 4.827451419830322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.325, | |
| "epoch": 0.41824196597353497, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.833739697933197, | |
| "image_reward": 0.28918762058019637, | |
| "kl": 0.6164161543361842, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0846, | |
| "reward": -0.22242847234010696, | |
| "reward_std": 1.0645570412278176, | |
| "rewards/reward_func": -0.22242847234010696, | |
| "step": 1770, | |
| "toxic_reward": 3.958344542980194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.925, | |
| "epoch": 0.42060491493383745, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.929023027420044, | |
| "image_reward": 0.2808074980974197, | |
| "kl": 0.8390735885128379, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0834, | |
| "reward": -0.738262277841568, | |
| "reward_std": 1.677246123738587, | |
| "rewards/reward_func": -0.738262277841568, | |
| "step": 1780, | |
| "toxic_reward": 3.8094155311584474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.4, | |
| "epoch": 0.4229678638941399, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.0305073261260986, | |
| "image_reward": 0.286659748852253, | |
| "kl": 0.6373991215135902, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0697, | |
| "reward": -0.2053418666124344, | |
| "reward_std": 1.680133179202676, | |
| "rewards/reward_func": -0.2053418666124344, | |
| "step": 1790, | |
| "toxic_reward": 3.8562827944755553 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.8, | |
| "epoch": 0.42533081285444235, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.9716371297836304, | |
| "image_reward": 0.292718505859375, | |
| "kl": 0.6843567499890924, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0924, | |
| "reward": 0.7018224939703941, | |
| "reward_std": 0.8987518041394651, | |
| "rewards/reward_func": 0.7018224939703941, | |
| "step": 1800, | |
| "toxic_reward": 3.408372712135315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.25, | |
| "epoch": 0.4276937618147448, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.081742286682129, | |
| "image_reward": 0.2768310546875, | |
| "kl": 0.7960635300725698, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0443, | |
| "reward": -0.25897485911846163, | |
| "reward_std": 0.9034805342555046, | |
| "rewards/reward_func": -0.25897485911846163, | |
| "step": 1810, | |
| "toxic_reward": 3.7079725742340086 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.3, | |
| "epoch": 0.43005671077504726, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6442953944206238, | |
| "image_reward": 0.27892710268497467, | |
| "kl": 0.7656038996763528, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0099, | |
| "reward": -0.13414714336395264, | |
| "reward_std": 1.1088863730430603, | |
| "rewards/reward_func": -0.13414714336395264, | |
| "step": 1820, | |
| "toxic_reward": 3.735495138168335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.525, | |
| "epoch": 0.43241965973534974, | |
| "format_reward": -1.0, | |
| "grad_norm": 0.7406989336013794, | |
| "image_reward": 0.2804585784673691, | |
| "kl": 3.6395583665929734, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1008, | |
| "reward": -0.8905552387237549, | |
| "reward_std": 2.38557695299387, | |
| "rewards/reward_func": -0.8905552387237549, | |
| "step": 1830, | |
| "toxic_reward": 3.60183764398098 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.375, | |
| "epoch": 0.43478260869565216, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.5541785955429077, | |
| "image_reward": 0.30787862092256546, | |
| "kl": 1.104234455060214, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0222, | |
| "reward": 0.09280971884727478, | |
| "reward_std": 1.7143970176577568, | |
| "rewards/reward_func": 0.09280971884727478, | |
| "step": 1840, | |
| "toxic_reward": 3.689550542831421 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.0, | |
| "epoch": 0.43714555765595464, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.8598329424858093, | |
| "image_reward": 0.2855051666498184, | |
| "kl": 0.16781285647302865, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1435, | |
| "reward": 0.3788378477096558, | |
| "reward_std": 1.0338344363495708, | |
| "rewards/reward_func": 0.3788378477096558, | |
| "step": 1850, | |
| "toxic_reward": 4.1332162618637085 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.9, | |
| "epoch": 0.43950850661625707, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.6019521951675415, | |
| "image_reward": 0.27197469025850296, | |
| "kl": 7.518688270077109, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0371, | |
| "reward": 0.130861234664917, | |
| "reward_std": 1.7171866662800312, | |
| "rewards/reward_func": 0.130861234664917, | |
| "step": 1860, | |
| "toxic_reward": 4.243645071983337 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.7, | |
| "epoch": 0.44187145557655955, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.5758384466171265, | |
| "image_reward": 0.28136799931526185, | |
| "kl": 2.1443952365778385, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0189, | |
| "reward": -0.18380895256996155, | |
| "reward_std": 1.6837687961757184, | |
| "rewards/reward_func": -0.18380895256996155, | |
| "step": 1870, | |
| "toxic_reward": 3.4331242620944975 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.3, | |
| "epoch": 0.444234404536862, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.5153789520263672, | |
| "image_reward": 0.28166198879480364, | |
| "kl": 1.9300499164499343, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0564, | |
| "reward": -0.7839775577187538, | |
| "reward_std": 2.034397203475237, | |
| "rewards/reward_func": -0.7839775577187538, | |
| "step": 1880, | |
| "toxic_reward": 3.5422126829624174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.05, | |
| "epoch": 0.44659735349716445, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.02174973487854, | |
| "image_reward": 0.30441080778837204, | |
| "kl": 5.820364655274898, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1999, | |
| "reward": 0.5548859179019928, | |
| "reward_std": 0.8466346619650722, | |
| "rewards/reward_func": 0.5548859179019928, | |
| "step": 1890, | |
| "toxic_reward": 3.5053808212280275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.0, | |
| "epoch": 0.44896030245746693, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.8126834630966187, | |
| "image_reward": 0.25828145295381544, | |
| "kl": 1.9232184071093799, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0966, | |
| "reward": -0.5137902736663819, | |
| "reward_std": 2.415500694513321, | |
| "rewards/reward_func": -0.5137902736663819, | |
| "step": 1900, | |
| "toxic_reward": 3.4278686165809633 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.7, | |
| "epoch": 0.45132325141776936, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.6371603608131409, | |
| "image_reward": 0.2626200348138809, | |
| "kl": 6.273042661882937, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0209, | |
| "reward": -0.10160770416259765, | |
| "reward_std": 1.7223791293799877, | |
| "rewards/reward_func": -0.10160770416259765, | |
| "step": 1910, | |
| "toxic_reward": 3.4677812099456786 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.9, | |
| "epoch": 0.45368620037807184, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.025303840637207, | |
| "image_reward": 0.27600199580192564, | |
| "kl": 2.9244240637868644, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0036, | |
| "reward": 0.2618570938706398, | |
| "reward_std": 0.7942308865487575, | |
| "rewards/reward_func": 0.2618570938706398, | |
| "step": 1920, | |
| "toxic_reward": 3.214989905059338 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.5, | |
| "epoch": 0.4560491493383743, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.0306193828582764, | |
| "image_reward": 0.27111816257238386, | |
| "kl": 7.301137297973037, | |
| "learning_rate": 5e-06, | |
| "loss": -0.3058, | |
| "reward": 0.7629794716835022, | |
| "reward_std": 1.207332517206669, | |
| "rewards/reward_func": 0.7629794716835022, | |
| "step": 1930, | |
| "toxic_reward": 3.8610877275466917 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.325, | |
| "epoch": 0.45841209829867674, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.4994942843914032, | |
| "image_reward": 0.2564666748046875, | |
| "kl": 1.9746190145611764, | |
| "learning_rate": 5e-06, | |
| "loss": -0.056, | |
| "reward": 0.17883441746234893, | |
| "reward_std": 1.9227621294558048, | |
| "rewards/reward_func": 0.17883441746234893, | |
| "step": 1940, | |
| "toxic_reward": 3.5681721329689027 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.5, | |
| "epoch": 0.4607750472589792, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.0730820894241333, | |
| "image_reward": 0.2937784805893898, | |
| "kl": 2.8218962060287596, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0566, | |
| "reward": -0.1567411482334137, | |
| "reward_std": 1.654453044757247, | |
| "rewards/reward_func": -0.1567411482334137, | |
| "step": 1950, | |
| "toxic_reward": 3.6663838982582093 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.725, | |
| "epoch": 0.46313799621928164, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0345563888549805, | |
| "image_reward": 0.2648590087890625, | |
| "kl": 0.5958237243816257, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0654, | |
| "reward": 0.12212587893009186, | |
| "reward_std": 0.6707309451885521, | |
| "rewards/reward_func": 0.12212587893009186, | |
| "step": 1960, | |
| "toxic_reward": 3.1909562170505525 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.4, | |
| "epoch": 0.4655009451795841, | |
| "format_reward": -0.5, | |
| "grad_norm": 5.125189781188965, | |
| "image_reward": 0.28848724216222765, | |
| "kl": 1.6634003438055516, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0863, | |
| "reward": -0.1009038507938385, | |
| "reward_std": 1.4750457480549812, | |
| "rewards/reward_func": -0.1009038507938385, | |
| "step": 1970, | |
| "toxic_reward": 4.304786968231201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.075, | |
| "epoch": 0.4678638941398866, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.4688388109207153, | |
| "image_reward": 0.27630208283662794, | |
| "kl": 0.420011714566499, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0726, | |
| "reward": -0.325018173456192, | |
| "reward_std": 1.0332348687574266, | |
| "rewards/reward_func": -0.325018173456192, | |
| "step": 1980, | |
| "toxic_reward": 3.5992671266198157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.85, | |
| "epoch": 0.47022684310018903, | |
| "format_reward": -0.25, | |
| "grad_norm": 11.723315238952637, | |
| "image_reward": 0.26587321013212206, | |
| "kl": 0.32123089879751204, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0977, | |
| "reward": -0.41115415692329405, | |
| "reward_std": 1.5678910434246063, | |
| "rewards/reward_func": -0.41115415692329405, | |
| "step": 1990, | |
| "toxic_reward": 3.7649365305900573 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.35, | |
| "epoch": 0.4725897920604915, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.3079888820648193, | |
| "image_reward": 0.27147267758846283, | |
| "kl": 0.2777526224032044, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0282, | |
| "reward": -0.2599769473075867, | |
| "reward_std": 0.731538234371692, | |
| "rewards/reward_func": -0.2599769473075867, | |
| "step": 2000, | |
| "toxic_reward": 4.658599400520325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.85, | |
| "epoch": 0.47495274102079393, | |
| "format_reward": -0.5, | |
| "grad_norm": 14.372509956359863, | |
| "image_reward": 0.2984934478998184, | |
| "kl": 3.4746980018913747, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0433, | |
| "reward": -0.3160775646567345, | |
| "reward_std": 0.8356795504689216, | |
| "rewards/reward_func": -0.3160775646567345, | |
| "step": 2010, | |
| "toxic_reward": 3.6712876573204993 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.975, | |
| "epoch": 0.4773156899810964, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.949368476867676, | |
| "image_reward": 0.2758158355951309, | |
| "kl": 2.603505723550916, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1898, | |
| "reward": 0.5061412572860717, | |
| "reward_std": 0.6404913809150458, | |
| "rewards/reward_func": 0.5061412572860717, | |
| "step": 2020, | |
| "toxic_reward": 4.01279228925705 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.05, | |
| "epoch": 0.47967863894139884, | |
| "format_reward": -0.5, | |
| "grad_norm": 11.427620887756348, | |
| "image_reward": 0.2567454010248184, | |
| "kl": 0.622926688939333, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0783, | |
| "reward": 0.21228746175765992, | |
| "reward_std": 1.9739407232031225, | |
| "rewards/reward_func": 0.21228746175765992, | |
| "step": 2030, | |
| "toxic_reward": 3.7354461193084716 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.45, | |
| "epoch": 0.4820415879017013, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.316232204437256, | |
| "image_reward": 0.2718638092279434, | |
| "kl": 2.3269161872565745, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1163, | |
| "reward": 0.737056265771389, | |
| "reward_std": 0.9669643521308899, | |
| "rewards/reward_func": 0.737056265771389, | |
| "step": 2040, | |
| "toxic_reward": 3.0878625586628914 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.475, | |
| "epoch": 0.4844045368620038, | |
| "format_reward": -1.0, | |
| "grad_norm": 41.36595153808594, | |
| "image_reward": 0.26953938901424407, | |
| "kl": 0.7504621215164662, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1493, | |
| "reward": -1.3220559000968932, | |
| "reward_std": 1.9624842151999473, | |
| "rewards/reward_func": -1.3220559000968932, | |
| "step": 2050, | |
| "toxic_reward": 3.74695360660553 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.825, | |
| "epoch": 0.4867674858223062, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.471742153167725, | |
| "image_reward": 0.2753570556640625, | |
| "kl": 0.07729073958471418, | |
| "learning_rate": 5e-06, | |
| "loss": -0.03, | |
| "reward": 1.3116377294063568, | |
| "reward_std": 1.4300442904233932, | |
| "rewards/reward_func": 1.3116377294063568, | |
| "step": 2060, | |
| "toxic_reward": 3.5985005378723143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.825, | |
| "epoch": 0.4891304347826087, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.805216670036316, | |
| "image_reward": 0.306744384765625, | |
| "kl": 6.001958086341619, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1945, | |
| "reward": 0.36415485143661497, | |
| "reward_std": 0.6190065078437328, | |
| "rewards/reward_func": 0.36415485143661497, | |
| "step": 2070, | |
| "toxic_reward": 4.081458044052124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.275, | |
| "epoch": 0.4914933837429111, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.216772079467773, | |
| "image_reward": 0.2797536224126816, | |
| "kl": 0.49935312662273645, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0342, | |
| "reward": 0.23056302070617676, | |
| "reward_std": 0.4776972606778145, | |
| "rewards/reward_func": 0.23056302070617676, | |
| "step": 2080, | |
| "toxic_reward": 4.019720596075058 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.075, | |
| "epoch": 0.4938563327032136, | |
| "format_reward": -0.75, | |
| "grad_norm": 13.060705184936523, | |
| "image_reward": 0.28729756474494933, | |
| "kl": 4.740964457206428, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0645, | |
| "reward": -0.4479706704616547, | |
| "reward_std": 2.0641879491508006, | |
| "rewards/reward_func": -0.4479706704616547, | |
| "step": 2090, | |
| "toxic_reward": 2.7062815964221953 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.575, | |
| "epoch": 0.4962192816635161, | |
| "format_reward": -0.25, | |
| "grad_norm": 14.017393112182617, | |
| "image_reward": 0.2847381591796875, | |
| "kl": 0.9378721818327904, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0908, | |
| "reward": 0.4732812285423279, | |
| "reward_std": 1.2860259119421245, | |
| "rewards/reward_func": 0.4732812285423279, | |
| "step": 2100, | |
| "toxic_reward": 3.420735603570938 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.075, | |
| "epoch": 0.4985822306238185, | |
| "format_reward": -0.75, | |
| "grad_norm": 6.193188667297363, | |
| "image_reward": 0.27182515412569047, | |
| "kl": 2.9611662749201058, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0056, | |
| "reward": -0.19096837639808656, | |
| "reward_std": 1.8480727752670645, | |
| "rewards/reward_func": -0.19096837639808656, | |
| "step": 2110, | |
| "toxic_reward": 4.268127584457398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.8, | |
| "epoch": 0.500945179584121, | |
| "format_reward": -0.75, | |
| "grad_norm": 11.63723087310791, | |
| "image_reward": 0.2698944091796875, | |
| "kl": 1.1968733308836819, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0042, | |
| "reward": -0.5995136559009552, | |
| "reward_std": 2.1293695636093615, | |
| "rewards/reward_func": -0.5995136559009552, | |
| "step": 2120, | |
| "toxic_reward": 3.746561822295189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.8, | |
| "epoch": 0.5033081285444234, | |
| "format_reward": -0.75, | |
| "grad_norm": 2.3855180740356445, | |
| "image_reward": 0.26025390625, | |
| "kl": 1.5614483684301377, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2496, | |
| "reward": -0.6204059720039368, | |
| "reward_std": 1.9704039812088012, | |
| "rewards/reward_func": -0.6204059720039368, | |
| "step": 2130, | |
| "toxic_reward": 3.747698575258255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.75, | |
| "epoch": 0.505671077504726, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.681392669677734, | |
| "image_reward": 0.27169952541589737, | |
| "kl": 3.525779527798295, | |
| "learning_rate": 5e-06, | |
| "loss": -0.154, | |
| "reward": 0.7122885227203369, | |
| "reward_std": 1.038828771188855, | |
| "rewards/reward_func": 0.7122885227203369, | |
| "step": 2140, | |
| "toxic_reward": 3.8024647355079653 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.55, | |
| "epoch": 0.5080340264650284, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.522043228149414, | |
| "image_reward": 0.2867136627435684, | |
| "kl": 2.352656077966094, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0567, | |
| "reward": 0.3375007212162018, | |
| "reward_std": 1.1598852841183542, | |
| "rewards/reward_func": 0.3375007212162018, | |
| "step": 2150, | |
| "toxic_reward": 3.6138802111148833 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.75, | |
| "epoch": 0.5103969754253308, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.265325546264648, | |
| "image_reward": 0.2756062835454941, | |
| "kl": 6.923487820476294, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1108, | |
| "reward": 0.7483027845621109, | |
| "reward_std": 0.5725362204015255, | |
| "rewards/reward_func": 0.7483027845621109, | |
| "step": 2160, | |
| "toxic_reward": 3.906574785709381 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.525, | |
| "epoch": 0.5127599243856332, | |
| "format_reward": -0.75, | |
| "grad_norm": 21.7608642578125, | |
| "image_reward": 0.2696726471185684, | |
| "kl": 5.021715716272593, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1133, | |
| "reward": -0.4512764573097229, | |
| "reward_std": 2.062841220572591, | |
| "rewards/reward_func": -0.4512764573097229, | |
| "step": 2170, | |
| "toxic_reward": 4.282562255859375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.425, | |
| "epoch": 0.5151228733459358, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.369183301925659, | |
| "image_reward": 0.28711649775505066, | |
| "kl": 12.483240520581603, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0658, | |
| "reward": -0.0087041437625885, | |
| "reward_std": 1.3220645122230053, | |
| "rewards/reward_func": -0.0087041437625885, | |
| "step": 2180, | |
| "toxic_reward": 3.781124639511108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.0, | |
| "epoch": 0.5174858223062382, | |
| "format_reward": -0.5, | |
| "grad_norm": 4.219491958618164, | |
| "image_reward": 0.27772623747587205, | |
| "kl": 2.453311304561794, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0285, | |
| "reward": -0.30757330656051635, | |
| "reward_std": 1.7083245173096657, | |
| "rewards/reward_func": -0.30757330656051635, | |
| "step": 2190, | |
| "toxic_reward": 4.130738306045532 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.85, | |
| "epoch": 0.5198487712665406, | |
| "format_reward": -0.5, | |
| "grad_norm": 6.190961837768555, | |
| "image_reward": 0.2818817153573036, | |
| "kl": 4.28942144587636, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1115, | |
| "reward": 0.2441554695367813, | |
| "reward_std": 1.9595814019441604, | |
| "rewards/reward_func": 0.2441554695367813, | |
| "step": 2200, | |
| "toxic_reward": 3.141683894395828 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.225, | |
| "epoch": 0.5222117202268431, | |
| "format_reward": -0.5, | |
| "grad_norm": 4.348143577575684, | |
| "image_reward": 0.29916890412569047, | |
| "kl": 0.34145298339426516, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0071, | |
| "reward": -0.5653827100992203, | |
| "reward_std": 1.6975119888782502, | |
| "rewards/reward_func": -0.5653827100992203, | |
| "step": 2210, | |
| "toxic_reward": 3.599680471420288 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.0, | |
| "epoch": 0.5245746691871456, | |
| "format_reward": -0.75, | |
| "grad_norm": 6.7439422607421875, | |
| "image_reward": 0.2785715714097023, | |
| "kl": 1.8124071411788463, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0723, | |
| "reward": -0.6911701261997223, | |
| "reward_std": 1.9053923369385302, | |
| "rewards/reward_func": -0.6911701261997223, | |
| "step": 2220, | |
| "toxic_reward": 3.67071852684021 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.9, | |
| "epoch": 0.526937618147448, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.702417373657227, | |
| "image_reward": 0.2697733551263809, | |
| "kl": 3.5654136715456843, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0592, | |
| "reward": 0.31644179224967955, | |
| "reward_std": 1.338551426678896, | |
| "rewards/reward_func": 0.31644179224967955, | |
| "step": 2230, | |
| "toxic_reward": 4.082410860061645 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.9, | |
| "epoch": 0.5293005671077504, | |
| "format_reward": -0.75, | |
| "grad_norm": 3.3108696937561035, | |
| "image_reward": 0.2754450500011444, | |
| "kl": 1.1358238738030195, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0103, | |
| "reward": -0.19608908146619797, | |
| "reward_std": 1.9574983415892349, | |
| "rewards/reward_func": -0.19608908146619797, | |
| "step": 2240, | |
| "toxic_reward": 3.8882675245404243 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.775, | |
| "epoch": 0.531663516068053, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.8872108459472656, | |
| "image_reward": 0.2711354583501816, | |
| "kl": 0.6185108724981546, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0331, | |
| "reward": 0.43025930523872374, | |
| "reward_std": 0.6924620851874351, | |
| "rewards/reward_func": 0.43025930523872374, | |
| "step": 2250, | |
| "toxic_reward": 3.741843378543854 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.05, | |
| "epoch": 0.5340264650283554, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.605905055999756, | |
| "image_reward": 0.24824727326631546, | |
| "kl": 3.812788811326027, | |
| "learning_rate": 5e-06, | |
| "loss": -0.062, | |
| "reward": -0.0177284836769104, | |
| "reward_std": 1.7159371480345726, | |
| "rewards/reward_func": -0.0177284836769104, | |
| "step": 2260, | |
| "toxic_reward": 3.8558017730712892 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.225, | |
| "epoch": 0.5363894139886578, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.317953109741211, | |
| "image_reward": 0.29388427734375, | |
| "kl": 0.9772842615842819, | |
| "learning_rate": 5e-06, | |
| "loss": -0.005, | |
| "reward": 0.24463090300559998, | |
| "reward_std": 0.8211262285709381, | |
| "rewards/reward_func": 0.24463090300559998, | |
| "step": 2270, | |
| "toxic_reward": 3.4330978095531464 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.35, | |
| "epoch": 0.5387523629489603, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.7746388912200928, | |
| "image_reward": 0.28372802734375, | |
| "kl": 0.6956694826483727, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0806, | |
| "reward": 0.9492665678262711, | |
| "reward_std": 1.2596320446580649, | |
| "rewards/reward_func": 0.9492665678262711, | |
| "step": 2280, | |
| "toxic_reward": 3.6599619805812837 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.65, | |
| "epoch": 0.5411153119092628, | |
| "format_reward": -0.75, | |
| "grad_norm": 24.271883010864258, | |
| "image_reward": 0.25230407863855364, | |
| "kl": 2.0102761931717397, | |
| "learning_rate": 5e-06, | |
| "loss": 0.099, | |
| "reward": -0.5960418626666069, | |
| "reward_std": 1.6162065342068672, | |
| "rewards/reward_func": -0.5960418626666069, | |
| "step": 2290, | |
| "toxic_reward": 3.32955624461174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.025, | |
| "epoch": 0.5434782608695652, | |
| "format_reward": -0.75, | |
| "grad_norm": 12.164813995361328, | |
| "image_reward": 0.27450052797794344, | |
| "kl": 1.0361489206552505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0215, | |
| "reward": -0.12894563674926757, | |
| "reward_std": 2.2585421696305277, | |
| "rewards/reward_func": -0.12894563674926757, | |
| "step": 2300, | |
| "toxic_reward": 3.8079848527908324 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.925, | |
| "epoch": 0.5458412098298677, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.370122909545898, | |
| "image_reward": 0.28968607634305954, | |
| "kl": 2.262423123046756, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0122, | |
| "reward": 0.4122478127479553, | |
| "reward_std": 0.8819206684827805, | |
| "rewards/reward_func": 0.4122478127479553, | |
| "step": 2310, | |
| "toxic_reward": 3.7774435758590696 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.625, | |
| "epoch": 0.5482041587901701, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.913710594177246, | |
| "image_reward": 0.2981597900390625, | |
| "kl": 1.1325825482606888, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0383, | |
| "reward": -0.302042031288147, | |
| "reward_std": 1.1343338422477245, | |
| "rewards/reward_func": -0.302042031288147, | |
| "step": 2320, | |
| "toxic_reward": 3.4699944481253624 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.875, | |
| "epoch": 0.5505671077504726, | |
| "format_reward": -0.5, | |
| "grad_norm": 10.183396339416504, | |
| "image_reward": 0.2794362396001816, | |
| "kl": 2.359659927338362, | |
| "learning_rate": 5e-06, | |
| "loss": 0.127, | |
| "reward": -0.5543205380439759, | |
| "reward_std": 1.5390649776905776, | |
| "rewards/reward_func": -0.5543205380439759, | |
| "step": 2330, | |
| "toxic_reward": 4.130715823173523 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.275, | |
| "epoch": 0.552930056710775, | |
| "format_reward": -0.25, | |
| "grad_norm": 29.773969650268555, | |
| "image_reward": 0.3009490996599197, | |
| "kl": 1.2122079662978649, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0302, | |
| "reward": 0.49274033308029175, | |
| "reward_std": 1.2792111776769162, | |
| "rewards/reward_func": 0.49274033308029175, | |
| "step": 2340, | |
| "toxic_reward": 4.144988393783569 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.925, | |
| "epoch": 0.5552930056710775, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.4507733583450317, | |
| "image_reward": 0.27436625212430954, | |
| "kl": 10.124456256255508, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0586, | |
| "reward": 0.16714471578598022, | |
| "reward_std": 1.1183603500947357, | |
| "rewards/reward_func": 0.16714471578598022, | |
| "step": 2350, | |
| "toxic_reward": 3.7719646602869035 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.45, | |
| "epoch": 0.55765595463138, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.8344922065734863, | |
| "image_reward": 0.27209879606962206, | |
| "kl": 0.4884789928793907, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0246, | |
| "reward": 0.7492954432964325, | |
| "reward_std": 1.5298523031175137, | |
| "rewards/reward_func": 0.7492954432964325, | |
| "step": 2360, | |
| "toxic_reward": 3.582643675804138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.925, | |
| "epoch": 0.5600189035916824, | |
| "format_reward": -0.25, | |
| "grad_norm": 28.500579833984375, | |
| "image_reward": 0.256439208984375, | |
| "kl": 7.240471968054772, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0033, | |
| "reward": 0.39032529294490814, | |
| "reward_std": 1.3387351400218903, | |
| "rewards/reward_func": 0.39032529294490814, | |
| "step": 2370, | |
| "toxic_reward": 3.7680604696273803 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.35, | |
| "epoch": 0.5623818525519849, | |
| "format_reward": -0.25, | |
| "grad_norm": 18.509540557861328, | |
| "image_reward": 0.25091654509305955, | |
| "kl": 2.3443214535713195, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0852, | |
| "reward": -0.013416659832000733, | |
| "reward_std": 1.2783805396407844, | |
| "rewards/reward_func": -0.013416659832000733, | |
| "step": 2380, | |
| "toxic_reward": 3.937808632850647 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.125, | |
| "epoch": 0.5647448015122873, | |
| "format_reward": -0.5, | |
| "grad_norm": 11.650871276855469, | |
| "image_reward": 0.29215189516544343, | |
| "kl": 0.3515282288193703, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0259, | |
| "reward": 0.1742587387561798, | |
| "reward_std": 1.8562648460268973, | |
| "rewards/reward_func": 0.1742587387561798, | |
| "step": 2390, | |
| "toxic_reward": 3.7724621415138246 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.55, | |
| "epoch": 0.5671077504725898, | |
| "format_reward": -1.0, | |
| "grad_norm": 20.670705795288086, | |
| "image_reward": 0.26702982634305955, | |
| "kl": 2.7752922803163527, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1898, | |
| "reward": -0.49167909026145934, | |
| "reward_std": 2.5721775129437447, | |
| "rewards/reward_func": -0.49167909026145934, | |
| "step": 2400, | |
| "toxic_reward": 3.612065541744232 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.75, | |
| "epoch": 0.5694706994328923, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.918033599853516, | |
| "image_reward": 0.27968953400850294, | |
| "kl": 1.1868829876184464, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0339, | |
| "reward": -0.041136431694030764, | |
| "reward_std": 1.1883981741964817, | |
| "rewards/reward_func": -0.041136431694030764, | |
| "step": 2410, | |
| "toxic_reward": 4.002831280231476 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.4, | |
| "epoch": 0.5718336483931947, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.842867851257324, | |
| "image_reward": 0.27998046875, | |
| "kl": 0.9403334192931652, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0547, | |
| "reward": 0.23068565130233765, | |
| "reward_std": 1.2439154148101808, | |
| "rewards/reward_func": 0.23068565130233765, | |
| "step": 2420, | |
| "toxic_reward": 3.8584881067276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.2, | |
| "epoch": 0.5741965973534972, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.205660820007324, | |
| "image_reward": 0.2850880965590477, | |
| "kl": 1.6154363751411438, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0775, | |
| "reward": 0.4115023612976074, | |
| "reward_std": 1.0730943327769638, | |
| "rewards/reward_func": 0.4115023612976074, | |
| "step": 2430, | |
| "toxic_reward": 4.400762820243836 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.925, | |
| "epoch": 0.5765595463137996, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.637028455734253, | |
| "image_reward": 0.26606852263212205, | |
| "kl": 1.6208242058753968, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0364, | |
| "reward": -0.5815495431423188, | |
| "reward_std": 1.270220142416656, | |
| "rewards/reward_func": -0.5815495431423188, | |
| "step": 2440, | |
| "toxic_reward": 3.934324860572815 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 66.35, | |
| "epoch": 0.5789224952741021, | |
| "format_reward": -0.75, | |
| "grad_norm": 11.621758460998535, | |
| "image_reward": 0.28047332763671873, | |
| "kl": 0.7798056200146675, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0519, | |
| "reward": 0.1087100327014923, | |
| "reward_std": 2.0828719630837442, | |
| "rewards/reward_func": 0.1087100327014923, | |
| "step": 2450, | |
| "toxic_reward": 2.8291834026575087 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.95, | |
| "epoch": 0.5812854442344045, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.702945709228516, | |
| "image_reward": 0.28443044126033784, | |
| "kl": 1.73483949303627, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0958, | |
| "reward": 0.28035863041877745, | |
| "reward_std": 0.5182013310492039, | |
| "rewards/reward_func": 0.28035863041877745, | |
| "step": 2460, | |
| "toxic_reward": 3.8520292162895204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.925, | |
| "epoch": 0.583648393194707, | |
| "format_reward": -0.5, | |
| "grad_norm": 18.073659896850586, | |
| "image_reward": 0.24947459101676941, | |
| "kl": 2.9204909898340703, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1939, | |
| "reward": 0.42990538477897644, | |
| "reward_std": 1.8428901416249572, | |
| "rewards/reward_func": 0.42990538477897644, | |
| "step": 2470, | |
| "toxic_reward": 3.7781980872154235 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.175, | |
| "epoch": 0.5860113421550095, | |
| "format_reward": -0.5, | |
| "grad_norm": 4.270178318023682, | |
| "image_reward": 0.28282063752412795, | |
| "kl": 0.48990702964365485, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0624, | |
| "reward": -0.30424859523773196, | |
| "reward_std": 1.5560518722981214, | |
| "rewards/reward_func": -0.30424859523773196, | |
| "step": 2480, | |
| "toxic_reward": 4.44784414768219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.9, | |
| "epoch": 0.5883742911153119, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.575175762176514, | |
| "image_reward": 0.2695292145013809, | |
| "kl": 1.2654437847435474, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1158, | |
| "reward": -0.44633115231990816, | |
| "reward_std": 1.8826897315680982, | |
| "rewards/reward_func": -0.44633115231990816, | |
| "step": 2490, | |
| "toxic_reward": 3.8135931372642515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.3, | |
| "epoch": 0.5907372400756143, | |
| "format_reward": 0.0, | |
| "grad_norm": 24.015722274780273, | |
| "image_reward": 0.26897684782743453, | |
| "kl": 5.640305678918958, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1054, | |
| "reward": 0.6214121818542481, | |
| "reward_std": 0.9682584583759308, | |
| "rewards/reward_func": 0.6214121818542481, | |
| "step": 2500, | |
| "toxic_reward": 3.9037705421447755 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.125, | |
| "epoch": 0.5931001890359168, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.069973945617676, | |
| "image_reward": 0.2854502350091934, | |
| "kl": 11.71274044290185, | |
| "learning_rate": 5e-06, | |
| "loss": -0.077, | |
| "reward": -0.3511055693030357, | |
| "reward_std": 1.0736159782391042, | |
| "rewards/reward_func": -0.3511055693030357, | |
| "step": 2510, | |
| "toxic_reward": 3.7281174302101134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.375, | |
| "epoch": 0.5954631379962193, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0403361320495605, | |
| "image_reward": 0.2833099365234375, | |
| "kl": 0.6411756843328476, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0435, | |
| "reward": 0.5592477023601532, | |
| "reward_std": 0.8428021136671304, | |
| "rewards/reward_func": 0.5592477023601532, | |
| "step": 2520, | |
| "toxic_reward": 3.6056689500808714 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.925, | |
| "epoch": 0.5978260869565217, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.7234652042388916, | |
| "image_reward": 0.2631998687982559, | |
| "kl": 2.588300554268062, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0954, | |
| "reward": -0.11296717822551727, | |
| "reward_std": 1.059992153197527, | |
| "rewards/reward_func": -0.11296717822551727, | |
| "step": 2530, | |
| "toxic_reward": 4.310960650444031 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.8, | |
| "epoch": 0.6001890359168242, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.746839165687561, | |
| "image_reward": 0.27794291228055956, | |
| "kl": 0.12578147873282433, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0894, | |
| "reward": 0.6603235125541687, | |
| "reward_std": 0.5662866534665227, | |
| "rewards/reward_func": 0.6603235125541687, | |
| "step": 2540, | |
| "toxic_reward": 4.165549850463867 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 65.2, | |
| "epoch": 0.6025519848771267, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.1635066270828247, | |
| "image_reward": 0.2584126806921429, | |
| "kl": 16.10209010541439, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0047, | |
| "reward": 0.9701344430446625, | |
| "reward_std": 0.8910946477204561, | |
| "rewards/reward_func": 0.9701344430446625, | |
| "step": 2550, | |
| "toxic_reward": 3.8731188111835055 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.975, | |
| "epoch": 0.6049149338374291, | |
| "format_reward": -1.0, | |
| "grad_norm": 3.505110502243042, | |
| "image_reward": 0.2755279541015625, | |
| "kl": 1.5500462669879198, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0015, | |
| "reward": -0.1658882439136505, | |
| "reward_std": 2.0384394701570274, | |
| "rewards/reward_func": -0.1658882439136505, | |
| "step": 2560, | |
| "toxic_reward": 3.8778061270713806 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 30.8, | |
| "epoch": 0.6072778827977315, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.120704650878906, | |
| "image_reward": 0.2892588287591934, | |
| "kl": 2.1680047139525414, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0399, | |
| "reward": 0.6697697341442108, | |
| "reward_std": 1.024929089844227, | |
| "rewards/reward_func": 0.6697697341442108, | |
| "step": 2570, | |
| "toxic_reward": 3.547108954191208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 63.225, | |
| "epoch": 0.6096408317580341, | |
| "format_reward": -1.0, | |
| "grad_norm": 9.57001781463623, | |
| "image_reward": 0.2734588623046875, | |
| "kl": 0.8948870234191417, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0951, | |
| "reward": -0.7226251482963562, | |
| "reward_std": 2.448101815581322, | |
| "rewards/reward_func": -0.7226251482963562, | |
| "step": 2580, | |
| "toxic_reward": 4.320083689689636 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.1, | |
| "epoch": 0.6120037807183365, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0496883392333984, | |
| "image_reward": 0.2865132659673691, | |
| "kl": 2.8105035655200483, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0797, | |
| "reward": 0.568730728328228, | |
| "reward_std": 0.6556393213570118, | |
| "rewards/reward_func": 0.568730728328228, | |
| "step": 2590, | |
| "toxic_reward": 3.725440341234207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.075, | |
| "epoch": 0.6143667296786389, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.353742599487305, | |
| "image_reward": 0.2710174560546875, | |
| "kl": 0.6778285041451454, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0379, | |
| "reward": 0.2569525420665741, | |
| "reward_std": 0.597846270352602, | |
| "rewards/reward_func": 0.2569525420665741, | |
| "step": 2600, | |
| "toxic_reward": 4.306404328346252 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.2, | |
| "epoch": 0.6167296786389413, | |
| "format_reward": -0.5, | |
| "grad_norm": 3.9594945907592773, | |
| "image_reward": 0.28432718813419344, | |
| "kl": 0.5540166199207306, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0047, | |
| "reward": 0.5912085831165313, | |
| "reward_std": 1.5809811264276505, | |
| "rewards/reward_func": 0.5912085831165313, | |
| "step": 2610, | |
| "toxic_reward": 4.350194215774536 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.0, | |
| "epoch": 0.6190926275992439, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.203413963317871, | |
| "image_reward": 0.26516723483800886, | |
| "kl": 1.199559571594, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0012, | |
| "reward": 0.2267006203532219, | |
| "reward_std": 1.142584490031004, | |
| "rewards/reward_func": 0.2267006203532219, | |
| "step": 2620, | |
| "toxic_reward": 3.9258982062339784 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.95, | |
| "epoch": 0.6214555765595463, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.998039722442627, | |
| "image_reward": 0.2901885986328125, | |
| "kl": 27.16859985589981, | |
| "learning_rate": 5e-06, | |
| "loss": -0.026, | |
| "reward": 0.13268216848373413, | |
| "reward_std": 1.2143183693289756, | |
| "rewards/reward_func": 0.13268216848373413, | |
| "step": 2630, | |
| "toxic_reward": 3.556568074226379 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.8, | |
| "epoch": 0.6238185255198487, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.862479209899902, | |
| "image_reward": 0.27274271547794343, | |
| "kl": 2.363949555903673, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1093, | |
| "reward": 0.21172123551368713, | |
| "reward_std": 1.788407751917839, | |
| "rewards/reward_func": 0.21172123551368713, | |
| "step": 2640, | |
| "toxic_reward": 3.530658257007599 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.45, | |
| "epoch": 0.6261814744801513, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.451826095581055, | |
| "image_reward": 0.25544840544462205, | |
| "kl": 0.9077189475297928, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1204, | |
| "reward": 0.17604875564575195, | |
| "reward_std": 0.7731596916913986, | |
| "rewards/reward_func": 0.17604875564575195, | |
| "step": 2650, | |
| "toxic_reward": 3.655298948287964 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.375, | |
| "epoch": 0.6285444234404537, | |
| "format_reward": -0.25, | |
| "grad_norm": 15.447392463684082, | |
| "image_reward": 0.28090617060661316, | |
| "kl": 1.5149286333471537, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0393, | |
| "reward": 0.6538720428943634, | |
| "reward_std": 1.4380803421139716, | |
| "rewards/reward_func": 0.6538720428943634, | |
| "step": 2660, | |
| "toxic_reward": 3.8757722854614256 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.525, | |
| "epoch": 0.6309073724007561, | |
| "format_reward": -0.5, | |
| "grad_norm": 5.443056583404541, | |
| "image_reward": 0.28455810546875, | |
| "kl": 2.1727461591362953, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0129, | |
| "reward": -0.5597851276397705, | |
| "reward_std": 1.4988839238882066, | |
| "rewards/reward_func": -0.5597851276397705, | |
| "step": 2670, | |
| "toxic_reward": 3.852312761545181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.575, | |
| "epoch": 0.6332703213610587, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.4276652336120605, | |
| "image_reward": 0.2711863175034523, | |
| "kl": 2.061120516061783, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1249, | |
| "reward": 0.4250785157084465, | |
| "reward_std": 0.8246009856462478, | |
| "rewards/reward_func": 0.4250785157084465, | |
| "step": 2680, | |
| "toxic_reward": 3.8009597778320314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.375, | |
| "epoch": 0.6356332703213611, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.9922189712524414, | |
| "image_reward": 0.2824055999517441, | |
| "kl": 0.8832020409405231, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0029, | |
| "reward": 0.2428468108177185, | |
| "reward_std": 0.7863198474049569, | |
| "rewards/reward_func": 0.2428468108177185, | |
| "step": 2690, | |
| "toxic_reward": 3.925771975517273 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.375, | |
| "epoch": 0.6379962192816635, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.2788710594177246, | |
| "image_reward": 0.2625895172357559, | |
| "kl": 0.3067374438047409, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0585, | |
| "reward": 0.0673605427145958, | |
| "reward_std": 1.2387039607390762, | |
| "rewards/reward_func": 0.0673605427145958, | |
| "step": 2700, | |
| "toxic_reward": 3.223780316114426 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.7, | |
| "epoch": 0.6403591682419659, | |
| "format_reward": -0.5, | |
| "grad_norm": 18.068998336791992, | |
| "image_reward": 0.28381652683019637, | |
| "kl": 4.5763449721038345, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0303, | |
| "reward": -0.08654462695121765, | |
| "reward_std": 1.6389019638299942, | |
| "rewards/reward_func": -0.08654462695121765, | |
| "step": 2710, | |
| "toxic_reward": 3.9132798612117767 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.75, | |
| "epoch": 0.6427221172022685, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.331071853637695, | |
| "image_reward": 0.2924163818359375, | |
| "kl": 1.1359277203679086, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0706, | |
| "reward": 0.6057616770267487, | |
| "reward_std": 0.7651574447751045, | |
| "rewards/reward_func": 0.6057616770267487, | |
| "step": 2720, | |
| "toxic_reward": 3.8298017740249635 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.725, | |
| "epoch": 0.6450850661625709, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.152521848678589, | |
| "image_reward": 0.3041951507329941, | |
| "kl": 8.785355818271636, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0118, | |
| "reward": 0.3512896567583084, | |
| "reward_std": 1.3057980645447969, | |
| "rewards/reward_func": 0.3512896567583084, | |
| "step": 2730, | |
| "toxic_reward": 3.397814577817917 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.675, | |
| "epoch": 0.6474480151228733, | |
| "format_reward": 0.0, | |
| "grad_norm": 20.01748275756836, | |
| "image_reward": 0.29010823667049407, | |
| "kl": 3.5924226850271226, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1142, | |
| "reward": 0.7106038928031921, | |
| "reward_std": 0.8158069387078285, | |
| "rewards/reward_func": 0.7106038928031921, | |
| "step": 2740, | |
| "toxic_reward": 4.0692403554916385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.7, | |
| "epoch": 0.6498109640831758, | |
| "format_reward": -0.75, | |
| "grad_norm": 11.965126037597656, | |
| "image_reward": 0.26697489619255066, | |
| "kl": 1.9964583709836006, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0406, | |
| "reward": -0.07322075963020325, | |
| "reward_std": 1.999936766922474, | |
| "rewards/reward_func": -0.07322075963020325, | |
| "step": 2750, | |
| "toxic_reward": 3.366811156272888 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.475, | |
| "epoch": 0.6521739130434783, | |
| "format_reward": -0.5, | |
| "grad_norm": 26.80545997619629, | |
| "image_reward": 0.24265645444393158, | |
| "kl": 2.707187344133854, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0971, | |
| "reward": -0.5061075001955032, | |
| "reward_std": 1.755505845695734, | |
| "rewards/reward_func": -0.5061075001955032, | |
| "step": 2760, | |
| "toxic_reward": 3.8667294502258303 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.025, | |
| "epoch": 0.6545368620037807, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.15554141998291, | |
| "image_reward": 0.271905517578125, | |
| "kl": 4.915832757204771, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0126, | |
| "reward": 0.8192368298768997, | |
| "reward_std": 0.41571362912654874, | |
| "rewards/reward_func": 0.8192368298768997, | |
| "step": 2770, | |
| "toxic_reward": 4.089378929138183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.075, | |
| "epoch": 0.6568998109640832, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.028783082962036, | |
| "image_reward": 0.27303365170955657, | |
| "kl": 1.8947554275393486, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0468, | |
| "reward": -0.1263785183429718, | |
| "reward_std": 0.7480042926967144, | |
| "rewards/reward_func": -0.1263785183429718, | |
| "step": 2780, | |
| "toxic_reward": 4.165195155143738 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.125, | |
| "epoch": 0.6592627599243857, | |
| "format_reward": -0.5, | |
| "grad_norm": 17.42000961303711, | |
| "image_reward": 0.2864379853010178, | |
| "kl": 1.6738548278808594, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0375, | |
| "reward": -0.2114594280719757, | |
| "reward_std": 1.560011611506343, | |
| "rewards/reward_func": -0.2114594280719757, | |
| "step": 2790, | |
| "toxic_reward": 4.024951922893524 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.075, | |
| "epoch": 0.6616257088846881, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.910866737365723, | |
| "image_reward": 0.2764821395277977, | |
| "kl": 2.9146203480660917, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0104, | |
| "reward": 0.6828193128108978, | |
| "reward_std": 0.7127262264490127, | |
| "rewards/reward_func": 0.6828193128108978, | |
| "step": 2800, | |
| "toxic_reward": 4.108976912498474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.225, | |
| "epoch": 0.6639886578449905, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.787774085998535, | |
| "image_reward": 0.2695404052734375, | |
| "kl": 2.044136567413807, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1481, | |
| "reward": 0.1416476845741272, | |
| "reward_std": 0.9124870980158448, | |
| "rewards/reward_func": 0.1416476845741272, | |
| "step": 2810, | |
| "toxic_reward": 3.9404671788215637 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.35, | |
| "epoch": 0.666351606805293, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.458231925964355, | |
| "image_reward": 0.27503865361213686, | |
| "kl": 12.555490608513356, | |
| "learning_rate": 5e-06, | |
| "loss": 0.015, | |
| "reward": -0.398735374212265, | |
| "reward_std": 1.3145878296345472, | |
| "rewards/reward_func": -0.398735374212265, | |
| "step": 2820, | |
| "toxic_reward": 3.8166601181030275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.575, | |
| "epoch": 0.6687145557655955, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.239807605743408, | |
| "image_reward": 0.2913035064935684, | |
| "kl": 4.1338134072721004, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0082, | |
| "reward": 0.09673230051994323, | |
| "reward_std": 0.5237030681222677, | |
| "rewards/reward_func": 0.09673230051994323, | |
| "step": 2830, | |
| "toxic_reward": 3.791787397861481 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 74.05, | |
| "epoch": 0.6710775047258979, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.1467976570129395, | |
| "image_reward": 0.2838506057858467, | |
| "kl": 18.177365225553512, | |
| "learning_rate": 5e-06, | |
| "loss": 0.21, | |
| "reward": 0.40501208901405333, | |
| "reward_std": 0.894443211145699, | |
| "rewards/reward_func": 0.40501208901405333, | |
| "step": 2840, | |
| "toxic_reward": 3.982026219367981 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.05, | |
| "epoch": 0.6734404536862004, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.421890735626221, | |
| "image_reward": 0.2822255462408066, | |
| "kl": 4.33959369957447, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0624, | |
| "reward": -0.05263040065765381, | |
| "reward_std": 1.2849599719047546, | |
| "rewards/reward_func": -0.05263040065765381, | |
| "step": 2850, | |
| "toxic_reward": 3.633159136772156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.2, | |
| "epoch": 0.6758034026465028, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5038645267486572, | |
| "image_reward": 0.2830434158444405, | |
| "kl": 0.6373848512768745, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0724, | |
| "reward": 0.627695482969284, | |
| "reward_std": 0.8375864863395691, | |
| "rewards/reward_func": 0.627695482969284, | |
| "step": 2860, | |
| "toxic_reward": 2.48615984916687 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.45, | |
| "epoch": 0.6781663516068053, | |
| "format_reward": -0.75, | |
| "grad_norm": 13.282075881958008, | |
| "image_reward": 0.27708842009305956, | |
| "kl": 0.9827784240245819, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0701, | |
| "reward": -0.892215234041214, | |
| "reward_std": 2.255379121750593, | |
| "rewards/reward_func": -0.892215234041214, | |
| "step": 2870, | |
| "toxic_reward": 3.7220635175704957 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.525, | |
| "epoch": 0.6805293005671077, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.856422424316406, | |
| "image_reward": 0.2848948180675507, | |
| "kl": 1.0351120814681054, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0319, | |
| "reward": 0.2441805601119995, | |
| "reward_std": 0.7333651419728995, | |
| "rewards/reward_func": 0.2441805601119995, | |
| "step": 2880, | |
| "toxic_reward": 3.4050124049186707 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.925, | |
| "epoch": 0.6828922495274102, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.430034637451172, | |
| "image_reward": 0.2576904296875, | |
| "kl": 0.8548611015081405, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0552, | |
| "reward": 0.17943925857543946, | |
| "reward_std": 1.0328819096088409, | |
| "rewards/reward_func": 0.17943925857543946, | |
| "step": 2890, | |
| "toxic_reward": 3.6138275027275086 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.725, | |
| "epoch": 0.6852551984877127, | |
| "format_reward": -0.5, | |
| "grad_norm": 8.174365997314453, | |
| "image_reward": 0.27861836850643157, | |
| "kl": 2.0340675324201585, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1112, | |
| "reward": 0.21913965195417404, | |
| "reward_std": 1.3600813373923302, | |
| "rewards/reward_func": 0.21913965195417404, | |
| "step": 2900, | |
| "toxic_reward": 3.973111832141876 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.375, | |
| "epoch": 0.6876181474480151, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.611124992370605, | |
| "image_reward": 0.2759572356939316, | |
| "kl": 1.901711493730545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0008, | |
| "reward": 0.7438287258148193, | |
| "reward_std": 0.6283189944922924, | |
| "rewards/reward_func": 0.7438287258148193, | |
| "step": 2910, | |
| "toxic_reward": 3.9766014724969865 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 71.675, | |
| "epoch": 0.6899810964083176, | |
| "format_reward": -0.75, | |
| "grad_norm": 4.556710243225098, | |
| "image_reward": 0.25573730319738386, | |
| "kl": 2.2221992775797843, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0776, | |
| "reward": -0.4340919256210327, | |
| "reward_std": 1.778307182714343, | |
| "rewards/reward_func": -0.4340919256210327, | |
| "step": 2920, | |
| "toxic_reward": 4.26712441444397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.45, | |
| "epoch": 0.69234404536862, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.245325088500977, | |
| "image_reward": 0.28000691831111907, | |
| "kl": 1.5203486174345016, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0075, | |
| "reward": 0.38065839409828184, | |
| "reward_std": 1.2137143149971963, | |
| "rewards/reward_func": 0.38065839409828184, | |
| "step": 2930, | |
| "toxic_reward": 4.011340999603272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.95, | |
| "epoch": 0.6947069943289225, | |
| "format_reward": -0.5, | |
| "grad_norm": 43.48079299926758, | |
| "image_reward": 0.28537089079618455, | |
| "kl": 4.194944667816162, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0192, | |
| "reward": -0.4992818832397461, | |
| "reward_std": 1.652469713240862, | |
| "rewards/reward_func": -0.4992818832397461, | |
| "step": 2940, | |
| "toxic_reward": 3.73269322514534 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.375, | |
| "epoch": 0.697069943289225, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.9284157752990723, | |
| "image_reward": 0.29124247282743454, | |
| "kl": 1.9233473122119904, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0558, | |
| "reward": 0.31386570632457733, | |
| "reward_std": 1.3490888617932797, | |
| "rewards/reward_func": 0.31386570632457733, | |
| "step": 2950, | |
| "toxic_reward": 3.3832929611206053 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.9, | |
| "epoch": 0.6994328922495274, | |
| "format_reward": -0.75, | |
| "grad_norm": 5.489762306213379, | |
| "image_reward": 0.27443746030330657, | |
| "kl": 11.033294987678527, | |
| "learning_rate": 5e-06, | |
| "loss": 0.034, | |
| "reward": -0.6967712700366974, | |
| "reward_std": 1.7560975707136095, | |
| "rewards/reward_func": -0.6967712700366974, | |
| "step": 2960, | |
| "toxic_reward": 4.0662164211273195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.575, | |
| "epoch": 0.7017958412098299, | |
| "format_reward": -0.25, | |
| "grad_norm": 18.22649574279785, | |
| "image_reward": 0.26611735075712206, | |
| "kl": 3.9552819430828094, | |
| "learning_rate": 5e-06, | |
| "loss": -0.097, | |
| "reward": 0.2059646487236023, | |
| "reward_std": 1.4741453856229783, | |
| "rewards/reward_func": 0.2059646487236023, | |
| "step": 2970, | |
| "toxic_reward": 3.947977590560913 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.25, | |
| "epoch": 0.7041587901701323, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.15437889099121, | |
| "image_reward": 0.29809672236442564, | |
| "kl": 2.7566053330898286, | |
| "learning_rate": 5e-06, | |
| "loss": 0.008, | |
| "reward": 0.5703202053904534, | |
| "reward_std": 0.8202566847205162, | |
| "rewards/reward_func": 0.5703202053904534, | |
| "step": 2980, | |
| "toxic_reward": 3.724568712711334 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.325, | |
| "epoch": 0.7065217391304348, | |
| "format_reward": -0.5, | |
| "grad_norm": 24.218904495239258, | |
| "image_reward": 0.26676025390625, | |
| "kl": 2.1775312602519987, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0501, | |
| "reward": 0.04957394301891327, | |
| "reward_std": 1.5467448111623525, | |
| "rewards/reward_func": 0.04957394301891327, | |
| "step": 2990, | |
| "toxic_reward": 3.3577624768018723 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.1, | |
| "epoch": 0.7088846880907372, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.296006679534912, | |
| "image_reward": 0.286920166015625, | |
| "kl": 1.4036121606826781, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0237, | |
| "reward": 0.3820555150508881, | |
| "reward_std": 1.188760439120233, | |
| "rewards/reward_func": 0.3820555150508881, | |
| "step": 3000, | |
| "toxic_reward": 4.305025839805603 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.9, | |
| "epoch": 0.7112476370510397, | |
| "format_reward": -0.5, | |
| "grad_norm": 20.778005599975586, | |
| "image_reward": 0.30558042062653434, | |
| "kl": 1.4864997833967208, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0741, | |
| "reward": -0.08508440256118774, | |
| "reward_std": 1.637317718565464, | |
| "rewards/reward_func": -0.08508440256118774, | |
| "step": 3010, | |
| "toxic_reward": 4.079210705227322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.95, | |
| "epoch": 0.7136105860113422, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.398971080780029, | |
| "image_reward": 0.2982396438717842, | |
| "kl": 36.805122749507426, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0657, | |
| "reward": -0.5174520492553711, | |
| "reward_std": 1.1937666054815055, | |
| "rewards/reward_func": -0.5174520492553711, | |
| "step": 3020, | |
| "toxic_reward": 4.007938003540039 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.35, | |
| "epoch": 0.7159735349716446, | |
| "format_reward": -0.5, | |
| "grad_norm": 13.247093200683594, | |
| "image_reward": 0.26703898310661317, | |
| "kl": 1.961264681816101, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0169, | |
| "reward": -0.05578238368034363, | |
| "reward_std": 1.441930427402258, | |
| "rewards/reward_func": -0.05578238368034363, | |
| "step": 3030, | |
| "toxic_reward": 4.098655521869659 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.275, | |
| "epoch": 0.718336483931947, | |
| "format_reward": 0.0, | |
| "grad_norm": 23.132400512695312, | |
| "image_reward": 0.2803761810064316, | |
| "kl": 26.18696767091751, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0457, | |
| "reward": 0.49192982316017153, | |
| "reward_std": 0.6619096536189317, | |
| "rewards/reward_func": 0.49192982316017153, | |
| "step": 3040, | |
| "toxic_reward": 4.184990978240966 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.35, | |
| "epoch": 0.7206994328922496, | |
| "format_reward": -0.25, | |
| "grad_norm": 28.89768409729004, | |
| "image_reward": 0.2594024658203125, | |
| "kl": 2.125564157962799, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0056, | |
| "reward": -0.18149735927581787, | |
| "reward_std": 1.4747628048062325, | |
| "rewards/reward_func": -0.18149735927581787, | |
| "step": 3050, | |
| "toxic_reward": 3.5373760223388673 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.95, | |
| "epoch": 0.723062381852552, | |
| "format_reward": -0.5, | |
| "grad_norm": 17.26774787902832, | |
| "image_reward": 0.29308573305606844, | |
| "kl": 13.767069751024247, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0058, | |
| "reward": 0.02059091329574585, | |
| "reward_std": 1.5365628942847251, | |
| "rewards/reward_func": 0.02059091329574585, | |
| "step": 3060, | |
| "toxic_reward": 3.280546021461487 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.65, | |
| "epoch": 0.7254253308128544, | |
| "format_reward": -0.5, | |
| "grad_norm": 16.69405746459961, | |
| "image_reward": 0.27832234650850296, | |
| "kl": 2.4563605159521105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0249, | |
| "reward": -0.4515081524848938, | |
| "reward_std": 1.2287155898287891, | |
| "rewards/reward_func": -0.4515081524848938, | |
| "step": 3070, | |
| "toxic_reward": 3.398514473438263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.775, | |
| "epoch": 0.7277882797731569, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.214962482452393, | |
| "image_reward": 0.27323404848575594, | |
| "kl": 1.8898794114589692, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0003, | |
| "reward": -0.4889628529548645, | |
| "reward_std": 1.4541106900200247, | |
| "rewards/reward_func": -0.4889628529548645, | |
| "step": 3080, | |
| "toxic_reward": 4.181539106369018 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.1, | |
| "epoch": 0.7301512287334594, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.7761129140853882, | |
| "image_reward": 0.27235768735408783, | |
| "kl": 2.1771215945482254, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0612, | |
| "reward": 0.1671779692173004, | |
| "reward_std": 1.4098370391875505, | |
| "rewards/reward_func": 0.1671779692173004, | |
| "step": 3090, | |
| "toxic_reward": 3.8193355441093444 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.85, | |
| "epoch": 0.7325141776937618, | |
| "format_reward": -0.5, | |
| "grad_norm": 5.988401412963867, | |
| "image_reward": 0.25819803923368456, | |
| "kl": 0.8303129658102989, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0141, | |
| "reward": 0.20456358194351196, | |
| "reward_std": 1.755793434381485, | |
| "rewards/reward_func": 0.20456358194351196, | |
| "step": 3100, | |
| "toxic_reward": 3.5276977360248565 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.825, | |
| "epoch": 0.7348771266540642, | |
| "format_reward": 0.0, | |
| "grad_norm": 63.649696350097656, | |
| "image_reward": 0.26085103303194046, | |
| "kl": 2.509291835129261, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0945, | |
| "reward": 0.34231345951557157, | |
| "reward_std": 1.2596007108688354, | |
| "rewards/reward_func": 0.34231345951557157, | |
| "step": 3110, | |
| "toxic_reward": 3.680406093597412 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.675, | |
| "epoch": 0.7372400756143668, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.457945823669434, | |
| "image_reward": 0.2661163330078125, | |
| "kl": 3.3423233568668365, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0837, | |
| "reward": 0.21805171072483062, | |
| "reward_std": 1.0620483674108983, | |
| "rewards/reward_func": 0.21805171072483062, | |
| "step": 3120, | |
| "toxic_reward": 3.4958622455596924 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.25, | |
| "epoch": 0.7396030245746692, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.3712886571884155, | |
| "image_reward": 0.280279541015625, | |
| "kl": 0.5368543028831482, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0856, | |
| "reward": -0.08258238434791565, | |
| "reward_std": 0.7678581360727549, | |
| "rewards/reward_func": -0.08258238434791565, | |
| "step": 3130, | |
| "toxic_reward": 4.090320491790772 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.75, | |
| "epoch": 0.7419659735349716, | |
| "format_reward": -0.25, | |
| "grad_norm": 33.164817810058594, | |
| "image_reward": 0.263348388671875, | |
| "kl": 3.852606762945652, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1711, | |
| "reward": 0.23493566811084748, | |
| "reward_std": 1.2882447349838912, | |
| "rewards/reward_func": 0.23493566811084748, | |
| "step": 3140, | |
| "toxic_reward": 3.8201312363147735 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.275, | |
| "epoch": 0.744328922495274, | |
| "format_reward": -0.25, | |
| "grad_norm": 23.956363677978516, | |
| "image_reward": 0.31206461489200593, | |
| "kl": 0.8646048396825791, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0402, | |
| "reward": 0.018216264247894288, | |
| "reward_std": 1.226612313091755, | |
| "rewards/reward_func": 0.018216264247894288, | |
| "step": 3150, | |
| "toxic_reward": 3.7303581714630125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.025, | |
| "epoch": 0.7466918714555766, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.992063999176025, | |
| "image_reward": 0.283380126953125, | |
| "kl": 1.2732116781175136, | |
| "learning_rate": 5e-06, | |
| "loss": -0.091, | |
| "reward": 0.7706227093935013, | |
| "reward_std": 1.4939947571605443, | |
| "rewards/reward_func": 0.7706227093935013, | |
| "step": 3160, | |
| "toxic_reward": 3.5458990573883056 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.05, | |
| "epoch": 0.749054820415879, | |
| "format_reward": 0.0, | |
| "grad_norm": 26.938879013061523, | |
| "image_reward": 0.2929168701171875, | |
| "kl": 4.621248189732432, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0543, | |
| "reward": 0.26634013652801514, | |
| "reward_std": 0.6591222167015076, | |
| "rewards/reward_func": 0.26634013652801514, | |
| "step": 3170, | |
| "toxic_reward": 4.090583860874176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.825, | |
| "epoch": 0.7514177693761814, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.0445924997329712, | |
| "image_reward": 0.27820536196231843, | |
| "kl": 1.2374065339565277, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0777, | |
| "reward": -0.1608543336391449, | |
| "reward_std": 0.9281521745026111, | |
| "rewards/reward_func": -0.1608543336391449, | |
| "step": 3180, | |
| "toxic_reward": 3.968969798088074 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.8, | |
| "epoch": 0.753780718336484, | |
| "format_reward": 0.0, | |
| "grad_norm": 26.366165161132812, | |
| "image_reward": 0.27580566257238387, | |
| "kl": 11.984261164069176, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0176, | |
| "reward": 0.36088051795959475, | |
| "reward_std": 0.737302597053349, | |
| "rewards/reward_func": 0.36088051795959475, | |
| "step": 3190, | |
| "toxic_reward": 4.220689821243286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.525, | |
| "epoch": 0.7561436672967864, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.66350269317627, | |
| "image_reward": 0.2542442321777344, | |
| "kl": 1.2004614934325217, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1499, | |
| "reward": 0.916411966085434, | |
| "reward_std": 1.1410479605197907, | |
| "rewards/reward_func": 0.916411966085434, | |
| "step": 3200, | |
| "toxic_reward": 3.5325961112976074 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.375, | |
| "epoch": 0.7585066162570888, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.70230770111084, | |
| "image_reward": 0.30909423828125, | |
| "kl": 2.567577276751399, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0471, | |
| "reward": 0.20682075023651122, | |
| "reward_std": 0.5303860757499933, | |
| "rewards/reward_func": 0.20682075023651122, | |
| "step": 3210, | |
| "toxic_reward": 3.270925796031952 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.05, | |
| "epoch": 0.7608695652173914, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.79295539855957, | |
| "image_reward": 0.25408528596162794, | |
| "kl": 2.86144537627697, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0379, | |
| "reward": 0.6601236045360566, | |
| "reward_std": 0.7253405870869756, | |
| "rewards/reward_func": 0.6601236045360566, | |
| "step": 3220, | |
| "toxic_reward": 4.3341371536254885 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.725, | |
| "epoch": 0.7632325141776938, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.788066387176514, | |
| "image_reward": 0.2796641021966934, | |
| "kl": 5.517164082825184, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1356, | |
| "reward": -0.18339840769767762, | |
| "reward_std": 1.0695885993540286, | |
| "rewards/reward_func": -0.18339840769767762, | |
| "step": 3230, | |
| "toxic_reward": 4.077802658081055 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.5, | |
| "epoch": 0.7655954631379962, | |
| "format_reward": -0.5, | |
| "grad_norm": 39.19500732421875, | |
| "image_reward": 0.2956451416015625, | |
| "kl": 0.7065762653946877, | |
| "learning_rate": 5e-06, | |
| "loss": -0.047, | |
| "reward": -0.26765223741531374, | |
| "reward_std": 1.6595379646867514, | |
| "rewards/reward_func": -0.26765223741531374, | |
| "step": 3240, | |
| "toxic_reward": 3.865544855594635 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.675, | |
| "epoch": 0.7679584120982986, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.50940990447998, | |
| "image_reward": 0.28647562563419343, | |
| "kl": 4.316986609622836, | |
| "learning_rate": 5e-06, | |
| "loss": 0.124, | |
| "reward": 0.6616093635559082, | |
| "reward_std": 1.070189495384693, | |
| "rewards/reward_func": 0.6616093635559082, | |
| "step": 3250, | |
| "toxic_reward": 3.284928467869759 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.175, | |
| "epoch": 0.7703213610586012, | |
| "format_reward": 0.0, | |
| "grad_norm": 21.128314971923828, | |
| "image_reward": 0.2679835006594658, | |
| "kl": 4.375968629121781, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0579, | |
| "reward": 0.3372311323881149, | |
| "reward_std": 0.869463924318552, | |
| "rewards/reward_func": 0.3372311323881149, | |
| "step": 3260, | |
| "toxic_reward": 3.78046395778656 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.5, | |
| "epoch": 0.7726843100189036, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.558209419250488, | |
| "image_reward": 0.2745330810546875, | |
| "kl": 2.3013378672301767, | |
| "learning_rate": 5e-06, | |
| "loss": -0.031, | |
| "reward": 0.784791512787342, | |
| "reward_std": 0.8750310368835926, | |
| "rewards/reward_func": 0.784791512787342, | |
| "step": 3270, | |
| "toxic_reward": 3.3566872388124467 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.025, | |
| "epoch": 0.775047258979206, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.0432510375976562, | |
| "image_reward": 0.2913625091314316, | |
| "kl": 0.3781319923698902, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0607, | |
| "reward": -0.3740895688533783, | |
| "reward_std": 1.350129895284772, | |
| "rewards/reward_func": -0.3740895688533783, | |
| "step": 3280, | |
| "toxic_reward": 4.018161624670029 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.825, | |
| "epoch": 0.7774102079395085, | |
| "format_reward": -0.5, | |
| "grad_norm": 3.16352915763855, | |
| "image_reward": 0.27322998046875, | |
| "kl": 0.3104788601398468, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0334, | |
| "reward": -0.05296646356582642, | |
| "reward_std": 1.2258484821766615, | |
| "rewards/reward_func": -0.05296646356582642, | |
| "step": 3290, | |
| "toxic_reward": 4.389449417591095 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.925, | |
| "epoch": 0.779773156899811, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.182164669036865, | |
| "image_reward": 0.2737925201654434, | |
| "kl": 1.2850206293165685, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1213, | |
| "reward": 0.6742017388343811, | |
| "reward_std": 0.736553730070591, | |
| "rewards/reward_func": 0.6742017388343811, | |
| "step": 3300, | |
| "toxic_reward": 4.220770263671875 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.175, | |
| "epoch": 0.7821361058601134, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.606978416442871, | |
| "image_reward": 0.2700215637683868, | |
| "kl": 4.289887800067663, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1432, | |
| "reward": 0.30965389013290406, | |
| "reward_std": 1.063696064054966, | |
| "rewards/reward_func": 0.30965389013290406, | |
| "step": 3310, | |
| "toxic_reward": 4.136632585525513 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.45, | |
| "epoch": 0.7844990548204159, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.973367691040039, | |
| "image_reward": 0.25255330502986906, | |
| "kl": 6.628417156636715, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0524, | |
| "reward": 0.2595418691635132, | |
| "reward_std": 0.6656439051032066, | |
| "rewards/reward_func": 0.2595418691635132, | |
| "step": 3320, | |
| "toxic_reward": 3.946825695037842 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.625, | |
| "epoch": 0.7868620037807184, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.275523662567139, | |
| "image_reward": 0.29005940854549406, | |
| "kl": 25.900663439184427, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1344, | |
| "reward": 0.8005503177642822, | |
| "reward_std": 0.9713124742731452, | |
| "rewards/reward_func": 0.8005503177642822, | |
| "step": 3330, | |
| "toxic_reward": 4.047469854354858 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.55, | |
| "epoch": 0.7892249527410208, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.920967102050781, | |
| "image_reward": 0.2706329345703125, | |
| "kl": 2.892443811520934, | |
| "learning_rate": 5e-06, | |
| "loss": -0.028, | |
| "reward": 0.7794641971588134, | |
| "reward_std": 0.7315312433987856, | |
| "rewards/reward_func": 0.7794641971588134, | |
| "step": 3340, | |
| "toxic_reward": 4.036288380622864 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.325, | |
| "epoch": 0.7915879017013232, | |
| "format_reward": -0.25, | |
| "grad_norm": 19.411304473876953, | |
| "image_reward": 0.2590001419186592, | |
| "kl": 0.762314885109663, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0738, | |
| "reward": -0.22335948944091796, | |
| "reward_std": 1.229094560444355, | |
| "rewards/reward_func": -0.22335948944091796, | |
| "step": 3350, | |
| "toxic_reward": 4.078046441078186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.825, | |
| "epoch": 0.7939508506616257, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.397270202636719, | |
| "image_reward": 0.25453638202614254, | |
| "kl": 1.7116897955536843, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0637, | |
| "reward": -0.23146066069602966, | |
| "reward_std": 1.4809592371806501, | |
| "rewards/reward_func": -0.23146066069602966, | |
| "step": 3360, | |
| "toxic_reward": 3.7261215580834284 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.075, | |
| "epoch": 0.7963137996219282, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.7069385051727295, | |
| "image_reward": 0.29098663330078123, | |
| "kl": 0.2713630013167858, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0331, | |
| "reward": 0.2162942558526993, | |
| "reward_std": 0.7098794117569923, | |
| "rewards/reward_func": 0.2162942558526993, | |
| "step": 3370, | |
| "toxic_reward": 3.5313488602638246 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.425, | |
| "epoch": 0.7986767485822306, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.024960041046143, | |
| "image_reward": 0.2739929184317589, | |
| "kl": 18.83201899640262, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0017, | |
| "reward": 0.4777979046106339, | |
| "reward_std": 1.240721021965146, | |
| "rewards/reward_func": 0.4777979046106339, | |
| "step": 3380, | |
| "toxic_reward": 3.522536587715149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.725, | |
| "epoch": 0.8010396975425331, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.2769622802734375, | |
| "image_reward": 0.2533442169427872, | |
| "kl": 8.165257753431797, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0776, | |
| "reward": 0.44249573945999143, | |
| "reward_std": 0.8017176885157824, | |
| "rewards/reward_func": 0.44249573945999143, | |
| "step": 3390, | |
| "toxic_reward": 4.304618096351623 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.85, | |
| "epoch": 0.8034026465028355, | |
| "format_reward": -0.75, | |
| "grad_norm": 6.779478549957275, | |
| "image_reward": 0.254600016772747, | |
| "kl": 3.2404680982232095, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0622, | |
| "reward": 1.1459296941757202, | |
| "reward_std": 2.3130407273769378, | |
| "rewards/reward_func": 1.1459296941757202, | |
| "step": 3400, | |
| "toxic_reward": 3.946528363227844 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.475, | |
| "epoch": 0.805765595463138, | |
| "format_reward": -0.75, | |
| "grad_norm": 9.082526206970215, | |
| "image_reward": 0.2917332977056503, | |
| "kl": 1.1568331263959408, | |
| "learning_rate": 5e-06, | |
| "loss": -0.087, | |
| "reward": -0.22108137607574463, | |
| "reward_std": 2.109957142919302, | |
| "rewards/reward_func": -0.22108137607574463, | |
| "step": 3410, | |
| "toxic_reward": 3.467973506450653 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.225, | |
| "epoch": 0.8081285444234405, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.748640537261963, | |
| "image_reward": 0.2629201263189316, | |
| "kl": 1.277670707181096, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0912, | |
| "reward": 0.5041390061378479, | |
| "reward_std": 1.1238155417144298, | |
| "rewards/reward_func": 0.5041390061378479, | |
| "step": 3420, | |
| "toxic_reward": 3.6773669004440306 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.875, | |
| "epoch": 0.8104914933837429, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.2929182052612305, | |
| "image_reward": 0.2664311736822128, | |
| "kl": 0.41112807476893065, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0185, | |
| "reward": 0.25773588865995406, | |
| "reward_std": 1.1975380808115006, | |
| "rewards/reward_func": 0.25773588865995406, | |
| "step": 3430, | |
| "toxic_reward": 3.5475049674510957 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 31.2, | |
| "epoch": 0.8128544423440454, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.984248399734497, | |
| "image_reward": 0.29365132600069044, | |
| "kl": 6.331273209676146, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0094, | |
| "reward": -0.1625719666481018, | |
| "reward_std": 0.864103776961565, | |
| "rewards/reward_func": -0.1625719666481018, | |
| "step": 3440, | |
| "toxic_reward": 3.899219441413879 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 59.075, | |
| "epoch": 0.8152173913043478, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.733253479003906, | |
| "image_reward": 0.28086344301700594, | |
| "kl": 9.025772982649505, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0245, | |
| "reward": 0.4977319598197937, | |
| "reward_std": 0.6485220491886139, | |
| "rewards/reward_func": 0.4977319598197937, | |
| "step": 3450, | |
| "toxic_reward": 3.7944631457328795 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.25, | |
| "epoch": 0.8175803402646503, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.605562686920166, | |
| "image_reward": 0.281341552734375, | |
| "kl": 0.665616973862052, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0068, | |
| "reward": -0.26609439849853517, | |
| "reward_std": 1.4688232390210032, | |
| "rewards/reward_func": -0.26609439849853517, | |
| "step": 3460, | |
| "toxic_reward": 3.4837970972061156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.475, | |
| "epoch": 0.8199432892249527, | |
| "format_reward": -0.5, | |
| "grad_norm": 2.6239664554595947, | |
| "image_reward": 0.2684331268072128, | |
| "kl": 1.5078430883586407, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1555, | |
| "reward": -0.12674018144607543, | |
| "reward_std": 1.4365263484418391, | |
| "rewards/reward_func": -0.12674018144607543, | |
| "step": 3470, | |
| "toxic_reward": 4.5540220737457275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.975, | |
| "epoch": 0.8223062381852552, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.734126091003418, | |
| "image_reward": 0.2604085296392441, | |
| "kl": 24.937382932007313, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0011, | |
| "reward": -0.20797204971313477, | |
| "reward_std": 0.9569237198680639, | |
| "rewards/reward_func": -0.20797204971313477, | |
| "step": 3480, | |
| "toxic_reward": 4.407214689254761 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 60.625, | |
| "epoch": 0.8246691871455577, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.5907628536224365, | |
| "image_reward": 0.2842885345220566, | |
| "kl": 0.051335761044174436, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0047, | |
| "reward": 0.9079252362251282, | |
| "reward_std": 1.1933536015450954, | |
| "rewards/reward_func": 0.9079252362251282, | |
| "step": 3490, | |
| "toxic_reward": 4.199088740348816 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.75, | |
| "epoch": 0.8270321361058601, | |
| "format_reward": -1.0, | |
| "grad_norm": 1.5189018249511719, | |
| "image_reward": 0.2938863128423691, | |
| "kl": 6.8780351031571625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0792, | |
| "reward": -0.6177874624729156, | |
| "reward_std": 1.9702259879559278, | |
| "rewards/reward_func": -0.6177874624729156, | |
| "step": 3500, | |
| "toxic_reward": 3.7184417486190795 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.075, | |
| "epoch": 0.8293950850661626, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.5691888928413391, | |
| "image_reward": 0.2792378753423691, | |
| "kl": 0.0625603836029768, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0245, | |
| "reward": 0.39415156543254853, | |
| "reward_std": 0.7689090168103576, | |
| "rewards/reward_func": 0.39415156543254853, | |
| "step": 3510, | |
| "toxic_reward": 4.210167169570923 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.675, | |
| "epoch": 0.831758034026465, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.8700907230377197, | |
| "image_reward": 0.263861083984375, | |
| "kl": 0.3225065166130662, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0609, | |
| "reward": 0.6327012300491333, | |
| "reward_std": 0.980434575676918, | |
| "rewards/reward_func": 0.6327012300491333, | |
| "step": 3520, | |
| "toxic_reward": 3.8261560261249543 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.05, | |
| "epoch": 0.8341209829867675, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0346537828445435, | |
| "image_reward": 0.284771728515625, | |
| "kl": 0.40898411339148877, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0307, | |
| "reward": 0.30759164690971375, | |
| "reward_std": 0.6451162457466125, | |
| "rewards/reward_func": 0.30759164690971375, | |
| "step": 3530, | |
| "toxic_reward": 4.171144628524781 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.3, | |
| "epoch": 0.8364839319470699, | |
| "format_reward": -0.5, | |
| "grad_norm": 5.951425075531006, | |
| "image_reward": 0.3035013824701309, | |
| "kl": 11.781208837591112, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0257, | |
| "reward": -0.1725111722946167, | |
| "reward_std": 1.8102335507050156, | |
| "rewards/reward_func": -0.1725111722946167, | |
| "step": 3540, | |
| "toxic_reward": 3.670738685131073 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 68.975, | |
| "epoch": 0.8388468809073724, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.631609916687012, | |
| "image_reward": 0.2706085205078125, | |
| "kl": 4.56192576661706, | |
| "learning_rate": 5e-06, | |
| "loss": -0.033, | |
| "reward": 1.4811566695570946, | |
| "reward_std": 0.9509499605745078, | |
| "rewards/reward_func": 1.4811566695570946, | |
| "step": 3550, | |
| "toxic_reward": 3.4709715723991392 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.225, | |
| "epoch": 0.8412098298676749, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.567039489746094, | |
| "image_reward": 0.2595652252435684, | |
| "kl": 10.069495621696115, | |
| "learning_rate": 5e-06, | |
| "loss": -0.057, | |
| "reward": 0.034914278984069826, | |
| "reward_std": 2.0578875496983526, | |
| "rewards/reward_func": 0.034914278984069826, | |
| "step": 3560, | |
| "toxic_reward": 3.711397814750671 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.9, | |
| "epoch": 0.8435727788279773, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5686023235321045, | |
| "image_reward": 0.27446797788143157, | |
| "kl": 1.5964453139342367, | |
| "learning_rate": 5e-06, | |
| "loss": -0.031, | |
| "reward": 0.41478089690208436, | |
| "reward_std": 0.6242949636653066, | |
| "rewards/reward_func": 0.41478089690208436, | |
| "step": 3570, | |
| "toxic_reward": 4.057631134986877 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.725, | |
| "epoch": 0.8459357277882797, | |
| "format_reward": -0.75, | |
| "grad_norm": 18.62441062927246, | |
| "image_reward": 0.26212361752986907, | |
| "kl": 18.3546858407557, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0125, | |
| "reward": 0.3845840930938721, | |
| "reward_std": 2.4349112689495085, | |
| "rewards/reward_func": 0.3845840930938721, | |
| "step": 3580, | |
| "toxic_reward": 3.7492689728736877 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.15, | |
| "epoch": 0.8482986767485823, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.5018891096115112, | |
| "image_reward": 0.25630950927734375, | |
| "kl": 1.5399845570325852, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0064, | |
| "reward": -0.4008509755134583, | |
| "reward_std": 1.2334194054827095, | |
| "rewards/reward_func": -0.4008509755134583, | |
| "step": 3590, | |
| "toxic_reward": 3.9793298959732057 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.475, | |
| "epoch": 0.8506616257088847, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.118828296661377, | |
| "image_reward": 0.279705810546875, | |
| "kl": 2.2166069228202105, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0439, | |
| "reward": 0.722964608669281, | |
| "reward_std": 0.7349236082285643, | |
| "rewards/reward_func": 0.722964608669281, | |
| "step": 3600, | |
| "toxic_reward": 4.429630327224731 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.8, | |
| "epoch": 0.8530245746691871, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.9072647094726562, | |
| "image_reward": 0.2988444000482559, | |
| "kl": 0.4259108882397413, | |
| "learning_rate": 5e-06, | |
| "loss": -0.139, | |
| "reward": 0.36530678868293764, | |
| "reward_std": 1.0169117324054242, | |
| "rewards/reward_func": 0.36530678868293764, | |
| "step": 3610, | |
| "toxic_reward": 3.706578254699707 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.05, | |
| "epoch": 0.8553875236294896, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.574492931365967, | |
| "image_reward": 0.26971537321805955, | |
| "kl": 0.8386783060617745, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0196, | |
| "reward": -0.043644605576992034, | |
| "reward_std": 0.7492304600775241, | |
| "rewards/reward_func": -0.043644605576992034, | |
| "step": 3620, | |
| "toxic_reward": 3.7889950960874557 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.2, | |
| "epoch": 0.8577504725897921, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.6372764110565186, | |
| "image_reward": 0.2799346923828125, | |
| "kl": 0.09604998417198658, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0245, | |
| "reward": 0.31555656492710116, | |
| "reward_std": 0.5240693692117929, | |
| "rewards/reward_func": 0.31555656492710116, | |
| "step": 3630, | |
| "toxic_reward": 3.9185105204582213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.775, | |
| "epoch": 0.8601134215500945, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.1808196306228638, | |
| "image_reward": 0.2841166198253632, | |
| "kl": 29.371498390100896, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0516, | |
| "reward": 0.26911270916461943, | |
| "reward_std": 0.5647319633513689, | |
| "rewards/reward_func": 0.26911270916461943, | |
| "step": 3640, | |
| "toxic_reward": 3.410293960571289 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.35, | |
| "epoch": 0.8624763705103969, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.7336105108261108, | |
| "image_reward": 0.26638386994600294, | |
| "kl": 2.7957767372950912, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0919, | |
| "reward": 0.25321381688117983, | |
| "reward_std": 1.543316999450326, | |
| "rewards/reward_func": 0.25321381688117983, | |
| "step": 3650, | |
| "toxic_reward": 3.6566759824752806 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.75, | |
| "epoch": 0.8648393194706995, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6029968857765198, | |
| "image_reward": 0.2995513916015625, | |
| "kl": 2.5597430652938784, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0974, | |
| "reward": 0.32535398602485655, | |
| "reward_std": 1.3309460416436196, | |
| "rewards/reward_func": 0.32535398602485655, | |
| "step": 3660, | |
| "toxic_reward": 3.65915470123291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.275, | |
| "epoch": 0.8672022684310019, | |
| "format_reward": -0.75, | |
| "grad_norm": 3.770862102508545, | |
| "image_reward": 0.291839599609375, | |
| "kl": 24.679713291302324, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0464, | |
| "reward": -0.6877359867095947, | |
| "reward_std": 2.206609180383384, | |
| "rewards/reward_func": -0.6877359867095947, | |
| "step": 3670, | |
| "toxic_reward": 3.764638936519623 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.9, | |
| "epoch": 0.8695652173913043, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.5417113304138184, | |
| "image_reward": 0.26164347380399705, | |
| "kl": 0.35583615899085996, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0668, | |
| "reward": -0.214414319396019, | |
| "reward_std": 1.3576272014528512, | |
| "rewards/reward_func": -0.214414319396019, | |
| "step": 3680, | |
| "toxic_reward": 3.65915904045105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.125, | |
| "epoch": 0.8719281663516069, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.4714978039264679, | |
| "image_reward": 0.278389485180378, | |
| "kl": 2.4852739069610834, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0177, | |
| "reward": 0.1318028151988983, | |
| "reward_std": 0.8923286706209183, | |
| "rewards/reward_func": 0.1318028151988983, | |
| "step": 3690, | |
| "toxic_reward": 3.508782708644867 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.05, | |
| "epoch": 0.8742911153119093, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.8036189079284668, | |
| "image_reward": 0.246942138671875, | |
| "kl": 17.79970283471048, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0525, | |
| "reward": 0.44554237723350526, | |
| "reward_std": 0.8977296775206923, | |
| "rewards/reward_func": 0.44554237723350526, | |
| "step": 3700, | |
| "toxic_reward": 3.5204819679260253 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.15, | |
| "epoch": 0.8766540642722117, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.9322050213813782, | |
| "image_reward": 0.2607330322265625, | |
| "kl": 4.579995289538056, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0918, | |
| "reward": 0.7257406830787658, | |
| "reward_std": 0.7061707813292741, | |
| "rewards/reward_func": 0.7257406830787658, | |
| "step": 3710, | |
| "toxic_reward": 3.967698335647583 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 32.9, | |
| "epoch": 0.8790170132325141, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.7602401971817017, | |
| "image_reward": 0.277716064453125, | |
| "kl": 5.202583113871515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0093, | |
| "reward": -0.1386810451745987, | |
| "reward_std": 1.5367558933794498, | |
| "rewards/reward_func": -0.1386810451745987, | |
| "step": 3720, | |
| "toxic_reward": 3.343963861465454 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.45, | |
| "epoch": 0.8813799621928167, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.49207255244255066, | |
| "image_reward": 0.2617726638913155, | |
| "kl": 3.0696171432733537, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0096, | |
| "reward": 0.8290068447589874, | |
| "reward_std": 0.6912821188569069, | |
| "rewards/reward_func": 0.8290068447589874, | |
| "step": 3730, | |
| "toxic_reward": 4.308743190765381 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.125, | |
| "epoch": 0.8837429111531191, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.5754015445709229, | |
| "image_reward": 0.25755615234375, | |
| "kl": 1.463007004186511, | |
| "learning_rate": 5e-06, | |
| "loss": -0.074, | |
| "reward": 1.1725465416908265, | |
| "reward_std": 0.7939416155219078, | |
| "rewards/reward_func": 1.1725465416908265, | |
| "step": 3740, | |
| "toxic_reward": 3.892818683385849 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.65, | |
| "epoch": 0.8861058601134215, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.3917323350906372, | |
| "image_reward": 0.2610259994864464, | |
| "kl": 3.9046508548781276, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0114, | |
| "reward": 0.1690664052963257, | |
| "reward_std": 1.9762837937101723, | |
| "rewards/reward_func": 0.1690664052963257, | |
| "step": 3750, | |
| "toxic_reward": 3.7723870635032655 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.45, | |
| "epoch": 0.888468809073724, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.6322398781776428, | |
| "image_reward": 0.27677764892578127, | |
| "kl": 1.8196211833506823, | |
| "learning_rate": 5e-06, | |
| "loss": -0.12, | |
| "reward": 0.42850649207830427, | |
| "reward_std": 0.5486618679948151, | |
| "rewards/reward_func": 0.42850649207830427, | |
| "step": 3760, | |
| "toxic_reward": 3.4346215546131136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.75, | |
| "epoch": 0.8908317580340265, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.3245849013328552, | |
| "image_reward": 0.29166819155216217, | |
| "kl": 10.705555348284543, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0603, | |
| "reward": 0.061820387840270996, | |
| "reward_std": 1.08290204256773, | |
| "rewards/reward_func": 0.061820387840270996, | |
| "step": 3770, | |
| "toxic_reward": 2.949862742424011 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.0, | |
| "epoch": 0.8931947069943289, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.3298509418964386, | |
| "image_reward": 0.290167236328125, | |
| "kl": 0.07300702948123217, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0171, | |
| "reward": 0.06625822186470032, | |
| "reward_std": 1.0081432062666864, | |
| "rewards/reward_func": 0.06625822186470032, | |
| "step": 3780, | |
| "toxic_reward": 4.22382138967514 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.4, | |
| "epoch": 0.8955576559546313, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.698654055595398, | |
| "image_reward": 0.27091064453125, | |
| "kl": 4.801618622988462, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0591, | |
| "reward": 0.3187494039535522, | |
| "reward_std": 0.5140533071011305, | |
| "rewards/reward_func": 0.3187494039535522, | |
| "step": 3790, | |
| "toxic_reward": 4.416417121887207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.6, | |
| "epoch": 0.8979206049149339, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6394158601760864, | |
| "image_reward": 0.26355692744255066, | |
| "kl": 3.265846297331154, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0384, | |
| "reward": -0.14046210050582886, | |
| "reward_std": 1.0342714745551347, | |
| "rewards/reward_func": -0.14046210050582886, | |
| "step": 3800, | |
| "toxic_reward": 4.3116097211837765 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.7, | |
| "epoch": 0.9002835538752363, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.7541901469230652, | |
| "image_reward": 0.2673909515142441, | |
| "kl": 0.7993329163640738, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0777, | |
| "reward": 0.010242342948913574, | |
| "reward_std": 1.4442682154476643, | |
| "rewards/reward_func": 0.010242342948913574, | |
| "step": 3810, | |
| "toxic_reward": 4.425883173942566 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.225, | |
| "epoch": 0.9026465028355387, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.8831507563591003, | |
| "image_reward": 0.29705912470817564, | |
| "kl": 3.6087327402085068, | |
| "learning_rate": 5e-06, | |
| "loss": -0.111, | |
| "reward": 0.8021630614995956, | |
| "reward_std": 0.8431573905050754, | |
| "rewards/reward_func": 0.8021630614995956, | |
| "step": 3820, | |
| "toxic_reward": 3.6668890714645386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.85, | |
| "epoch": 0.9050094517958412, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.166309118270874, | |
| "image_reward": 0.27442220151424407, | |
| "kl": 3.696834401600063, | |
| "learning_rate": 5e-06, | |
| "loss": 0.023, | |
| "reward": 0.46357709765434263, | |
| "reward_std": 0.5384013399481773, | |
| "rewards/reward_func": 0.46357709765434263, | |
| "step": 3830, | |
| "toxic_reward": 4.282819819450379 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.275, | |
| "epoch": 0.9073724007561437, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.2214293479919434, | |
| "image_reward": 0.29029541015625, | |
| "kl": 6.355313093215227, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0587, | |
| "reward": 0.36757221817970276, | |
| "reward_std": 1.1468286462128163, | |
| "rewards/reward_func": 0.36757221817970276, | |
| "step": 3840, | |
| "toxic_reward": 3.8713893949985505 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.525, | |
| "epoch": 0.9097353497164461, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.7023747563362122, | |
| "image_reward": 0.2795267730951309, | |
| "kl": 0.12285411208868027, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0277, | |
| "reward": 0.6907171040773392, | |
| "reward_std": 0.8528184913098812, | |
| "rewards/reward_func": 0.6907171040773392, | |
| "step": 3850, | |
| "toxic_reward": 3.9646514534950255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.0, | |
| "epoch": 0.9120982986767486, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6574695706367493, | |
| "image_reward": 0.277626545727253, | |
| "kl": 3.223006421420723, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1019, | |
| "reward": 0.17425565123558046, | |
| "reward_std": 1.0788604862987996, | |
| "rewards/reward_func": 0.17425565123558046, | |
| "step": 3860, | |
| "toxic_reward": 3.892214322090149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.575, | |
| "epoch": 0.9144612476370511, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.6060093641281128, | |
| "image_reward": 0.25700276643037795, | |
| "kl": 1.728565347008407, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0095, | |
| "reward": 0.2703657388687134, | |
| "reward_std": 0.8089243900030851, | |
| "rewards/reward_func": 0.2703657388687134, | |
| "step": 3870, | |
| "toxic_reward": 4.175320339202881 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.275, | |
| "epoch": 0.9168241965973535, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.603025436401367, | |
| "image_reward": 0.2737335205078125, | |
| "kl": 0.7518249765969813, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0306, | |
| "reward": 0.8955561727285385, | |
| "reward_std": 1.1253668650984765, | |
| "rewards/reward_func": 0.8955561727285385, | |
| "step": 3880, | |
| "toxic_reward": 3.5497735261917116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.875, | |
| "epoch": 0.9191871455576559, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6174436211585999, | |
| "image_reward": 0.2833251953125, | |
| "kl": 1.3900917531922459, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0533, | |
| "reward": 0.8824085891246796, | |
| "reward_std": 1.2487390112131833, | |
| "rewards/reward_func": 0.8824085891246796, | |
| "step": 3890, | |
| "toxic_reward": 3.764987659454346 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.1, | |
| "epoch": 0.9215500945179584, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.8587064146995544, | |
| "image_reward": 0.25449015349149706, | |
| "kl": 3.2844431857578456, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0836, | |
| "reward": 0.17285645604133607, | |
| "reward_std": 1.4729075387120247, | |
| "rewards/reward_func": 0.17285645604133607, | |
| "step": 3900, | |
| "toxic_reward": 4.319640278816223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.65, | |
| "epoch": 0.9239130434782609, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.7836766242980957, | |
| "image_reward": 0.2760904937982559, | |
| "kl": 0.04128519091755152, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0218, | |
| "reward": -0.14393893480300904, | |
| "reward_std": 1.2086152411997317, | |
| "rewards/reward_func": -0.14393893480300904, | |
| "step": 3910, | |
| "toxic_reward": 3.988687515258789 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.525, | |
| "epoch": 0.9262759924385633, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0223326683044434, | |
| "image_reward": 0.237677001953125, | |
| "kl": 0.10622669160366058, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1241, | |
| "reward": 0.8052110552787781, | |
| "reward_std": 0.809264022950083, | |
| "rewards/reward_func": 0.8052110552787781, | |
| "step": 3920, | |
| "toxic_reward": 4.316140675544739 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.525, | |
| "epoch": 0.9286389413988658, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.2948088645935059, | |
| "image_reward": 0.27791646122932434, | |
| "kl": 2.2056565455161037, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0904, | |
| "reward": 0.5610605776309967, | |
| "reward_std": 0.9484948962926865, | |
| "rewards/reward_func": 0.5610605776309967, | |
| "step": 3930, | |
| "toxic_reward": 4.4695143699646 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.825, | |
| "epoch": 0.9310018903591682, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0040950775146484, | |
| "image_reward": 0.27231852263212203, | |
| "kl": 2.655760496482253, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0755, | |
| "reward": 0.263138085603714, | |
| "reward_std": 0.4817726358771324, | |
| "rewards/reward_func": 0.263138085603714, | |
| "step": 3940, | |
| "toxic_reward": 4.636347913742066 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.25, | |
| "epoch": 0.9333648393194707, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.39709585905075073, | |
| "image_reward": 0.2637420654296875, | |
| "kl": 0.1439337281510234, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0772, | |
| "reward": 0.04962950348854065, | |
| "reward_std": 0.781620041653514, | |
| "rewards/reward_func": 0.04962950348854065, | |
| "step": 3950, | |
| "toxic_reward": 4.751176500320435 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.9, | |
| "epoch": 0.9357277882797732, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.8190930485725403, | |
| "image_reward": 0.2555938705801964, | |
| "kl": 5.330091013200581, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0093, | |
| "reward": -0.16106579303741456, | |
| "reward_std": 1.2331121437251569, | |
| "rewards/reward_func": -0.16106579303741456, | |
| "step": 3960, | |
| "toxic_reward": 4.007374119758606 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.75, | |
| "epoch": 0.9380907372400756, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0821632146835327, | |
| "image_reward": 0.3061696380376816, | |
| "kl": 6.141950584948063, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1307, | |
| "reward": 0.625621622800827, | |
| "reward_std": 0.8008190289139747, | |
| "rewards/reward_func": 0.625621622800827, | |
| "step": 3970, | |
| "toxic_reward": 3.468269979953766 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.625, | |
| "epoch": 0.9404536862003781, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.1129677295684814, | |
| "image_reward": 0.26193033903837204, | |
| "kl": 0.6634119726717472, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0429, | |
| "reward": 0.13726072907447814, | |
| "reward_std": 2.353568767011166, | |
| "rewards/reward_func": 0.13726072907447814, | |
| "step": 3980, | |
| "toxic_reward": 4.0013970851898195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.35, | |
| "epoch": 0.9428166351606805, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.7701426148414612, | |
| "image_reward": 0.28350016176700593, | |
| "kl": 0.1994122840464115, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0421, | |
| "reward": 0.7691244065761567, | |
| "reward_std": 0.9025557667016983, | |
| "rewards/reward_func": 0.7691244065761567, | |
| "step": 3990, | |
| "toxic_reward": 4.340523219108581 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.225, | |
| "epoch": 0.945179584120983, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.46611157059669495, | |
| "image_reward": 0.26962890625, | |
| "kl": 0.047311073541641234, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1003, | |
| "reward": 1.3654770731925965, | |
| "reward_std": 0.657595872040838, | |
| "rewards/reward_func": 1.3654770731925965, | |
| "step": 4000, | |
| "toxic_reward": 3.765986955165863 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.675, | |
| "epoch": 0.9475425330812854, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.730478048324585, | |
| "image_reward": 0.2595326751470566, | |
| "kl": 2.0693125385791062, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0531, | |
| "reward": 0.16633399724960327, | |
| "reward_std": 1.2444878976792098, | |
| "rewards/reward_func": 0.16633399724960327, | |
| "step": 4010, | |
| "toxic_reward": 3.9091518998146055 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.775, | |
| "epoch": 0.9499054820415879, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.7307797074317932, | |
| "image_reward": 0.2784047439694405, | |
| "kl": 1.5403530787676574, | |
| "learning_rate": 5e-06, | |
| "loss": 0.01, | |
| "reward": 0.15964727997779846, | |
| "reward_std": 1.2297844395041466, | |
| "rewards/reward_func": 0.15964727997779846, | |
| "step": 4020, | |
| "toxic_reward": 4.325857400894165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.275, | |
| "epoch": 0.9522684310018904, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.158098816871643, | |
| "image_reward": 0.240879312902689, | |
| "kl": 1.8537536807358266, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1782, | |
| "reward": 0.5329648047685623, | |
| "reward_std": 1.5547814331948757, | |
| "rewards/reward_func": 0.5329648047685623, | |
| "step": 4030, | |
| "toxic_reward": 3.8254613667726516 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.95, | |
| "epoch": 0.9546313799621928, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.5303730964660645, | |
| "image_reward": 0.25118484497070315, | |
| "kl": 0.187329238653183, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0541, | |
| "reward": 0.27067047357559204, | |
| "reward_std": 0.7333962991833687, | |
| "rewards/reward_func": 0.27067047357559204, | |
| "step": 4040, | |
| "toxic_reward": 4.245214033126831 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.5, | |
| "epoch": 0.9569943289224953, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.8333770632743835, | |
| "image_reward": 0.264398193359375, | |
| "kl": 7.662982761859894, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0304, | |
| "reward": 0.26739619076251986, | |
| "reward_std": 1.3646116882562638, | |
| "rewards/reward_func": 0.26739619076251986, | |
| "step": 4050, | |
| "toxic_reward": 3.6070310473442078 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.275, | |
| "epoch": 0.9593572778827977, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.021411657333374, | |
| "image_reward": 0.281744384765625, | |
| "kl": 2.6961711190640925, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0528, | |
| "reward": 0.1087444543838501, | |
| "reward_std": 0.9241739958524704, | |
| "rewards/reward_func": 0.1087444543838501, | |
| "step": 4060, | |
| "toxic_reward": 4.196173495054245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.575, | |
| "epoch": 0.9617202268431002, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.7931532859802246, | |
| "image_reward": 0.25716959685087204, | |
| "kl": 5.984370514377952, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1524, | |
| "reward": 0.09075822830200195, | |
| "reward_std": 1.12701465934515, | |
| "rewards/reward_func": 0.09075822830200195, | |
| "step": 4070, | |
| "toxic_reward": 4.376713454723358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.375, | |
| "epoch": 0.9640831758034026, | |
| "format_reward": -0.5, | |
| "grad_norm": 0.7085260152816772, | |
| "image_reward": 0.2577000930905342, | |
| "kl": 1.8822400705888866, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0404, | |
| "reward": 0.17331230640411377, | |
| "reward_std": 1.6633539475500583, | |
| "rewards/reward_func": 0.17331230640411377, | |
| "step": 4080, | |
| "toxic_reward": 4.294411969184876 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.8, | |
| "epoch": 0.9664461247637051, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0063364505767822, | |
| "image_reward": 0.250091552734375, | |
| "kl": 0.14847910068929196, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0732, | |
| "reward": 1.0602999448776245, | |
| "reward_std": 0.6203169705346226, | |
| "rewards/reward_func": 1.0602999448776245, | |
| "step": 4090, | |
| "toxic_reward": 4.171542119979859 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.85, | |
| "epoch": 0.9688090737240076, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6620392203330994, | |
| "image_reward": 0.2424652099609375, | |
| "kl": 0.36983290296047927, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0911, | |
| "reward": 0.6753372728824616, | |
| "reward_std": 1.2773339383304119, | |
| "rewards/reward_func": 0.6753372728824616, | |
| "step": 4100, | |
| "toxic_reward": 4.280330467224121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.9, | |
| "epoch": 0.97117202268431, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.8530160188674927, | |
| "image_reward": 0.2524658203125, | |
| "kl": 4.212855443544686, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0813, | |
| "reward": 0.6060003638267517, | |
| "reward_std": 0.9568195153027773, | |
| "rewards/reward_func": 0.6060003638267517, | |
| "step": 4110, | |
| "toxic_reward": 3.8215681195259092 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.85, | |
| "epoch": 0.9735349716446124, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.7192955017089844, | |
| "image_reward": 0.28581949770450593, | |
| "kl": 10.378714705258608, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0248, | |
| "reward": 0.10166561603546143, | |
| "reward_std": 1.791293729841709, | |
| "rewards/reward_func": 0.10166561603546143, | |
| "step": 4120, | |
| "toxic_reward": 3.5509902030229568 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.725, | |
| "epoch": 0.975897920604915, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.8529999852180481, | |
| "image_reward": 0.2569305419921875, | |
| "kl": 8.308781201578677, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0001, | |
| "reward": 0.7349396765232086, | |
| "reward_std": 0.4486356295645237, | |
| "rewards/reward_func": 0.7349396765232086, | |
| "step": 4130, | |
| "toxic_reward": 4.5674937725067135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.925, | |
| "epoch": 0.9782608695652174, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.9192355275154114, | |
| "image_reward": 0.273162841796875, | |
| "kl": 2.731711974926293, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0006, | |
| "reward": -0.20568010210990906, | |
| "reward_std": 0.6350222621113062, | |
| "rewards/reward_func": -0.20568010210990906, | |
| "step": 4140, | |
| "toxic_reward": 4.01632958650589 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.875, | |
| "epoch": 0.9806238185255198, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.7154003977775574, | |
| "image_reward": 0.25846659392118454, | |
| "kl": 3.1543860264122485, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0384, | |
| "reward": 0.15666076242923738, | |
| "reward_std": 1.1065492704510689, | |
| "rewards/reward_func": 0.15666076242923738, | |
| "step": 4150, | |
| "toxic_reward": 3.2047137916088104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.225, | |
| "epoch": 0.9829867674858223, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6323632001876831, | |
| "image_reward": 0.2768702179193497, | |
| "kl": 4.070834948495031, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1427, | |
| "reward": 0.21166958212852477, | |
| "reward_std": 1.1970111442729832, | |
| "rewards/reward_func": 0.21166958212852477, | |
| "step": 4160, | |
| "toxic_reward": 4.128625917434692 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.425, | |
| "epoch": 0.9853497164461248, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.5803432464599609, | |
| "image_reward": 0.257720947265625, | |
| "kl": 1.2115541946142911, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0251, | |
| "reward": 0.26483882069587705, | |
| "reward_std": 0.8841663489118219, | |
| "rewards/reward_func": 0.26483882069587705, | |
| "step": 4170, | |
| "toxic_reward": 3.953411507606506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.05, | |
| "epoch": 0.9877126654064272, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.0321141481399536, | |
| "image_reward": 0.2662984222173691, | |
| "kl": 12.658018402941526, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0031, | |
| "reward": 0.850147670507431, | |
| "reward_std": 1.0917948484420776, | |
| "rewards/reward_func": 0.850147670507431, | |
| "step": 4180, | |
| "toxic_reward": 4.424872517585754 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.775, | |
| "epoch": 0.9900756143667296, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.934152603149414, | |
| "image_reward": 0.28038330078125, | |
| "kl": 0.22834131643176078, | |
| "learning_rate": 5e-06, | |
| "loss": -0.12, | |
| "reward": -0.05527897924184799, | |
| "reward_std": 1.090353344194591, | |
| "rewards/reward_func": -0.05527897924184799, | |
| "step": 4190, | |
| "toxic_reward": 4.094451707601547 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.175, | |
| "epoch": 0.9924385633270322, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.2237070798873901, | |
| "image_reward": 0.28351847380399703, | |
| "kl": 11.847508652508258, | |
| "learning_rate": 5e-06, | |
| "loss": -0.078, | |
| "reward": -0.27686416208744047, | |
| "reward_std": 1.4433475863188505, | |
| "rewards/reward_func": -0.27686416208744047, | |
| "step": 4200, | |
| "toxic_reward": 4.032291853427887 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.3, | |
| "epoch": 0.9948015122873346, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.00357985496521, | |
| "image_reward": 0.26925506591796877, | |
| "kl": 6.287641528248787, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0509, | |
| "reward": 0.26841793656349183, | |
| "reward_std": 0.7431968785822392, | |
| "rewards/reward_func": 0.26841793656349183, | |
| "step": 4210, | |
| "toxic_reward": 3.797722101211548 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.4, | |
| "epoch": 0.997164461247637, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.412477731704712, | |
| "image_reward": 0.2857859283685684, | |
| "kl": 0.20840035788714886, | |
| "learning_rate": 5e-06, | |
| "loss": -0.01, | |
| "reward": 0.49851550459861754, | |
| "reward_std": 1.4509758695960044, | |
| "rewards/reward_func": 0.49851550459861754, | |
| "step": 4220, | |
| "toxic_reward": 4.4275671482086185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.575, | |
| "epoch": 0.9995274102079396, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.3644284009933472, | |
| "image_reward": 0.2857421875, | |
| "kl": 14.352099673077465, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1247, | |
| "reward": 0.5606966435909271, | |
| "reward_std": 0.5899959981441498, | |
| "rewards/reward_func": 0.5606966435909271, | |
| "step": 4230, | |
| "toxic_reward": 4.210422110557556 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.975, | |
| "epoch": 1.001890359168242, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.210316181182861, | |
| "image_reward": 0.28163655698299406, | |
| "kl": 6.0347686521708965, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0289, | |
| "reward": 0.11643823981285095, | |
| "reward_std": 1.107480544038117, | |
| "rewards/reward_func": 0.11643823981285095, | |
| "step": 4240, | |
| "toxic_reward": 3.994339680671692 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.3, | |
| "epoch": 1.0042533081285445, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.516270637512207, | |
| "image_reward": 0.2680999755859375, | |
| "kl": 1.3430524323135615, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0258, | |
| "reward": 1.2558865308761598, | |
| "reward_std": 0.9449932537972927, | |
| "rewards/reward_func": 1.2558865308761598, | |
| "step": 4250, | |
| "toxic_reward": 4.3164361953735355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.1, | |
| "epoch": 1.0066162570888468, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.1102194786071777, | |
| "image_reward": 0.25366058349609377, | |
| "kl": 0.149768141284585, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0742, | |
| "reward": 0.06790508627891541, | |
| "reward_std": 0.6080379813909531, | |
| "rewards/reward_func": 0.06790508627891541, | |
| "step": 4260, | |
| "toxic_reward": 4.386375617980957 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.325, | |
| "epoch": 1.0089792060491494, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.862875461578369, | |
| "image_reward": 0.2832529693841934, | |
| "kl": 2.053416795656085, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0907, | |
| "reward": 0.30691148042678834, | |
| "reward_std": 1.6981020882725715, | |
| "rewards/reward_func": 0.30691148042678834, | |
| "step": 4270, | |
| "toxic_reward": 3.5000792026519774 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.05, | |
| "epoch": 1.011342155009452, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.5172159671783447, | |
| "image_reward": 0.2263885498046875, | |
| "kl": 0.14171482473611832, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0082, | |
| "reward": 0.5368665099143982, | |
| "reward_std": 1.4538173630833626, | |
| "rewards/reward_func": 0.5368665099143982, | |
| "step": 4280, | |
| "toxic_reward": 4.501732063293457 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.1, | |
| "epoch": 1.0137051039697542, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.6869735717773438, | |
| "image_reward": 0.266131591796875, | |
| "kl": 17.507234007120132, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0165, | |
| "reward": 0.595378065109253, | |
| "reward_std": 0.6132703861221671, | |
| "rewards/reward_func": 0.595378065109253, | |
| "step": 4290, | |
| "toxic_reward": 4.223924076557159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.8, | |
| "epoch": 1.0160680529300568, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.046621322631836, | |
| "image_reward": 0.2617146819829941, | |
| "kl": 0.1885729007422924, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0513, | |
| "reward": 0.02859283685684204, | |
| "reward_std": 1.7149874530732632, | |
| "rewards/reward_func": 0.02859283685684204, | |
| "step": 4300, | |
| "toxic_reward": 4.371392369270325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.6, | |
| "epoch": 1.018431001890359, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.381049156188965, | |
| "image_reward": 0.2654388427734375, | |
| "kl": 1.2927639432251454, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0052, | |
| "reward": -0.0640803337097168, | |
| "reward_std": 1.4367546334862709, | |
| "rewards/reward_func": -0.0640803337097168, | |
| "step": 4310, | |
| "toxic_reward": 4.212549781799316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.9, | |
| "epoch": 1.0207939508506616, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0743227005004883, | |
| "image_reward": 0.255714924633503, | |
| "kl": 16.378241488710046, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1016, | |
| "reward": 0.3336408376693726, | |
| "reward_std": 0.6735909695737063, | |
| "rewards/reward_func": 0.3336408376693726, | |
| "step": 4320, | |
| "toxic_reward": 4.5154483914375305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.4, | |
| "epoch": 1.0231568998109641, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.6660760641098022, | |
| "image_reward": 0.24780476838350296, | |
| "kl": 11.463303370773792, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0995, | |
| "reward": -0.0025091707706451417, | |
| "reward_std": 1.6661910176277162, | |
| "rewards/reward_func": -0.0025091707706451417, | |
| "step": 4330, | |
| "toxic_reward": 3.852264070510864 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.4, | |
| "epoch": 1.0255198487712665, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.4377121925354, | |
| "image_reward": 0.2486114501953125, | |
| "kl": 0.12634929567575454, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0148, | |
| "reward": 0.518048095703125, | |
| "reward_std": 1.0762871712446214, | |
| "rewards/reward_func": 0.518048095703125, | |
| "step": 4340, | |
| "toxic_reward": 4.010181951522827 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.875, | |
| "epoch": 1.027882797731569, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.9050147533416748, | |
| "image_reward": 0.2529998779296875, | |
| "kl": 0.20360449738800526, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0104, | |
| "reward": -0.05591415464878082, | |
| "reward_std": 1.2363329231739044, | |
| "rewards/reward_func": -0.05591415464878082, | |
| "step": 4350, | |
| "toxic_reward": 4.434300184249878 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.9, | |
| "epoch": 1.0302457466918715, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.783447742462158, | |
| "image_reward": 0.25158691257238386, | |
| "kl": 0.14820914287120104, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0709, | |
| "reward": 0.2038910448551178, | |
| "reward_std": 1.113127877563238, | |
| "rewards/reward_func": 0.2038910448551178, | |
| "step": 4360, | |
| "toxic_reward": 4.226523244380951 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.85, | |
| "epoch": 1.0326086956521738, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.6769667863845825, | |
| "image_reward": 0.2698781341314316, | |
| "kl": 1.560221792012453, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0453, | |
| "reward": -0.25547429323196413, | |
| "reward_std": 1.2353890612721443, | |
| "rewards/reward_func": -0.25547429323196413, | |
| "step": 4370, | |
| "toxic_reward": 4.451306319236755 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.325, | |
| "epoch": 1.0349716446124764, | |
| "format_reward": -0.5, | |
| "grad_norm": 1.5327138900756836, | |
| "image_reward": 0.25219675749540327, | |
| "kl": 8.099627137556672, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0101, | |
| "reward": -0.2395196735858917, | |
| "reward_std": 1.6747881084680558, | |
| "rewards/reward_func": -0.2395196735858917, | |
| "step": 4380, | |
| "toxic_reward": 4.392533135414124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.875, | |
| "epoch": 1.037334593572779, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.2079089879989624, | |
| "image_reward": 0.2677134186029434, | |
| "kl": 20.563808789849283, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0416, | |
| "reward": 0.5019214197993278, | |
| "reward_std": 1.3155438639223576, | |
| "rewards/reward_func": 0.5019214197993278, | |
| "step": 4390, | |
| "toxic_reward": 3.744106537103653 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.575, | |
| "epoch": 1.0396975425330812, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.300169944763184, | |
| "image_reward": 0.266143798828125, | |
| "kl": 0.33298523649573325, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0325, | |
| "reward": 0.3428509056568146, | |
| "reward_std": 0.6832939319312572, | |
| "rewards/reward_func": 0.3428509056568146, | |
| "step": 4400, | |
| "toxic_reward": 3.938971757888794 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.575, | |
| "epoch": 1.0420604914933838, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.788394927978516, | |
| "image_reward": 0.2602081298828125, | |
| "kl": 0.14277449063956738, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0799, | |
| "reward": 0.5224148035049438, | |
| "reward_std": 0.5329875692725181, | |
| "rewards/reward_func": 0.5224148035049438, | |
| "step": 4410, | |
| "toxic_reward": 4.6161150455474855 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.45, | |
| "epoch": 1.0444234404536863, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.009355545043945, | |
| "image_reward": 0.2745361328125, | |
| "kl": 0.1908944919705391, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0428, | |
| "reward": 0.5507814303040505, | |
| "reward_std": 0.7364906007423997, | |
| "rewards/reward_func": 0.5507814303040505, | |
| "step": 4420, | |
| "toxic_reward": 3.738240921497345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.375, | |
| "epoch": 1.0467863894139886, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.9596161842346191, | |
| "image_reward": 0.2600412994623184, | |
| "kl": 0.2493920259177685, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0546, | |
| "reward": 0.3896596789360046, | |
| "reward_std": 1.1463438659906386, | |
| "rewards/reward_func": 0.3896596789360046, | |
| "step": 4430, | |
| "toxic_reward": 4.299828362464905 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.6, | |
| "epoch": 1.0491493383742911, | |
| "format_reward": -0.5, | |
| "grad_norm": 16.468900680541992, | |
| "image_reward": 0.2660593673586845, | |
| "kl": 1.7080695651471616, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0381, | |
| "reward": 0.23900684118270873, | |
| "reward_std": 1.4474023096263409, | |
| "rewards/reward_func": 0.23900684118270873, | |
| "step": 4440, | |
| "toxic_reward": 4.323360848426819 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.175, | |
| "epoch": 1.0515122873345937, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.211010932922363, | |
| "image_reward": 0.23970438539981842, | |
| "kl": 2.5637484416365623, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1362, | |
| "reward": 1.0162243604660035, | |
| "reward_std": 0.6415727452374995, | |
| "rewards/reward_func": 1.0162243604660035, | |
| "step": 4450, | |
| "toxic_reward": 4.387140679359436 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.875, | |
| "epoch": 1.053875236294896, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.96478796005249, | |
| "image_reward": 0.279913330078125, | |
| "kl": 1.5326927796006202, | |
| "learning_rate": 5e-06, | |
| "loss": 0.135, | |
| "reward": -0.0030475854873657227, | |
| "reward_std": 1.1649701196700335, | |
| "rewards/reward_func": -0.0030475854873657227, | |
| "step": 4460, | |
| "toxic_reward": 4.197524422407151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.475, | |
| "epoch": 1.0562381852551985, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.094940185546875, | |
| "image_reward": 0.25467529296875, | |
| "kl": 0.3941259577870369, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0321, | |
| "reward": 0.24867143034934996, | |
| "reward_std": 0.6323847549967467, | |
| "rewards/reward_func": 0.24867143034934996, | |
| "step": 4470, | |
| "toxic_reward": 4.357023143768311 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.6, | |
| "epoch": 1.0586011342155008, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.004316329956055, | |
| "image_reward": 0.249798583984375, | |
| "kl": 0.6100661933422089, | |
| "learning_rate": 5e-06, | |
| "loss": 0.054, | |
| "reward": 0.3194288432598114, | |
| "reward_std": 1.00972272567451, | |
| "rewards/reward_func": 0.3194288432598114, | |
| "step": 4480, | |
| "toxic_reward": 3.6232224822044374 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.275, | |
| "epoch": 1.0609640831758034, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.553150177001953, | |
| "image_reward": 0.2777862548828125, | |
| "kl": 1.2566918075084685, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0033, | |
| "reward": 0.3464995056390762, | |
| "reward_std": 1.0610926449298859, | |
| "rewards/reward_func": 0.3464995056390762, | |
| "step": 4490, | |
| "toxic_reward": 3.4315811157226563 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.5, | |
| "epoch": 1.063327032136106, | |
| "format_reward": 0.0, | |
| "grad_norm": 21.91239356994629, | |
| "image_reward": 0.26402740478515624, | |
| "kl": 2.7657025068998338, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0088, | |
| "reward": -0.017888635396957397, | |
| "reward_std": 0.36575160175561905, | |
| "rewards/reward_func": -0.017888635396957397, | |
| "step": 4500, | |
| "toxic_reward": 4.492252993583679 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.175, | |
| "epoch": 1.0656899810964082, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.70862877368927, | |
| "image_reward": 0.2537200927734375, | |
| "kl": 0.9243695795536041, | |
| "learning_rate": 5e-06, | |
| "loss": -0.148, | |
| "reward": 0.2666252374649048, | |
| "reward_std": 0.8290498301386833, | |
| "rewards/reward_func": 0.2666252374649048, | |
| "step": 4510, | |
| "toxic_reward": 4.025203084945678 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.625, | |
| "epoch": 1.0680529300567108, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.76298189163208, | |
| "image_reward": 0.2544342041015625, | |
| "kl": 1.2314461708068847, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0379, | |
| "reward": 0.1297641634941101, | |
| "reward_std": 0.7050925550982357, | |
| "rewards/reward_func": 0.1297641634941101, | |
| "step": 4520, | |
| "toxic_reward": 4.568165302276611 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.825, | |
| "epoch": 1.0704158790170133, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.698065757751465, | |
| "image_reward": 0.274078369140625, | |
| "kl": 1.325176051259041, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0398, | |
| "reward": 0.10026351213455201, | |
| "reward_std": 0.812692479044199, | |
| "rewards/reward_func": 0.10026351213455201, | |
| "step": 4530, | |
| "toxic_reward": 3.9686192631721497 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.3, | |
| "epoch": 1.0727788279773156, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.877777099609375, | |
| "image_reward": 0.2792388916015625, | |
| "kl": 0.9205800026655198, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1066, | |
| "reward": 0.5690743923187256, | |
| "reward_std": 0.6653784658759833, | |
| "rewards/reward_func": 0.5690743923187256, | |
| "step": 4540, | |
| "toxic_reward": 3.964191234111786 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.525, | |
| "epoch": 1.0751417769376181, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.515148878097534, | |
| "image_reward": 0.23823343813419343, | |
| "kl": 2.8265303134918214, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1035, | |
| "reward": 0.6135944664478302, | |
| "reward_std": 1.5456651039421558, | |
| "rewards/reward_func": 0.6135944664478302, | |
| "step": 4550, | |
| "toxic_reward": 4.405286359786987 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.85, | |
| "epoch": 1.0775047258979207, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.40328025817871, | |
| "image_reward": 0.26103515625, | |
| "kl": 2.2788069248199463, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0091, | |
| "reward": 0.10576534271240234, | |
| "reward_std": 0.39959471523761747, | |
| "rewards/reward_func": 0.10576534271240234, | |
| "step": 4560, | |
| "toxic_reward": 4.633650445938111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.7, | |
| "epoch": 1.079867674858223, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.5780229568481445, | |
| "image_reward": 0.26860554963350297, | |
| "kl": 4.05745484828949, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0631, | |
| "reward": 0.6903072118759155, | |
| "reward_std": 1.8377123966813087, | |
| "rewards/reward_func": 0.6903072118759155, | |
| "step": 4570, | |
| "toxic_reward": 4.404474878311158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 69.25, | |
| "epoch": 1.0822306238185255, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.605886936187744, | |
| "image_reward": 0.26128031462430956, | |
| "kl": 1.1529910147190094, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0061, | |
| "reward": -0.4064223051071167, | |
| "reward_std": 1.0936089092865586, | |
| "rewards/reward_func": -0.4064223051071167, | |
| "step": 4580, | |
| "toxic_reward": 4.17680971622467 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.225, | |
| "epoch": 1.084593572778828, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.201918601989746, | |
| "image_reward": 0.26739501953125, | |
| "kl": 0.7634936004877091, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0832, | |
| "reward": 0.2786406099796295, | |
| "reward_std": 0.7699430305510759, | |
| "rewards/reward_func": 0.2786406099796295, | |
| "step": 4590, | |
| "toxic_reward": 4.203878152370453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.75, | |
| "epoch": 1.0869565217391304, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.329306125640869, | |
| "image_reward": 0.2691864013671875, | |
| "kl": 1.415566897392273, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0091, | |
| "reward": 0.031065577268600465, | |
| "reward_std": 0.9241972327232361, | |
| "rewards/reward_func": 0.031065577268600465, | |
| "step": 4600, | |
| "toxic_reward": 3.4664461970329286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.825, | |
| "epoch": 1.089319470699433, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.773272514343262, | |
| "image_reward": 0.23636678010225295, | |
| "kl": 3.1093257188797, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0048, | |
| "reward": 0.6077887773513794, | |
| "reward_std": 0.9942519944161177, | |
| "rewards/reward_func": 0.6077887773513794, | |
| "step": 4610, | |
| "toxic_reward": 4.182659006118774 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.6, | |
| "epoch": 1.0916824196597354, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.842249870300293, | |
| "image_reward": 0.2884033203125, | |
| "kl": 0.8512112647294998, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0029, | |
| "reward": 0.2840136528015137, | |
| "reward_std": 1.34358575232327, | |
| "rewards/reward_func": 0.2840136528015137, | |
| "step": 4620, | |
| "toxic_reward": 4.202986550331116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.0, | |
| "epoch": 1.0940453686200378, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.111598014831543, | |
| "image_reward": 0.26724853515625, | |
| "kl": 0.5160227678716183, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1139, | |
| "reward": 0.41757542341947557, | |
| "reward_std": 0.6193137221038342, | |
| "rewards/reward_func": 0.41757542341947557, | |
| "step": 4630, | |
| "toxic_reward": 3.6429463922977448 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.475, | |
| "epoch": 1.0964083175803403, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.654786586761475, | |
| "image_reward": 0.2576507568359375, | |
| "kl": 0.6431491911411286, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0209, | |
| "reward": 0.4444656491279602, | |
| "reward_std": 0.8271868824958801, | |
| "rewards/reward_func": 0.4444656491279602, | |
| "step": 4640, | |
| "toxic_reward": 3.751771080493927 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.225, | |
| "epoch": 1.0987712665406426, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.850701332092285, | |
| "image_reward": 0.25439249724149704, | |
| "kl": 3.444407519698143, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0854, | |
| "reward": 0.643358188867569, | |
| "reward_std": 0.8931491523981094, | |
| "rewards/reward_func": 0.643358188867569, | |
| "step": 4650, | |
| "toxic_reward": 4.321841323375702 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.575, | |
| "epoch": 1.1011342155009451, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.3553853034973145, | |
| "image_reward": 0.2528656005859375, | |
| "kl": 0.5195316299796104, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0275, | |
| "reward": 0.4554763913154602, | |
| "reward_std": 0.8332011103630066, | |
| "rewards/reward_func": 0.4554763913154602, | |
| "step": 4660, | |
| "toxic_reward": 4.327652913331986 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.925, | |
| "epoch": 1.1034971644612477, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.020429611206055, | |
| "image_reward": 0.24530232697725296, | |
| "kl": 1.074008372426033, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1332, | |
| "reward": 0.08963438272476196, | |
| "reward_std": 1.1591505765914918, | |
| "rewards/reward_func": 0.08963438272476196, | |
| "step": 4670, | |
| "toxic_reward": 3.8221271514892576 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.675, | |
| "epoch": 1.10586011342155, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.8656316995620728, | |
| "image_reward": 0.23163909912109376, | |
| "kl": 1.6141413852572442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0496, | |
| "reward": 0.4311521232128143, | |
| "reward_std": 0.39210873320698736, | |
| "rewards/reward_func": 0.4311521232128143, | |
| "step": 4680, | |
| "toxic_reward": 4.517966604232788 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.2, | |
| "epoch": 1.1082230623818525, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.881125450134277, | |
| "image_reward": 0.2380462646484375, | |
| "kl": 0.6565065160393715, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0278, | |
| "reward": 0.3582367777824402, | |
| "reward_std": 1.0096068516373635, | |
| "rewards/reward_func": 0.3582367777824402, | |
| "step": 4690, | |
| "toxic_reward": 3.9490260004997255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.2, | |
| "epoch": 1.110586011342155, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.552460193634033, | |
| "image_reward": 0.24650166779756547, | |
| "kl": 2.4836434960365295, | |
| "learning_rate": 5e-06, | |
| "loss": -0.055, | |
| "reward": 0.850802743434906, | |
| "reward_std": 1.5017553605139256, | |
| "rewards/reward_func": 0.850802743434906, | |
| "step": 4700, | |
| "toxic_reward": 4.000320458412171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.35, | |
| "epoch": 1.1129489603024574, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.16213607788086, | |
| "image_reward": 0.2479156494140625, | |
| "kl": 9.667583072185517, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0035, | |
| "reward": 0.7445069432258606, | |
| "reward_std": 0.7123569492250681, | |
| "rewards/reward_func": 0.7445069432258606, | |
| "step": 4710, | |
| "toxic_reward": 4.515477871894836 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.825, | |
| "epoch": 1.11531190926276, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.000924110412598, | |
| "image_reward": 0.2601796478033066, | |
| "kl": 1.5108904749155045, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0823, | |
| "reward": 0.5056971669197082, | |
| "reward_std": 0.6825690733268857, | |
| "rewards/reward_func": 0.5056971669197082, | |
| "step": 4720, | |
| "toxic_reward": 4.039038109779358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.9, | |
| "epoch": 1.1176748582230625, | |
| "format_reward": -0.25, | |
| "grad_norm": 10.222740173339844, | |
| "image_reward": 0.25692138671875, | |
| "kl": 0.35422504395246507, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0093, | |
| "reward": 0.08535944372415542, | |
| "reward_std": 1.423003512620926, | |
| "rewards/reward_func": 0.08535944372415542, | |
| "step": 4730, | |
| "toxic_reward": 3.4993788480758665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.55, | |
| "epoch": 1.1200378071833648, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.3486738204956055, | |
| "image_reward": 0.24800923615694045, | |
| "kl": 0.38842023983597757, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0107, | |
| "reward": 0.1211450919508934, | |
| "reward_std": 1.3706756496801973, | |
| "rewards/reward_func": 0.1211450919508934, | |
| "step": 4740, | |
| "toxic_reward": 3.2782628774642943 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.325, | |
| "epoch": 1.1224007561436673, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.218822479248047, | |
| "image_reward": 0.251934814453125, | |
| "kl": 1.4006462961435318, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0701, | |
| "reward": 0.9292663365602494, | |
| "reward_std": 0.874046965315938, | |
| "rewards/reward_func": 0.9292663365602494, | |
| "step": 4750, | |
| "toxic_reward": 4.4716246843338014 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.45, | |
| "epoch": 1.1247637051039698, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.548481941223145, | |
| "image_reward": 0.276861572265625, | |
| "kl": 0.5785035833716392, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0177, | |
| "reward": 0.1986662968993187, | |
| "reward_std": 0.7839731447398662, | |
| "rewards/reward_func": 0.1986662968993187, | |
| "step": 4760, | |
| "toxic_reward": 4.128668719530106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.975, | |
| "epoch": 1.1271266540642721, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.347504615783691, | |
| "image_reward": 0.2632904052734375, | |
| "kl": 0.28924584165215494, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0536, | |
| "reward": 0.40365022569894793, | |
| "reward_std": 0.6283778937533497, | |
| "rewards/reward_func": 0.40365022569894793, | |
| "step": 4770, | |
| "toxic_reward": 3.78736280053854 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.775, | |
| "epoch": 1.1294896030245747, | |
| "format_reward": -0.75, | |
| "grad_norm": 29.38702964782715, | |
| "image_reward": 0.27183634638786314, | |
| "kl": 8.101900951564312, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0473, | |
| "reward": -0.11305050253868103, | |
| "reward_std": 2.1815814077854156, | |
| "rewards/reward_func": -0.11305050253868103, | |
| "step": 4780, | |
| "toxic_reward": 3.949468755722046 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.7, | |
| "epoch": 1.1318525519848772, | |
| "format_reward": -0.75, | |
| "grad_norm": 8.19861125946045, | |
| "image_reward": 0.2731597885489464, | |
| "kl": 5.514032608270645, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0809, | |
| "reward": -0.5878833532333374, | |
| "reward_std": 1.700104326196015, | |
| "rewards/reward_func": -0.5878833532333374, | |
| "step": 4790, | |
| "toxic_reward": 4.362279486656189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.575, | |
| "epoch": 1.1342155009451795, | |
| "format_reward": 0.0, | |
| "grad_norm": 25.879568099975586, | |
| "image_reward": 0.272625732421875, | |
| "kl": 0.41564694195985796, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0466, | |
| "reward": 0.5246647775173188, | |
| "reward_std": 0.5603986160829664, | |
| "rewards/reward_func": 0.5246647775173188, | |
| "step": 4800, | |
| "toxic_reward": 4.4845054864883425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.175, | |
| "epoch": 1.136578449905482, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.490880966186523, | |
| "image_reward": 0.27078043669462204, | |
| "kl": 0.39700448513031006, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0442, | |
| "reward": 0.10371096134185791, | |
| "reward_std": 1.3051490228623153, | |
| "rewards/reward_func": 0.10371096134185791, | |
| "step": 4810, | |
| "toxic_reward": 4.362593126296997 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.025, | |
| "epoch": 1.1389413988657844, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.680285453796387, | |
| "image_reward": 0.2447296142578125, | |
| "kl": 0.43964013159275056, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1088, | |
| "reward": 0.43211621046066284, | |
| "reward_std": 0.5677682287991047, | |
| "rewards/reward_func": 0.43211621046066284, | |
| "step": 4820, | |
| "toxic_reward": 4.520205068588257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.225, | |
| "epoch": 1.141304347826087, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.846988201141357, | |
| "image_reward": 0.24936320036649703, | |
| "kl": 0.3737114042043686, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1324, | |
| "reward": -0.08488219976425171, | |
| "reward_std": 1.6377468653023244, | |
| "rewards/reward_func": -0.08488219976425171, | |
| "step": 4830, | |
| "toxic_reward": 3.979211616516113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.95, | |
| "epoch": 1.1436672967863895, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.332221031188965, | |
| "image_reward": 0.27100830078125, | |
| "kl": 0.39419813454151154, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0752, | |
| "reward": 0.9029350757598877, | |
| "reward_std": 1.455178501456976, | |
| "rewards/reward_func": 0.9029350757598877, | |
| "step": 4840, | |
| "toxic_reward": 3.6434609413146974 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.125, | |
| "epoch": 1.146030245746692, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.2025651931762695, | |
| "image_reward": 0.281640625, | |
| "kl": 7.567617936432361, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0926, | |
| "reward": 0.7164658069610595, | |
| "reward_std": 0.6624833345413208, | |
| "rewards/reward_func": 0.7164658069610595, | |
| "step": 4850, | |
| "toxic_reward": 3.8413574934005736 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.975, | |
| "epoch": 1.1483931947069943, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.695226669311523, | |
| "image_reward": 0.24990997314453126, | |
| "kl": 0.28165399581193923, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0188, | |
| "reward": -0.12237508296966552, | |
| "reward_std": 0.6198875203728675, | |
| "rewards/reward_func": -0.12237508296966552, | |
| "step": 4860, | |
| "toxic_reward": 4.485203766822815 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.45, | |
| "epoch": 1.1507561436672968, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5677099227905273, | |
| "image_reward": 0.2783660888671875, | |
| "kl": 0.29035804942250254, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0618, | |
| "reward": 0.050651901960372926, | |
| "reward_std": 1.2044988840818405, | |
| "rewards/reward_func": 0.050651901960372926, | |
| "step": 4870, | |
| "toxic_reward": 3.6688124537467957 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.9, | |
| "epoch": 1.1531190926275992, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.83213996887207, | |
| "image_reward": 0.2756866455078125, | |
| "kl": 5.533606587722898, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1798, | |
| "reward": 0.5692965686321259, | |
| "reward_std": 1.0450827227905393, | |
| "rewards/reward_func": 0.5692965686321259, | |
| "step": 4880, | |
| "toxic_reward": 3.439787745475769 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.375, | |
| "epoch": 1.1554820415879017, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0500257015228271, | |
| "image_reward": 0.258331298828125, | |
| "kl": 0.11663263067603111, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0311, | |
| "reward": 0.4637997090816498, | |
| "reward_std": 0.9648044936358928, | |
| "rewards/reward_func": 0.4637997090816498, | |
| "step": 4890, | |
| "toxic_reward": 4.382505106925964 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.5, | |
| "epoch": 1.1578449905482042, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.189860820770264, | |
| "image_reward": 0.2593902587890625, | |
| "kl": 0.15962190218269826, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0056, | |
| "reward": 0.5262487173080445, | |
| "reward_std": 1.052651860564947, | |
| "rewards/reward_func": 0.5262487173080445, | |
| "step": 4900, | |
| "toxic_reward": 4.166365385055542 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.575, | |
| "epoch": 1.1602079395085065, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.5136041641235352, | |
| "image_reward": 0.277166748046875, | |
| "kl": 0.14849806036800145, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0034, | |
| "reward": 0.22453336119651796, | |
| "reward_std": 0.5165121786296367, | |
| "rewards/reward_func": 0.22453336119651796, | |
| "step": 4910, | |
| "toxic_reward": 3.900139307975769 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.55, | |
| "epoch": 1.162570888468809, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.6905107498168945, | |
| "image_reward": 0.26757049560546875, | |
| "kl": 0.17389641776680947, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0273, | |
| "reward": 0.2769235372543335, | |
| "reward_std": 0.8026977114379406, | |
| "rewards/reward_func": 0.2769235372543335, | |
| "step": 4920, | |
| "toxic_reward": 4.421657228469849 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.9, | |
| "epoch": 1.1649338374291116, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.7038688063621521, | |
| "image_reward": 0.23498077392578126, | |
| "kl": 0.1468098048120737, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0593, | |
| "reward": 0.6022326171398162, | |
| "reward_std": 0.8370201224461198, | |
| "rewards/reward_func": 0.6022326171398162, | |
| "step": 4930, | |
| "toxic_reward": 4.272796273231506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.575, | |
| "epoch": 1.167296786389414, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.3997626304626465, | |
| "image_reward": 0.2218317672610283, | |
| "kl": 10.733999550715088, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0239, | |
| "reward": 0.5406073331832886, | |
| "reward_std": 1.1294488459825516, | |
| "rewards/reward_func": 0.5406073331832886, | |
| "step": 4940, | |
| "toxic_reward": 4.010496520996094 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.75, | |
| "epoch": 1.1696597353497165, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.437244415283203, | |
| "image_reward": 0.260540771484375, | |
| "kl": 0.4533839326351881, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0308, | |
| "reward": 0.6349693357944488, | |
| "reward_std": 0.9300125196576119, | |
| "rewards/reward_func": 0.6349693357944488, | |
| "step": 4950, | |
| "toxic_reward": 3.9825836658477782 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.275, | |
| "epoch": 1.172022684310019, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.643482208251953, | |
| "image_reward": 0.25181121826171876, | |
| "kl": 0.7526591405272484, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0702, | |
| "reward": 0.2168402910232544, | |
| "reward_std": 0.8874317653477192, | |
| "rewards/reward_func": 0.2168402910232544, | |
| "step": 4960, | |
| "toxic_reward": 4.36525526046753 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.125, | |
| "epoch": 1.1743856332703213, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.1135120391845703, | |
| "image_reward": 0.27721354067325593, | |
| "kl": 1.7936469875276089, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0616, | |
| "reward": -0.168658310174942, | |
| "reward_std": 1.076946148276329, | |
| "rewards/reward_func": -0.168658310174942, | |
| "step": 4970, | |
| "toxic_reward": 4.423034191131592 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.2, | |
| "epoch": 1.1767485822306238, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.0600641965866089, | |
| "image_reward": 0.22789459228515624, | |
| "kl": 3.3630725659430025, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0228, | |
| "reward": 0.7056062936782836, | |
| "reward_std": 0.9683291807770729, | |
| "rewards/reward_func": 0.7056062936782836, | |
| "step": 4980, | |
| "toxic_reward": 4.235305881500244 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.025, | |
| "epoch": 1.1791115311909262, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.4251501560211182, | |
| "image_reward": 0.25722147673368456, | |
| "kl": 0.2127727370709181, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0369, | |
| "reward": 0.7460228025913238, | |
| "reward_std": 1.3902123406529427, | |
| "rewards/reward_func": 0.7460228025913238, | |
| "step": 4990, | |
| "toxic_reward": 4.187235593795776 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.85, | |
| "epoch": 1.1814744801512287, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.9059237837791443, | |
| "image_reward": 0.275982666015625, | |
| "kl": 0.1094449780881405, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0122, | |
| "reward": -0.013343071937561036, | |
| "reward_std": 0.8927877993322909, | |
| "rewards/reward_func": -0.013343071937561036, | |
| "step": 5000, | |
| "toxic_reward": 4.172649383544922 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.475, | |
| "epoch": 1.1838374291115312, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.676426887512207, | |
| "image_reward": 0.25437113344669343, | |
| "kl": 0.306893527135253, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0597, | |
| "reward": 0.5460815012454987, | |
| "reward_std": 1.3148551121354104, | |
| "rewards/reward_func": 0.5460815012454987, | |
| "step": 5010, | |
| "toxic_reward": 4.169591236114502 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.5, | |
| "epoch": 1.1862003780718338, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.0359044075012207, | |
| "image_reward": 0.25831960141658783, | |
| "kl": 0.11788953803479671, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0199, | |
| "reward": 0.008247452974319457, | |
| "reward_std": 1.6603192906826734, | |
| "rewards/reward_func": 0.008247452974319457, | |
| "step": 5020, | |
| "toxic_reward": 4.008079314231873 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.7, | |
| "epoch": 1.188563327032136, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.407492637634277, | |
| "image_reward": 0.25005086213350297, | |
| "kl": 0.16296980381011963, | |
| "learning_rate": 5e-06, | |
| "loss": 0.013, | |
| "reward": 0.45398043394088744, | |
| "reward_std": 1.4666540574282407, | |
| "rewards/reward_func": 0.45398043394088744, | |
| "step": 5030, | |
| "toxic_reward": 3.9480291843414306 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.05, | |
| "epoch": 1.1909262759924386, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.3405718803405762, | |
| "image_reward": 0.26083475798368455, | |
| "kl": 1.2570629265159368, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0608, | |
| "reward": 0.24062097072601318, | |
| "reward_std": 1.1102397807873785, | |
| "rewards/reward_func": 0.24062097072601318, | |
| "step": 5040, | |
| "toxic_reward": 4.282037019729614 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.475, | |
| "epoch": 1.193289224952741, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.0421810150146484, | |
| "image_reward": 0.2481842041015625, | |
| "kl": 0.4886137153953314, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0154, | |
| "reward": 0.35102577805519103, | |
| "reward_std": 1.4176109634339809, | |
| "rewards/reward_func": 0.35102577805519103, | |
| "step": 5050, | |
| "toxic_reward": 4.56660737991333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.775, | |
| "epoch": 1.1956521739130435, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.4514474868774414, | |
| "image_reward": 0.2724589020013809, | |
| "kl": 17.261842382885515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0799, | |
| "reward": 0.2634397208690643, | |
| "reward_std": 0.6655941482633352, | |
| "rewards/reward_func": 0.2634397208690643, | |
| "step": 5060, | |
| "toxic_reward": 4.50599045753479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.675, | |
| "epoch": 1.198015122873346, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.386458396911621, | |
| "image_reward": 0.2706837967038155, | |
| "kl": 0.41296282410621643, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0407, | |
| "reward": 0.3763133823871613, | |
| "reward_std": 1.3990098256617785, | |
| "rewards/reward_func": 0.3763133823871613, | |
| "step": 5070, | |
| "toxic_reward": 3.8180208444595336 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.55, | |
| "epoch": 1.2003780718336483, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.708019495010376, | |
| "image_reward": 0.2523040771484375, | |
| "kl": 0.13607071787118913, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0338, | |
| "reward": 0.09913046360015869, | |
| "reward_std": 0.64256557286717, | |
| "rewards/reward_func": 0.09913046360015869, | |
| "step": 5080, | |
| "toxic_reward": 4.473843407630921 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.85, | |
| "epoch": 1.2027410207939508, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.9288604855537415, | |
| "image_reward": 0.24779205322265624, | |
| "kl": 0.20878240577876567, | |
| "learning_rate": 5e-06, | |
| "loss": 0.041, | |
| "reward": 0.5819396436214447, | |
| "reward_std": 0.7615427184849978, | |
| "rewards/reward_func": 0.5819396436214447, | |
| "step": 5090, | |
| "toxic_reward": 4.673109149932861 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.825, | |
| "epoch": 1.2051039697542534, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.3514373302459717, | |
| "image_reward": 0.24433186948299407, | |
| "kl": 0.2980830356478691, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0794, | |
| "reward": -0.4275161147117615, | |
| "reward_std": 2.30497971996665, | |
| "rewards/reward_func": -0.4275161147117615, | |
| "step": 5100, | |
| "toxic_reward": 4.221112084388733 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 34.775, | |
| "epoch": 1.2074669187145557, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.3809269666671753, | |
| "image_reward": 0.2739410400390625, | |
| "kl": 4.718816532939672, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1045, | |
| "reward": 0.23022666573524475, | |
| "reward_std": 0.9735932052135468, | |
| "rewards/reward_func": 0.23022666573524475, | |
| "step": 5110, | |
| "toxic_reward": 3.8542242765426638 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.125, | |
| "epoch": 1.2098298676748582, | |
| "format_reward": -0.75, | |
| "grad_norm": 1.045753836631775, | |
| "image_reward": 0.243878173828125, | |
| "kl": 0.15604666136205197, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0351, | |
| "reward": -0.7461395561695099, | |
| "reward_std": 2.103620085120201, | |
| "rewards/reward_func": -0.7461395561695099, | |
| "step": 5120, | |
| "toxic_reward": 4.052614498138428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.3, | |
| "epoch": 1.2121928166351608, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.4709815979003906, | |
| "image_reward": 0.25801239013671873, | |
| "kl": 0.1505513045936823, | |
| "learning_rate": 5e-06, | |
| "loss": -0.073, | |
| "reward": 0.789186455309391, | |
| "reward_std": 1.0413845662027597, | |
| "rewards/reward_func": 0.789186455309391, | |
| "step": 5130, | |
| "toxic_reward": 3.886520874500275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.575, | |
| "epoch": 1.214555765595463, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.3710083961486816, | |
| "image_reward": 0.2646331787109375, | |
| "kl": 0.1121824998408556, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1077, | |
| "reward": 0.6361587151885033, | |
| "reward_std": 0.6423972092568875, | |
| "rewards/reward_func": 0.6361587151885033, | |
| "step": 5140, | |
| "toxic_reward": 4.181642347574234 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.625, | |
| "epoch": 1.2169187145557656, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.4757941961288452, | |
| "image_reward": 0.2700469970703125, | |
| "kl": 0.808637504093349, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0188, | |
| "reward": 0.2734032437205315, | |
| "reward_std": 0.8411962412297725, | |
| "rewards/reward_func": 0.2734032437205315, | |
| "step": 5150, | |
| "toxic_reward": 3.6458971202373505 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.725, | |
| "epoch": 1.2192816635160681, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.66521817445755, | |
| "image_reward": 0.2667388916015625, | |
| "kl": 0.4425561033189297, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0435, | |
| "reward": 0.35035309493541716, | |
| "reward_std": 1.6248657763004304, | |
| "rewards/reward_func": 0.35035309493541716, | |
| "step": 5160, | |
| "toxic_reward": 3.7190463662147524 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.475, | |
| "epoch": 1.2216446124763705, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.705077171325684, | |
| "image_reward": 0.259429931640625, | |
| "kl": 0.7811562133952975, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0244, | |
| "reward": 0.3771729826927185, | |
| "reward_std": 1.326733610033989, | |
| "rewards/reward_func": 0.3771729826927185, | |
| "step": 5170, | |
| "toxic_reward": 3.6760028123855593 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.975, | |
| "epoch": 1.224007561436673, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.75157356262207, | |
| "image_reward": 0.22822214663028717, | |
| "kl": 45.12481062971055, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0908, | |
| "reward": 0.6647323310375214, | |
| "reward_std": 1.0134072445333004, | |
| "rewards/reward_func": 0.6647323310375214, | |
| "step": 5180, | |
| "toxic_reward": 4.210642290115357 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.4, | |
| "epoch": 1.2263705103969755, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.71264934539795, | |
| "image_reward": 0.23838348388671876, | |
| "kl": 0.2953592788428068, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0801, | |
| "reward": 0.4181412994861603, | |
| "reward_std": 1.2768891528248787, | |
| "rewards/reward_func": 0.4181412994861603, | |
| "step": 5190, | |
| "toxic_reward": 4.502067589759827 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.625, | |
| "epoch": 1.2287334593572778, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.2092390060424805, | |
| "image_reward": 0.2596160888671875, | |
| "kl": 0.15071408227086067, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0359, | |
| "reward": 0.46884081363677976, | |
| "reward_std": 0.9004301078617573, | |
| "rewards/reward_func": 0.46884081363677976, | |
| "step": 5200, | |
| "toxic_reward": 4.537938523292541 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.55, | |
| "epoch": 1.2310964083175804, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.4807243347167969, | |
| "image_reward": 0.25061492919921874, | |
| "kl": 0.39518592432141303, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0062, | |
| "reward": 0.7565897464752197, | |
| "reward_std": 0.6514241144061088, | |
| "rewards/reward_func": 0.7565897464752197, | |
| "step": 5210, | |
| "toxic_reward": 4.779706335067749 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.15, | |
| "epoch": 1.2334593572778827, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.918940544128418, | |
| "image_reward": 0.266143798828125, | |
| "kl": 1.6246791556477547, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0669, | |
| "reward": 1.0145411103963853, | |
| "reward_std": 0.7731746513396501, | |
| "rewards/reward_func": 1.0145411103963853, | |
| "step": 5220, | |
| "toxic_reward": 3.9364122271537783 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.8, | |
| "epoch": 1.2358223062381852, | |
| "format_reward": -0.5, | |
| "grad_norm": 8.1648530960083, | |
| "image_reward": 0.256298828125, | |
| "kl": 0.3491713672876358, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0294, | |
| "reward": 0.18980904817581176, | |
| "reward_std": 1.4395622819662095, | |
| "rewards/reward_func": 0.18980904817581176, | |
| "step": 5230, | |
| "toxic_reward": 4.15175496339798 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.675, | |
| "epoch": 1.2381852551984878, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.493502140045166, | |
| "image_reward": 0.2956329345703125, | |
| "kl": 3.9262495055794715, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0129, | |
| "reward": 0.19967559576034546, | |
| "reward_std": 1.4724704299122096, | |
| "rewards/reward_func": 0.19967559576034546, | |
| "step": 5240, | |
| "toxic_reward": 3.676086974143982 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.95, | |
| "epoch": 1.24054820415879, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.836026668548584, | |
| "image_reward": 0.262060546875, | |
| "kl": 0.5677594847977161, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0137, | |
| "reward": 1.0836671590805054, | |
| "reward_std": 0.9185017041862011, | |
| "rewards/reward_func": 1.0836671590805054, | |
| "step": 5250, | |
| "toxic_reward": 4.442173409461975 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.925, | |
| "epoch": 1.2429111531190926, | |
| "format_reward": -0.25, | |
| "grad_norm": 17.290130615234375, | |
| "image_reward": 0.256195068359375, | |
| "kl": 0.3261503577232361, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0665, | |
| "reward": 0.4270883619785309, | |
| "reward_std": 1.5899662226438522, | |
| "rewards/reward_func": 0.4270883619785309, | |
| "step": 5260, | |
| "toxic_reward": 3.6384164452552796 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.425, | |
| "epoch": 1.2452741020793952, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.655460357666016, | |
| "image_reward": 0.271282958984375, | |
| "kl": 1.0409250572323798, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0411, | |
| "reward": 0.7604422211647034, | |
| "reward_std": 0.6456888254731894, | |
| "rewards/reward_func": 0.7604422211647034, | |
| "step": 5270, | |
| "toxic_reward": 3.7677977979183197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 59.775, | |
| "epoch": 1.2476370510396975, | |
| "format_reward": -0.25, | |
| "grad_norm": 0.5878366827964783, | |
| "image_reward": 0.2589070647954941, | |
| "kl": 0.16051149740815163, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0852, | |
| "reward": 0.39556344896554946, | |
| "reward_std": 1.1551922081038355, | |
| "rewards/reward_func": 0.39556344896554946, | |
| "step": 5280, | |
| "toxic_reward": 3.390644001960754 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.275, | |
| "epoch": 1.25, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.206055641174316, | |
| "image_reward": 0.275860595703125, | |
| "kl": 0.4969006285071373, | |
| "learning_rate": 5e-06, | |
| "loss": -0.115, | |
| "reward": 0.4857667863368988, | |
| "reward_std": 0.8739027962088585, | |
| "rewards/reward_func": 0.4857667863368988, | |
| "step": 5290, | |
| "toxic_reward": 4.016790902614593 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.7, | |
| "epoch": 1.2523629489603025, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.513704299926758, | |
| "image_reward": 0.24937744140625, | |
| "kl": 0.14417755380272865, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0317, | |
| "reward": 0.48732776641845704, | |
| "reward_std": 0.8942459903657436, | |
| "rewards/reward_func": 0.48732776641845704, | |
| "step": 5300, | |
| "toxic_reward": 4.074605274200439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.7, | |
| "epoch": 1.2547258979206048, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.694108724594116, | |
| "image_reward": 0.2640960693359375, | |
| "kl": 0.21989786028862, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0552, | |
| "reward": 0.20011116266250611, | |
| "reward_std": 0.9783342686016112, | |
| "rewards/reward_func": 0.20011116266250611, | |
| "step": 5310, | |
| "toxic_reward": 3.337161436676979 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 61.15, | |
| "epoch": 1.2570888468809074, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.8417941331863403, | |
| "image_reward": 0.23155619353055953, | |
| "kl": 4.248336365818977, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0877, | |
| "reward": 0.23556498885154725, | |
| "reward_std": 0.9007356996648014, | |
| "rewards/reward_func": 0.23556498885154725, | |
| "step": 5320, | |
| "toxic_reward": 4.5890906810760494 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.475, | |
| "epoch": 1.2594517958412097, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.823044300079346, | |
| "image_reward": 0.2513310745358467, | |
| "kl": 4.037289990484714, | |
| "learning_rate": 5e-06, | |
| "loss": 0.082, | |
| "reward": 0.8450765609741211, | |
| "reward_std": 0.8255521267652511, | |
| "rewards/reward_func": 0.8450765609741211, | |
| "step": 5330, | |
| "toxic_reward": 4.287392568588257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.975, | |
| "epoch": 1.2618147448015122, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.315946578979492, | |
| "image_reward": 0.2459075927734375, | |
| "kl": 0.3113373316824436, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0475, | |
| "reward": 0.645756970345974, | |
| "reward_std": 0.7255122657865286, | |
| "rewards/reward_func": 0.645756970345974, | |
| "step": 5340, | |
| "toxic_reward": 4.189401495456695 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.7, | |
| "epoch": 1.2641776937618148, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.4027810096740723, | |
| "image_reward": 0.2551483154296875, | |
| "kl": 0.4323126286268234, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0048, | |
| "reward": 0.5660954803228379, | |
| "reward_std": 0.6791210256516933, | |
| "rewards/reward_func": 0.5660954803228379, | |
| "step": 5350, | |
| "toxic_reward": 3.2965795576572416 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.55, | |
| "epoch": 1.2665406427221173, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.9737337827682495, | |
| "image_reward": 0.2604766845703125, | |
| "kl": 0.8922965943813324, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0886, | |
| "reward": 0.37445068359375, | |
| "reward_std": 0.7902419693768025, | |
| "rewards/reward_func": 0.37445068359375, | |
| "step": 5360, | |
| "toxic_reward": 3.6073597192764284 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.975, | |
| "epoch": 1.2689035916824196, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.368748188018799, | |
| "image_reward": 0.2573964446783066, | |
| "kl": 0.8937133550643921, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0345, | |
| "reward": 1.1729332506656647, | |
| "reward_std": 1.3139135614037514, | |
| "rewards/reward_func": 1.1729332506656647, | |
| "step": 5370, | |
| "toxic_reward": 4.428536581993103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.375, | |
| "epoch": 1.2712665406427222, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3669607639312744, | |
| "image_reward": 0.242498779296875, | |
| "kl": 0.6131832510232925, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1065, | |
| "reward": 0.3124019861221313, | |
| "reward_std": 0.8398781210184098, | |
| "rewards/reward_func": 0.3124019861221313, | |
| "step": 5380, | |
| "toxic_reward": 3.9513532400131224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 62.325, | |
| "epoch": 1.2736294896030245, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.6428773403167725, | |
| "image_reward": 0.2564788818359375, | |
| "kl": 0.983223095536232, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0332, | |
| "reward": 0.8021515548229218, | |
| "reward_std": 0.8680705142207443, | |
| "rewards/reward_func": 0.8021515548229218, | |
| "step": 5390, | |
| "toxic_reward": 3.7623249292373657 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.375, | |
| "epoch": 1.275992438563327, | |
| "format_reward": 0.0, | |
| "grad_norm": 22.341930389404297, | |
| "image_reward": 0.25401458740234373, | |
| "kl": 0.7686945527791977, | |
| "learning_rate": 5e-06, | |
| "loss": 0.021, | |
| "reward": 0.18261390328407287, | |
| "reward_std": 0.39404432671144607, | |
| "rewards/reward_func": 0.18261390328407287, | |
| "step": 5400, | |
| "toxic_reward": 3.986022639274597 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.075, | |
| "epoch": 1.2783553875236295, | |
| "format_reward": -0.5, | |
| "grad_norm": 11.878053665161133, | |
| "image_reward": 0.24981587678194045, | |
| "kl": 0.4886711150407791, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0214, | |
| "reward": 0.06913218498229981, | |
| "reward_std": 1.5105109971016646, | |
| "rewards/reward_func": 0.06913218498229981, | |
| "step": 5410, | |
| "toxic_reward": 4.118505048751831 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.65, | |
| "epoch": 1.280718336483932, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.851999759674072, | |
| "image_reward": 0.25881449431180953, | |
| "kl": 0.6457854598760605, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0958, | |
| "reward": 0.12992151379585265, | |
| "reward_std": 1.303325356543064, | |
| "rewards/reward_func": 0.12992151379585265, | |
| "step": 5420, | |
| "toxic_reward": 4.377404046058655 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.9, | |
| "epoch": 1.2830812854442344, | |
| "format_reward": -0.25, | |
| "grad_norm": 25.7547550201416, | |
| "image_reward": 0.2694793701171875, | |
| "kl": 1.677524197101593, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0859, | |
| "reward": 0.34250465631484983, | |
| "reward_std": 1.0538076907396317, | |
| "rewards/reward_func": 0.34250465631484983, | |
| "step": 5430, | |
| "toxic_reward": 4.271343016624451 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.875, | |
| "epoch": 1.285444234404537, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.9158964157104492, | |
| "image_reward": 0.2666290283203125, | |
| "kl": 0.5966441169381141, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0566, | |
| "reward": 0.4593892157077789, | |
| "reward_std": 0.6576637156307698, | |
| "rewards/reward_func": 0.4593892157077789, | |
| "step": 5440, | |
| "toxic_reward": 4.204905700683594 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.525, | |
| "epoch": 1.2878071833648392, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.6007134914398193, | |
| "image_reward": 0.2621429443359375, | |
| "kl": 1.1984394997358323, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0486, | |
| "reward": 0.25084500312805175, | |
| "reward_std": 1.5825427711009978, | |
| "rewards/reward_func": 0.25084500312805175, | |
| "step": 5450, | |
| "toxic_reward": 3.685545027256012 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.45, | |
| "epoch": 1.2901701323251418, | |
| "format_reward": -0.25, | |
| "grad_norm": 25.30818748474121, | |
| "image_reward": 0.2647552490234375, | |
| "kl": 6.056701734662056, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0599, | |
| "reward": 0.25077282190322875, | |
| "reward_std": 1.234234382212162, | |
| "rewards/reward_func": 0.25077282190322875, | |
| "step": 5460, | |
| "toxic_reward": 4.593598937988281 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.825, | |
| "epoch": 1.2925330812854443, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.34868812561035, | |
| "image_reward": 0.2645416259765625, | |
| "kl": 1.6588621526956557, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0054, | |
| "reward": 0.5463581264019013, | |
| "reward_std": 0.7020838841795921, | |
| "rewards/reward_func": 0.5463581264019013, | |
| "step": 5470, | |
| "toxic_reward": 4.367759561538696 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 57.5, | |
| "epoch": 1.2948960302457466, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.92365264892578, | |
| "image_reward": 0.235223388671875, | |
| "kl": 4.412905436754227, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0277, | |
| "reward": 0.301082968711853, | |
| "reward_std": 0.5573954021558165, | |
| "rewards/reward_func": 0.301082968711853, | |
| "step": 5480, | |
| "toxic_reward": 4.563822269439697 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.45, | |
| "epoch": 1.2972589792060492, | |
| "format_reward": -0.25, | |
| "grad_norm": 15.039924621582031, | |
| "image_reward": 0.2686960846185684, | |
| "kl": 1.0792655169963836, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0051, | |
| "reward": -0.10340776294469833, | |
| "reward_std": 1.0451893661171199, | |
| "rewards/reward_func": -0.10340776294469833, | |
| "step": 5490, | |
| "toxic_reward": 3.9914595246315003 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.675, | |
| "epoch": 1.2996219281663515, | |
| "format_reward": 0.0, | |
| "grad_norm": 20.686878204345703, | |
| "image_reward": 0.236834716796875, | |
| "kl": 0.6705092936754227, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0657, | |
| "reward": 0.7118561029434204, | |
| "reward_std": 0.6682203419506549, | |
| "rewards/reward_func": 0.7118561029434204, | |
| "step": 5500, | |
| "toxic_reward": 4.56234884262085 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.45, | |
| "epoch": 1.301984877126654, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.817633628845215, | |
| "image_reward": 0.2553232818841934, | |
| "kl": 0.9995080977678299, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1257, | |
| "reward": 0.27432467341423034, | |
| "reward_std": 1.3118387231603266, | |
| "rewards/reward_func": 0.27432467341423034, | |
| "step": 5510, | |
| "toxic_reward": 4.407331418991089 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.6, | |
| "epoch": 1.3043478260869565, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.929862976074219, | |
| "image_reward": 0.2562835693359375, | |
| "kl": 11.621018621325494, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0747, | |
| "reward": 0.29978330433368683, | |
| "reward_std": 0.5768878096714616, | |
| "rewards/reward_func": 0.29978330433368683, | |
| "step": 5520, | |
| "toxic_reward": 3.9843369722366333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.375, | |
| "epoch": 1.306710775047259, | |
| "format_reward": -0.5, | |
| "grad_norm": 54.56308364868164, | |
| "image_reward": 0.25370279848575594, | |
| "kl": 15.30106150507927, | |
| "learning_rate": 5e-06, | |
| "loss": 0.059, | |
| "reward": -0.3965187072753906, | |
| "reward_std": 1.5167736381292343, | |
| "rewards/reward_func": -0.3965187072753906, | |
| "step": 5530, | |
| "toxic_reward": 4.435292959213257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.35, | |
| "epoch": 1.3090737240075614, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.027195930480957, | |
| "image_reward": 0.30064697265625, | |
| "kl": 1.0911591410636903, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0513, | |
| "reward": 0.3132080137729645, | |
| "reward_std": 0.5429811116307974, | |
| "rewards/reward_func": 0.3132080137729645, | |
| "step": 5540, | |
| "toxic_reward": 4.454129576683044 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.2, | |
| "epoch": 1.311436672967864, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.916865348815918, | |
| "image_reward": 0.2515268951654434, | |
| "kl": 5.700361841917038, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0014, | |
| "reward": 0.014350098371505738, | |
| "reward_std": 0.48063138537108896, | |
| "rewards/reward_func": 0.014350098371505738, | |
| "step": 5550, | |
| "toxic_reward": 4.399230480194092 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.975, | |
| "epoch": 1.3137996219281662, | |
| "format_reward": -0.25, | |
| "grad_norm": 17.978458404541016, | |
| "image_reward": 0.24458109587430954, | |
| "kl": 4.160827812552452, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0048, | |
| "reward": 0.29164408445358275, | |
| "reward_std": 0.9224476981908083, | |
| "rewards/reward_func": 0.29164408445358275, | |
| "step": 5560, | |
| "toxic_reward": 4.361080431938172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.7, | |
| "epoch": 1.3161625708884688, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0734810829162598, | |
| "image_reward": 0.248583984375, | |
| "kl": 2.431508493423462, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0346, | |
| "reward": 0.49899758100509645, | |
| "reward_std": 0.9045591181144118, | |
| "rewards/reward_func": 0.49899758100509645, | |
| "step": 5570, | |
| "toxic_reward": 4.267354512214661 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.675, | |
| "epoch": 1.3185255198487713, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.6361165046691895, | |
| "image_reward": 0.27701314240694047, | |
| "kl": 1.473704105615616, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0156, | |
| "reward": 0.5548757612705231, | |
| "reward_std": 0.9885425483807921, | |
| "rewards/reward_func": 0.5548757612705231, | |
| "step": 5580, | |
| "toxic_reward": 4.615298962593078 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.35, | |
| "epoch": 1.3208884688090738, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.7377290725708, | |
| "image_reward": 0.2538330078125, | |
| "kl": 197.4472616136074, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1633, | |
| "reward": 0.6491668224334717, | |
| "reward_std": 0.6371353514492512, | |
| "rewards/reward_func": 0.6491668224334717, | |
| "step": 5590, | |
| "toxic_reward": 4.438857316970825 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.575, | |
| "epoch": 1.3232514177693762, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.95717430114746, | |
| "image_reward": 0.23577117919921875, | |
| "kl": 3.680394399166107, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0084, | |
| "reward": 0.1440478801727295, | |
| "reward_std": 0.4425256311893463, | |
| "rewards/reward_func": 0.1440478801727295, | |
| "step": 5600, | |
| "toxic_reward": 4.603517079353333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.25, | |
| "epoch": 1.3256143667296787, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.732531547546387, | |
| "image_reward": 0.2568023681640625, | |
| "kl": 2.256978714466095, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0219, | |
| "reward": 0.040336894989013675, | |
| "reward_std": 0.6101976454257965, | |
| "rewards/reward_func": 0.040336894989013675, | |
| "step": 5610, | |
| "toxic_reward": 4.473554587364196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.75, | |
| "epoch": 1.327977315689981, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.701716423034668, | |
| "image_reward": 0.252197265625, | |
| "kl": 8.072559344768525, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0803, | |
| "reward": 0.8639614999294281, | |
| "reward_std": 1.0928052112460136, | |
| "rewards/reward_func": 0.8639614999294281, | |
| "step": 5620, | |
| "toxic_reward": 3.9843992233276366 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.8, | |
| "epoch": 1.3303402646502835, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.12986946105957, | |
| "image_reward": 0.27263641357421875, | |
| "kl": 3.3592694640159606, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0652, | |
| "reward": 0.7886571228504181, | |
| "reward_std": 1.0737986475229264, | |
| "rewards/reward_func": 0.7886571228504181, | |
| "step": 5630, | |
| "toxic_reward": 3.668324041366577 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.95, | |
| "epoch": 1.332703213610586, | |
| "format_reward": 0.0, | |
| "grad_norm": 23.82711410522461, | |
| "image_reward": 0.2702301025390625, | |
| "kl": 12.466990399360657, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0401, | |
| "reward": 0.7557259559631347, | |
| "reward_std": 0.9376067817211151, | |
| "rewards/reward_func": 0.7557259559631347, | |
| "step": 5640, | |
| "toxic_reward": 4.100273895263672 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.625, | |
| "epoch": 1.3350661625708884, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.783689975738525, | |
| "image_reward": 0.2736572265625, | |
| "kl": 9.604325413703918, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0135, | |
| "reward": 0.21887901425361633, | |
| "reward_std": 0.41371094444766643, | |
| "rewards/reward_func": 0.21887901425361633, | |
| "step": 5650, | |
| "toxic_reward": 4.314438569545746 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.125, | |
| "epoch": 1.337429111531191, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.773420333862305, | |
| "image_reward": 0.2513946533203125, | |
| "kl": 9.296885073184967, | |
| "learning_rate": 5e-06, | |
| "loss": -0.095, | |
| "reward": 1.1378837168216704, | |
| "reward_std": 0.818750386312604, | |
| "rewards/reward_func": 1.1378837168216704, | |
| "step": 5660, | |
| "toxic_reward": 4.522894716262817 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.9, | |
| "epoch": 1.3397920604914935, | |
| "format_reward": -0.25, | |
| "grad_norm": 14.245692253112793, | |
| "image_reward": 0.24806925505399705, | |
| "kl": 6.753875517845154, | |
| "learning_rate": 5e-06, | |
| "loss": 0.021, | |
| "reward": 0.6894584268331527, | |
| "reward_std": 1.542804090678692, | |
| "rewards/reward_func": 0.6894584268331527, | |
| "step": 5670, | |
| "toxic_reward": 3.9723486423492433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.075, | |
| "epoch": 1.3421550094517958, | |
| "format_reward": -0.25, | |
| "grad_norm": 24.408653259277344, | |
| "image_reward": 0.25640462189912794, | |
| "kl": 11.201071047782898, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0492, | |
| "reward": -0.00045427381992340087, | |
| "reward_std": 1.027926566079259, | |
| "rewards/reward_func": -0.00045427381992340087, | |
| "step": 5680, | |
| "toxic_reward": 3.891611325740814 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.65, | |
| "epoch": 1.3445179584120983, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.8934930562973022, | |
| "image_reward": 0.25365397036075593, | |
| "kl": 4.947464096546173, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0784, | |
| "reward": 0.5487861603498458, | |
| "reward_std": 0.7702463563531637, | |
| "rewards/reward_func": 0.5487861603498458, | |
| "step": 5690, | |
| "toxic_reward": 3.977373069524765 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.5, | |
| "epoch": 1.3468809073724008, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.322084426879883, | |
| "image_reward": 0.27449544221162797, | |
| "kl": 2.6174604117870333, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0094, | |
| "reward": 0.39208410382270814, | |
| "reward_std": 1.61805320084095, | |
| "rewards/reward_func": 0.39208410382270814, | |
| "step": 5700, | |
| "toxic_reward": 4.174729800224304 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.25, | |
| "epoch": 1.3492438563327032, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.4689302444458, | |
| "image_reward": 0.2571044921875, | |
| "kl": 3.102731728553772, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0306, | |
| "reward": 0.24812114238739014, | |
| "reward_std": 0.6699782099574805, | |
| "rewards/reward_func": 0.24812114238739014, | |
| "step": 5710, | |
| "toxic_reward": 3.692942750453949 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.425, | |
| "epoch": 1.3516068052930057, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.464384078979492, | |
| "image_reward": 0.2592987060546875, | |
| "kl": 41.42341262102127, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1787, | |
| "reward": 0.9784101039171219, | |
| "reward_std": 1.2197245783172548, | |
| "rewards/reward_func": 0.9784101039171219, | |
| "step": 5720, | |
| "toxic_reward": 3.5939176797866823 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.775, | |
| "epoch": 1.353969754253308, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.272177696228027, | |
| "image_reward": 0.24337158203125, | |
| "kl": 3.5139986366033553, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0502, | |
| "reward": 0.3250808596611023, | |
| "reward_std": 0.6109479434788228, | |
| "rewards/reward_func": 0.3250808596611023, | |
| "step": 5730, | |
| "toxic_reward": 4.485757279396057 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.6, | |
| "epoch": 1.3563327032136105, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.131665229797363, | |
| "image_reward": 0.2514506012201309, | |
| "kl": 5.592804127931595, | |
| "learning_rate": 5e-06, | |
| "loss": -0.015, | |
| "reward": 0.3052162408828735, | |
| "reward_std": 1.201428510248661, | |
| "rewards/reward_func": 0.3052162408828735, | |
| "step": 5740, | |
| "toxic_reward": 4.2779217004776005 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.275, | |
| "epoch": 1.358695652173913, | |
| "format_reward": -0.5, | |
| "grad_norm": 15.648195266723633, | |
| "image_reward": 0.266064453125, | |
| "kl": 1.6513773769140243, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0092, | |
| "reward": -0.20032901763916017, | |
| "reward_std": 1.7222102746367454, | |
| "rewards/reward_func": -0.20032901763916017, | |
| "step": 5750, | |
| "toxic_reward": 4.259865856170654 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.0, | |
| "epoch": 1.3610586011342156, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.893685340881348, | |
| "image_reward": 0.2588653564453125, | |
| "kl": 4.073341834545135, | |
| "learning_rate": 5e-06, | |
| "loss": 0.013, | |
| "reward": 0.916795802116394, | |
| "reward_std": 0.8524092853069305, | |
| "rewards/reward_func": 0.916795802116394, | |
| "step": 5760, | |
| "toxic_reward": 4.560049152374267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.75, | |
| "epoch": 1.363421550094518, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.932856798171997, | |
| "image_reward": 0.2459381103515625, | |
| "kl": 2.5305844336748122, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0338, | |
| "reward": 0.5017880856990814, | |
| "reward_std": 0.7364757396280766, | |
| "rewards/reward_func": 0.5017880856990814, | |
| "step": 5770, | |
| "toxic_reward": 4.69781801700592 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.45, | |
| "epoch": 1.3657844990548205, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.3704707622528076, | |
| "image_reward": 0.2677764892578125, | |
| "kl": 1.8369466960430145, | |
| "learning_rate": 5e-06, | |
| "loss": 0.107, | |
| "reward": 0.7046410620212555, | |
| "reward_std": 0.9321951523423195, | |
| "rewards/reward_func": 0.7046410620212555, | |
| "step": 5780, | |
| "toxic_reward": 4.073530220985413 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.8, | |
| "epoch": 1.3681474480151228, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.7950003147125244, | |
| "image_reward": 0.268048095703125, | |
| "kl": 3.3737578272819517, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0251, | |
| "reward": 0.1121946096420288, | |
| "reward_std": 1.2333336278796196, | |
| "rewards/reward_func": 0.1121946096420288, | |
| "step": 5790, | |
| "toxic_reward": 4.301294279098511 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.9, | |
| "epoch": 1.3705103969754253, | |
| "format_reward": -0.25, | |
| "grad_norm": 10.600517272949219, | |
| "image_reward": 0.24301045686006545, | |
| "kl": 5.24166065454483, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0188, | |
| "reward": -0.04525191783905029, | |
| "reward_std": 1.1580330106429755, | |
| "rewards/reward_func": -0.04525191783905029, | |
| "step": 5800, | |
| "toxic_reward": 3.95846186876297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.6, | |
| "epoch": 1.3728733459357279, | |
| "format_reward": -0.5, | |
| "grad_norm": 22.423450469970703, | |
| "image_reward": 0.25951487123966216, | |
| "kl": 12.250067234039307, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0091, | |
| "reward": 0.0036635279655456545, | |
| "reward_std": 1.6421116095036268, | |
| "rewards/reward_func": 0.0036635279655456545, | |
| "step": 5810, | |
| "toxic_reward": 4.407265400886535 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.825, | |
| "epoch": 1.3752362948960302, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.5839083194732666, | |
| "image_reward": 0.2497711181640625, | |
| "kl": 7.638963532447815, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0623, | |
| "reward": 0.36217689514160156, | |
| "reward_std": 1.057050895690918, | |
| "rewards/reward_func": 0.36217689514160156, | |
| "step": 5820, | |
| "toxic_reward": 3.8359474897384644 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.075, | |
| "epoch": 1.3775992438563327, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.257360458374023, | |
| "image_reward": 0.242034912109375, | |
| "kl": 1406.6461040258407, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3409, | |
| "reward": 0.35478733479976654, | |
| "reward_std": 0.5706452172249555, | |
| "rewards/reward_func": 0.35478733479976654, | |
| "step": 5830, | |
| "toxic_reward": 3.5973093271255494 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.55, | |
| "epoch": 1.3799621928166352, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.36010217666626, | |
| "image_reward": 0.24814300537109374, | |
| "kl": 117.68144319057464, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0637, | |
| "reward": 0.3609376668930054, | |
| "reward_std": 0.6294937739614397, | |
| "rewards/reward_func": 0.3609376668930054, | |
| "step": 5840, | |
| "toxic_reward": 4.1664423704147335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.65, | |
| "epoch": 1.3823251417769375, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.234587669372559, | |
| "image_reward": 0.254736328125, | |
| "kl": 4.648911118507385, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0132, | |
| "reward": -0.4629164904356003, | |
| "reward_std": 0.8635658169165253, | |
| "rewards/reward_func": -0.4629164904356003, | |
| "step": 5850, | |
| "toxic_reward": 3.804247868061066 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.825, | |
| "epoch": 1.38468809073724, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.249091148376465, | |
| "image_reward": 0.25889790803194046, | |
| "kl": 8.51909922361374, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0656, | |
| "reward": 0.20021165013313294, | |
| "reward_std": 1.1463583020493389, | |
| "rewards/reward_func": 0.20021165013313294, | |
| "step": 5860, | |
| "toxic_reward": 4.298932027816773 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.2, | |
| "epoch": 1.3870510396975426, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.6423728466033936, | |
| "image_reward": 0.2569976806640625, | |
| "kl": 114.30452468395234, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0235, | |
| "reward": 0.7682538509368897, | |
| "reward_std": 0.9905061937868596, | |
| "rewards/reward_func": 0.7682538509368897, | |
| "step": 5870, | |
| "toxic_reward": 4.355731654167175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 57.75, | |
| "epoch": 1.389413988657845, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.193624496459961, | |
| "image_reward": 0.244573974609375, | |
| "kl": 27.17574143409729, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0466, | |
| "reward": 0.4181258499622345, | |
| "reward_std": 0.7019964678213, | |
| "rewards/reward_func": 0.4181258499622345, | |
| "step": 5880, | |
| "toxic_reward": 4.30544638633728 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.875, | |
| "epoch": 1.3917769376181475, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.119263648986816, | |
| "image_reward": 0.24458719789981842, | |
| "kl": 22.515019488334655, | |
| "learning_rate": 5e-06, | |
| "loss": 0.022, | |
| "reward": 0.2917088523507118, | |
| "reward_std": 0.7304708318784833, | |
| "rewards/reward_func": 0.2917088523507118, | |
| "step": 5890, | |
| "toxic_reward": 4.011698079109192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.95, | |
| "epoch": 1.3941398865784498, | |
| "format_reward": 0.0, | |
| "grad_norm": 203.53358459472656, | |
| "image_reward": 0.266705322265625, | |
| "kl": 130.7643344759941, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0925, | |
| "reward": 0.8561399459838868, | |
| "reward_std": 0.7673989269882441, | |
| "rewards/reward_func": 0.8561399459838868, | |
| "step": 5900, | |
| "toxic_reward": 4.3426886081695555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.25, | |
| "epoch": 1.3965028355387523, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.8555150032043457, | |
| "image_reward": 0.26331787109375, | |
| "kl": 25.51781210899353, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0759, | |
| "reward": 0.14676390141248702, | |
| "reward_std": 0.31099242605268956, | |
| "rewards/reward_func": 0.14676390141248702, | |
| "step": 5910, | |
| "toxic_reward": 4.393804085254669 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.375, | |
| "epoch": 1.3988657844990549, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.794382095336914, | |
| "image_reward": 0.2638519287109375, | |
| "kl": 75.26498790383339, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0105, | |
| "reward": 0.0749910295009613, | |
| "reward_std": 0.9545040905475617, | |
| "rewards/reward_func": 0.0749910295009613, | |
| "step": 5920, | |
| "toxic_reward": 4.043283843994141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.6, | |
| "epoch": 1.4012287334593574, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.70461654663086, | |
| "image_reward": 0.26204325407743456, | |
| "kl": 6.424372181296349, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0192, | |
| "reward": 0.3424019992351532, | |
| "reward_std": 0.8532586313784123, | |
| "rewards/reward_func": 0.3424019992351532, | |
| "step": 5930, | |
| "toxic_reward": 3.697358027100563 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.7, | |
| "epoch": 1.4035916824196597, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.443364143371582, | |
| "image_reward": 0.25388997346162795, | |
| "kl": 2.7157889783382414, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0194, | |
| "reward": 0.8944644808769227, | |
| "reward_std": 0.907353313267231, | |
| "rewards/reward_func": 0.8944644808769227, | |
| "step": 5940, | |
| "toxic_reward": 4.2960577487945555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.625, | |
| "epoch": 1.4059546313799622, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.80057144165039, | |
| "image_reward": 0.2643798828125, | |
| "kl": 3.2323968172073365, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0714, | |
| "reward": 0.7592375218868256, | |
| "reward_std": 1.0486908692866563, | |
| "rewards/reward_func": 0.7592375218868256, | |
| "step": 5950, | |
| "toxic_reward": 4.236204934120178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.45, | |
| "epoch": 1.4083175803402646, | |
| "format_reward": 0.0, | |
| "grad_norm": 32.608253479003906, | |
| "image_reward": 0.2897979736328125, | |
| "kl": 0.868326199054718, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0072, | |
| "reward": 0.13819260597229005, | |
| "reward_std": 0.9927060969173909, | |
| "rewards/reward_func": 0.13819260597229005, | |
| "step": 5960, | |
| "toxic_reward": 4.137164163589477 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.875, | |
| "epoch": 1.410680529300567, | |
| "format_reward": -0.5, | |
| "grad_norm": 3.4970862865448, | |
| "image_reward": 0.254620361328125, | |
| "kl": 1.8707860291004181, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0251, | |
| "reward": -0.12371634542942048, | |
| "reward_std": 1.6602010667324065, | |
| "rewards/reward_func": -0.12371634542942048, | |
| "step": 5970, | |
| "toxic_reward": 4.560637950897217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.675, | |
| "epoch": 1.4130434782608696, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.951919555664062, | |
| "image_reward": 0.258807373046875, | |
| "kl": 1.7996377795934677, | |
| "learning_rate": 5e-06, | |
| "loss": 0.012, | |
| "reward": 0.7075730919837951, | |
| "reward_std": 0.9400279764086008, | |
| "rewards/reward_func": 0.7075730919837951, | |
| "step": 5980, | |
| "toxic_reward": 3.758779287338257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.025, | |
| "epoch": 1.4154064272211722, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.1872663497924805, | |
| "image_reward": 0.2968048095703125, | |
| "kl": 4.290337887406349, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0076, | |
| "reward": 0.3689495801925659, | |
| "reward_std": 0.5776140118017793, | |
| "rewards/reward_func": 0.3689495801925659, | |
| "step": 5990, | |
| "toxic_reward": 4.157499670982361 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.225, | |
| "epoch": 1.4177693761814745, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.382224082946777, | |
| "image_reward": 0.24061279296875, | |
| "kl": 3.217728292942047, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0161, | |
| "reward": 0.37828874588012695, | |
| "reward_std": 0.3327252045273781, | |
| "rewards/reward_func": 0.37828874588012695, | |
| "step": 6000, | |
| "toxic_reward": 4.625493478775025 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.625, | |
| "epoch": 1.420132325141777, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.742074966430664, | |
| "image_reward": 0.260565185546875, | |
| "kl": 3.087987443804741, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0098, | |
| "reward": 0.22000501453876495, | |
| "reward_std": 0.6759357416536659, | |
| "rewards/reward_func": 0.22000501453876495, | |
| "step": 6010, | |
| "toxic_reward": 4.1144504189491276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.725, | |
| "epoch": 1.4224952741020793, | |
| "format_reward": -0.25, | |
| "grad_norm": 23.140647888183594, | |
| "image_reward": 0.2770843505859375, | |
| "kl": 1.3970532178878785, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0249, | |
| "reward": -0.045973950624465944, | |
| "reward_std": 1.367066621594131, | |
| "rewards/reward_func": -0.045973950624465944, | |
| "step": 6020, | |
| "toxic_reward": 4.268449664115906 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.125, | |
| "epoch": 1.4248582230623819, | |
| "format_reward": -0.5, | |
| "grad_norm": 63.63026428222656, | |
| "image_reward": 0.2500905364751816, | |
| "kl": 1.5568452209234238, | |
| "learning_rate": 5e-06, | |
| "loss": 0.032, | |
| "reward": 0.36329651772975924, | |
| "reward_std": 2.2665354389697314, | |
| "rewards/reward_func": 0.36329651772975924, | |
| "step": 6030, | |
| "toxic_reward": 3.9466104745864867 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.15, | |
| "epoch": 1.4272211720226844, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.4662349224090576, | |
| "image_reward": 0.2569427490234375, | |
| "kl": 2.127922511100769, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0478, | |
| "reward": 0.4287997782230377, | |
| "reward_std": 1.335706689953804, | |
| "rewards/reward_func": 0.4287997782230377, | |
| "step": 6040, | |
| "toxic_reward": 4.417151093482971 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.775, | |
| "epoch": 1.4295841209829867, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.905588626861572, | |
| "image_reward": 0.2496734619140625, | |
| "kl": 2.1467004269361496, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1361, | |
| "reward": 0.6416638314723968, | |
| "reward_std": 0.6212250446900726, | |
| "rewards/reward_func": 0.6416638314723968, | |
| "step": 6050, | |
| "toxic_reward": 4.481846666336059 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.45, | |
| "epoch": 1.4319470699432892, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.034083843231201, | |
| "image_reward": 0.2545166015625, | |
| "kl": 1.5230970159173012, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0407, | |
| "reward": 0.29611208438873293, | |
| "reward_std": 0.7949410590808839, | |
| "rewards/reward_func": 0.29611208438873293, | |
| "step": 6060, | |
| "toxic_reward": 4.335282778739929 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.75, | |
| "epoch": 1.4343100189035916, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.7928450107574463, | |
| "image_reward": 0.24527740478515625, | |
| "kl": 0.8901469498872757, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0367, | |
| "reward": 0.26578280329704285, | |
| "reward_std": 1.3428313750773668, | |
| "rewards/reward_func": 0.26578280329704285, | |
| "step": 6070, | |
| "toxic_reward": 3.388633108139038 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 69.625, | |
| "epoch": 1.436672967863894, | |
| "format_reward": -0.25, | |
| "grad_norm": 19.84122085571289, | |
| "image_reward": 0.2548517853021622, | |
| "kl": 1.0234291791915893, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1725, | |
| "reward": 0.31002968549728394, | |
| "reward_std": 1.8640546321868896, | |
| "rewards/reward_func": 0.31002968549728394, | |
| "step": 6080, | |
| "toxic_reward": 3.8593465805053713 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.6, | |
| "epoch": 1.4390359168241966, | |
| "format_reward": -0.25, | |
| "grad_norm": 10.626410484313965, | |
| "image_reward": 0.2676523834466934, | |
| "kl": 5.208069609105587, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0794, | |
| "reward": 0.2428498387336731, | |
| "reward_std": 1.3197494292631746, | |
| "rewards/reward_func": 0.2428498387336731, | |
| "step": 6090, | |
| "toxic_reward": 4.562512469291687 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.8, | |
| "epoch": 1.4413988657844992, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.22333812713623, | |
| "image_reward": 0.2790537506341934, | |
| "kl": 4.089116859436035, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0053, | |
| "reward": 1.0973919004201889, | |
| "reward_std": 0.9867459360510111, | |
| "rewards/reward_func": 1.0973919004201889, | |
| "step": 6100, | |
| "toxic_reward": 4.121850895881653 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.675, | |
| "epoch": 1.4437618147448015, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.594348907470703, | |
| "image_reward": 0.24527740478515625, | |
| "kl": 2.355099043250084, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0302, | |
| "reward": 0.2879053592681885, | |
| "reward_std": 1.3369514867663383, | |
| "rewards/reward_func": 0.2879053592681885, | |
| "step": 6110, | |
| "toxic_reward": 3.482189404964447 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.625, | |
| "epoch": 1.446124763705104, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.740832805633545, | |
| "image_reward": 0.27071533203125, | |
| "kl": 1.8061291784048081, | |
| "learning_rate": 5e-06, | |
| "loss": 0.098, | |
| "reward": 0.3818982481956482, | |
| "reward_std": 0.8427915960550308, | |
| "rewards/reward_func": 0.3818982481956482, | |
| "step": 6120, | |
| "toxic_reward": 4.095608282089233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.05, | |
| "epoch": 1.4484877126654063, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.700410842895508, | |
| "image_reward": 0.2543426513671875, | |
| "kl": 1.4419916868209839, | |
| "learning_rate": 5e-06, | |
| "loss": -0.056, | |
| "reward": 0.8265678405761718, | |
| "reward_std": 0.835081409662962, | |
| "rewards/reward_func": 0.8265678405761718, | |
| "step": 6130, | |
| "toxic_reward": 4.317971038818359 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.75, | |
| "epoch": 1.4508506616257089, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.467940330505371, | |
| "image_reward": 0.253302001953125, | |
| "kl": 1.128901758790016, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0416, | |
| "reward": 0.22405808568000793, | |
| "reward_std": 0.430261270259507, | |
| "rewards/reward_func": 0.22405808568000793, | |
| "step": 6140, | |
| "toxic_reward": 4.605859112739563 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.7, | |
| "epoch": 1.4532136105860114, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.90230941772461, | |
| "image_reward": 0.261627197265625, | |
| "kl": 0.6474134013056755, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0453, | |
| "reward": 0.23209627866744995, | |
| "reward_std": 0.9918515108525753, | |
| "rewards/reward_func": 0.23209627866744995, | |
| "step": 6150, | |
| "toxic_reward": 4.06419689655304 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.85, | |
| "epoch": 1.455576559546314, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.443000793457031, | |
| "image_reward": 0.2465087890625, | |
| "kl": 0.6866413161158562, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0889, | |
| "reward": 0.37001847475767136, | |
| "reward_std": 0.7742633601650596, | |
| "rewards/reward_func": 0.37001847475767136, | |
| "step": 6160, | |
| "toxic_reward": 4.076632690429688 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.775, | |
| "epoch": 1.4579395085066162, | |
| "format_reward": -0.25, | |
| "grad_norm": 16.315828323364258, | |
| "image_reward": 0.27055562287569046, | |
| "kl": 1.8430037647485733, | |
| "learning_rate": 5e-06, | |
| "loss": -0.005, | |
| "reward": 0.37416398525238037, | |
| "reward_std": 1.238390678167343, | |
| "rewards/reward_func": 0.37416398525238037, | |
| "step": 6170, | |
| "toxic_reward": 3.7685179471969605 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.725, | |
| "epoch": 1.4603024574669188, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.808785438537598, | |
| "image_reward": 0.26315511018037796, | |
| "kl": 3.501179130375385, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0757, | |
| "reward": 0.4839250385761261, | |
| "reward_std": 1.0200102254748344, | |
| "rewards/reward_func": 0.4839250385761261, | |
| "step": 6180, | |
| "toxic_reward": 4.458306789398193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.1, | |
| "epoch": 1.462665406427221, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.254196643829346, | |
| "image_reward": 0.2431121826171875, | |
| "kl": 0.6629411533474923, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0243, | |
| "reward": 1.0291070997714997, | |
| "reward_std": 0.6561918726190925, | |
| "rewards/reward_func": 1.0291070997714997, | |
| "step": 6190, | |
| "toxic_reward": 4.277180218696595 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.075, | |
| "epoch": 1.4650283553875236, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.544678211212158, | |
| "image_reward": 0.288299560546875, | |
| "kl": 0.5533515185117721, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0114, | |
| "reward": 0.09159567654132843, | |
| "reward_std": 0.6166084105148911, | |
| "rewards/reward_func": 0.09159567654132843, | |
| "step": 6200, | |
| "toxic_reward": 4.60030083656311 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.75, | |
| "epoch": 1.4673913043478262, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3730123043060303, | |
| "image_reward": 0.2605743408203125, | |
| "kl": 0.9568765789270401, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0904, | |
| "reward": 0.7965957373380661, | |
| "reward_std": 0.7220977865159511, | |
| "rewards/reward_func": 0.7965957373380661, | |
| "step": 6210, | |
| "toxic_reward": 3.7931410372257233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.05, | |
| "epoch": 1.4697542533081285, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.046610832214355, | |
| "image_reward": 0.25125885009765625, | |
| "kl": 0.966689832508564, | |
| "learning_rate": 5e-06, | |
| "loss": 0.006, | |
| "reward": 0.42708381414413454, | |
| "reward_std": 0.8918632004410029, | |
| "rewards/reward_func": 0.42708381414413454, | |
| "step": 6220, | |
| "toxic_reward": 3.811506199836731 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.225, | |
| "epoch": 1.472117202268431, | |
| "format_reward": -0.25, | |
| "grad_norm": 11.783949851989746, | |
| "image_reward": 0.2802093505859375, | |
| "kl": 1.5058857083320618, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1229, | |
| "reward": -0.2064610540866852, | |
| "reward_std": 1.2439106579869985, | |
| "rewards/reward_func": -0.2064610540866852, | |
| "step": 6230, | |
| "toxic_reward": 3.776739251613617 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.55, | |
| "epoch": 1.4744801512287333, | |
| "format_reward": -0.25, | |
| "grad_norm": 19.82095718383789, | |
| "image_reward": 0.2695220947265625, | |
| "kl": 4.01448056101799, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0224, | |
| "reward": 0.12153833210468293, | |
| "reward_std": 1.4096519321203231, | |
| "rewards/reward_func": 0.12153833210468293, | |
| "step": 6240, | |
| "toxic_reward": 3.5601022720336912 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.575, | |
| "epoch": 1.4768431001890359, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.002202272415161, | |
| "image_reward": 0.2589111328125, | |
| "kl": 6.391136825084686, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0246, | |
| "reward": 0.8984602272510529, | |
| "reward_std": 0.8823632273823023, | |
| "rewards/reward_func": 0.8984602272510529, | |
| "step": 6250, | |
| "toxic_reward": 4.186536240577698 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.95, | |
| "epoch": 1.4792060491493384, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.6309990882873535, | |
| "image_reward": 0.25638376772403715, | |
| "kl": 5.093664228916168, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1168, | |
| "reward": 0.7955404102802277, | |
| "reward_std": 1.139578907750547, | |
| "rewards/reward_func": 0.7955404102802277, | |
| "step": 6260, | |
| "toxic_reward": 4.489496183395386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.475, | |
| "epoch": 1.481568998109641, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.48809289932251, | |
| "image_reward": 0.24294535368680953, | |
| "kl": 2.03928547501564, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0415, | |
| "reward": 0.3108412384986877, | |
| "reward_std": 1.2936087466776371, | |
| "rewards/reward_func": 0.3108412384986877, | |
| "step": 6270, | |
| "toxic_reward": 3.8573724269866942 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.025, | |
| "epoch": 1.4839319470699432, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.869200706481934, | |
| "image_reward": 0.269140625, | |
| "kl": 2.209321880340576, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0065, | |
| "reward": 1.0746480822563171, | |
| "reward_std": 0.9140975341200829, | |
| "rewards/reward_func": 1.0746480822563171, | |
| "step": 6280, | |
| "toxic_reward": 4.498701477050782 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.6, | |
| "epoch": 1.4862948960302458, | |
| "format_reward": -0.25, | |
| "grad_norm": 14.994149208068848, | |
| "image_reward": 0.2699289947748184, | |
| "kl": 1.982865560054779, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0155, | |
| "reward": -0.04395916759967804, | |
| "reward_std": 1.351198247075081, | |
| "rewards/reward_func": -0.04395916759967804, | |
| "step": 6290, | |
| "toxic_reward": 4.100183129310608 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.525, | |
| "epoch": 1.488657844990548, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.48897647857666, | |
| "image_reward": 0.2738677978515625, | |
| "kl": 73.90320363342762, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0515, | |
| "reward": 0.2645439743995667, | |
| "reward_std": 0.8113596703857183, | |
| "rewards/reward_func": 0.2645439743995667, | |
| "step": 6300, | |
| "toxic_reward": 4.189095830917358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.85, | |
| "epoch": 1.4910207939508506, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.184455394744873, | |
| "image_reward": 0.2667582184076309, | |
| "kl": 1.9667763262987137, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0049, | |
| "reward": 0.582335239648819, | |
| "reward_std": 1.003530977293849, | |
| "rewards/reward_func": 0.582335239648819, | |
| "step": 6310, | |
| "toxic_reward": 4.428107571601868 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.475, | |
| "epoch": 1.4933837429111532, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.498891830444336, | |
| "image_reward": 0.26070556640625, | |
| "kl": 2.5642897844314576, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0706, | |
| "reward": 0.8335122138261795, | |
| "reward_std": 0.8066307563334704, | |
| "rewards/reward_func": 0.8335122138261795, | |
| "step": 6320, | |
| "toxic_reward": 3.9580556988716125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.875, | |
| "epoch": 1.4957466918714557, | |
| "format_reward": -0.25, | |
| "grad_norm": 10.699289321899414, | |
| "image_reward": 0.2347137451171875, | |
| "kl": 3.54896736741066, | |
| "learning_rate": 5e-06, | |
| "loss": 0.004, | |
| "reward": -0.006625699996948242, | |
| "reward_std": 1.1276695830747485, | |
| "rewards/reward_func": -0.006625699996948242, | |
| "step": 6330, | |
| "toxic_reward": 4.5597028732299805 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.2, | |
| "epoch": 1.498109640831758, | |
| "format_reward": 0.0, | |
| "grad_norm": 25.74764633178711, | |
| "image_reward": 0.2565399169921875, | |
| "kl": 3.312129205465317, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0109, | |
| "reward": 0.974502682685852, | |
| "reward_std": 0.8662806877866387, | |
| "rewards/reward_func": 0.974502682685852, | |
| "step": 6340, | |
| "toxic_reward": 4.394499397277832 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.75, | |
| "epoch": 1.5004725897920603, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.920969009399414, | |
| "image_reward": 0.25830586850643156, | |
| "kl": 4.049802941083908, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1152, | |
| "reward": 0.04905744194984436, | |
| "reward_std": 1.192653514072299, | |
| "rewards/reward_func": 0.04905744194984436, | |
| "step": 6350, | |
| "toxic_reward": 4.2746446371078495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.45, | |
| "epoch": 1.5028355387523629, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.33079719543457, | |
| "image_reward": 0.26002349853515627, | |
| "kl": 5.124299117922783, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0575, | |
| "reward": 0.20074379444122314, | |
| "reward_std": 0.825444309413433, | |
| "rewards/reward_func": 0.20074379444122314, | |
| "step": 6360, | |
| "toxic_reward": 3.7550126791000364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.8, | |
| "epoch": 1.5051984877126654, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.266423225402832, | |
| "image_reward": 0.262652587890625, | |
| "kl": 13.647933864593506, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0262, | |
| "reward": 0.6701415419578552, | |
| "reward_std": 0.6827380709350109, | |
| "rewards/reward_func": 0.6701415419578552, | |
| "step": 6370, | |
| "toxic_reward": 4.507234740257263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.55, | |
| "epoch": 1.507561436672968, | |
| "format_reward": -0.25, | |
| "grad_norm": 16.820531845092773, | |
| "image_reward": 0.2648305267095566, | |
| "kl": 2943.323862874508, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3168, | |
| "reward": -0.18676466941833497, | |
| "reward_std": 1.4769982114434241, | |
| "rewards/reward_func": -0.18676466941833497, | |
| "step": 6380, | |
| "toxic_reward": 3.6400262832641603 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.85, | |
| "epoch": 1.5099243856332705, | |
| "format_reward": 0.0, | |
| "grad_norm": 22.587871551513672, | |
| "image_reward": 0.2433380126953125, | |
| "kl": 1.810417714715004, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0137, | |
| "reward": 0.35521286725997925, | |
| "reward_std": 0.5707202635705471, | |
| "rewards/reward_func": 0.35521286725997925, | |
| "step": 6390, | |
| "toxic_reward": 4.4419690608978275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.85, | |
| "epoch": 1.5122873345935728, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.745183944702148, | |
| "image_reward": 0.253375244140625, | |
| "kl": 1.7831827580928803, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0178, | |
| "reward": -0.09840984344482422, | |
| "reward_std": 0.7997165352106095, | |
| "rewards/reward_func": -0.09840984344482422, | |
| "step": 6400, | |
| "toxic_reward": 3.674038052558899 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.025, | |
| "epoch": 1.514650283553875, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.026752948760986, | |
| "image_reward": 0.249884033203125, | |
| "kl": 4.073290675878525, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0349, | |
| "reward": 0.7357653975486755, | |
| "reward_std": 1.102572639286518, | |
| "rewards/reward_func": 0.7357653975486755, | |
| "step": 6410, | |
| "toxic_reward": 4.154096102714538 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.5, | |
| "epoch": 1.5170132325141776, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0699822902679443, | |
| "image_reward": 0.2804595947265625, | |
| "kl": 1.0494691252708435, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0105, | |
| "reward": 0.3559255480766296, | |
| "reward_std": 0.9502544086426497, | |
| "rewards/reward_func": 0.3559255480766296, | |
| "step": 6420, | |
| "toxic_reward": 3.829944038391113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.725, | |
| "epoch": 1.5193761814744802, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.6939737796783447, | |
| "image_reward": 0.24329833984375, | |
| "kl": 2.4543985188007356, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0488, | |
| "reward": -0.22206905484199524, | |
| "reward_std": 1.1714405838400126, | |
| "rewards/reward_func": -0.22206905484199524, | |
| "step": 6430, | |
| "toxic_reward": 4.02121376991272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.8, | |
| "epoch": 1.5217391304347827, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.930452823638916, | |
| "image_reward": 0.270928955078125, | |
| "kl": 2.3063239082694054, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0354, | |
| "reward": -0.090572190284729, | |
| "reward_std": 0.8380892558023334, | |
| "rewards/reward_func": -0.090572190284729, | |
| "step": 6440, | |
| "toxic_reward": 4.0341674268245695 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.675, | |
| "epoch": 1.524102079395085, | |
| "format_reward": -0.25, | |
| "grad_norm": 11.180920600891113, | |
| "image_reward": 0.26758829653263094, | |
| "kl": 1.2753668040037156, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0044, | |
| "reward": 0.8226781934499741, | |
| "reward_std": 1.3871233612298965, | |
| "rewards/reward_func": 0.8226781934499741, | |
| "step": 6450, | |
| "toxic_reward": 3.5244659066200255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.75, | |
| "epoch": 1.5264650283553876, | |
| "format_reward": -0.25, | |
| "grad_norm": 21.079256057739258, | |
| "image_reward": 0.24825642853975297, | |
| "kl": 0.9987513780593872, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0776, | |
| "reward": 0.38041144609451294, | |
| "reward_std": 1.5831992760300637, | |
| "rewards/reward_func": 0.38041144609451294, | |
| "step": 6460, | |
| "toxic_reward": 4.319032979011536 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.5, | |
| "epoch": 1.5288279773156899, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.294722557067871, | |
| "image_reward": 0.23506622314453124, | |
| "kl": 1.3047454893589019, | |
| "learning_rate": 5e-06, | |
| "loss": -0.037, | |
| "reward": 0.483357185125351, | |
| "reward_std": 1.4818070188164711, | |
| "rewards/reward_func": 0.483357185125351, | |
| "step": 6470, | |
| "toxic_reward": 4.28910231590271 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.35, | |
| "epoch": 1.5311909262759924, | |
| "format_reward": 0.0, | |
| "grad_norm": 25.183557510375977, | |
| "image_reward": 0.2621653228998184, | |
| "kl": 5.334516155719757, | |
| "learning_rate": 5e-06, | |
| "loss": 0.184, | |
| "reward": 0.5027847826480866, | |
| "reward_std": 0.6526452742516995, | |
| "rewards/reward_func": 0.5027847826480866, | |
| "step": 6480, | |
| "toxic_reward": 4.249637746810913 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.825, | |
| "epoch": 1.533553875236295, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.60496997833252, | |
| "image_reward": 0.26144917905330656, | |
| "kl": 3.185924381017685, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0162, | |
| "reward": 0.6137366682291031, | |
| "reward_std": 0.5949534647166729, | |
| "rewards/reward_func": 0.6137366682291031, | |
| "step": 6490, | |
| "toxic_reward": 4.377641320228577 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.975, | |
| "epoch": 1.5359168241965975, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.117782115936279, | |
| "image_reward": 0.2869781494140625, | |
| "kl": 0.5223678901791573, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0356, | |
| "reward": 0.20925453603267669, | |
| "reward_std": 0.5696444906294346, | |
| "rewards/reward_func": 0.20925453603267669, | |
| "step": 6500, | |
| "toxic_reward": 4.292421555519104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.7, | |
| "epoch": 1.5382797731568998, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3820912837982178, | |
| "image_reward": 0.250634765625, | |
| "kl": 1.4595504850149155, | |
| "learning_rate": 5e-06, | |
| "loss": 0.02, | |
| "reward": 0.719105675816536, | |
| "reward_std": 0.9932258397340774, | |
| "rewards/reward_func": 0.719105675816536, | |
| "step": 6510, | |
| "toxic_reward": 4.147652292251587 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.65, | |
| "epoch": 1.5406427221172023, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.115738868713379, | |
| "image_reward": 0.2735626220703125, | |
| "kl": 2.502386949956417, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0075, | |
| "reward": 0.229107666015625, | |
| "reward_std": 0.8675182597711683, | |
| "rewards/reward_func": 0.229107666015625, | |
| "step": 6520, | |
| "toxic_reward": 3.437857782840729 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.975, | |
| "epoch": 1.5430056710775046, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.080828666687012, | |
| "image_reward": 0.26392364501953125, | |
| "kl": 1.0831295281648636, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0655, | |
| "reward": 0.5672924667596817, | |
| "reward_std": 1.0283904120326042, | |
| "rewards/reward_func": 0.5672924667596817, | |
| "step": 6530, | |
| "toxic_reward": 4.013876247406006 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.825, | |
| "epoch": 1.5453686200378072, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.358407497406006, | |
| "image_reward": 0.2578287750482559, | |
| "kl": 0.8826712548732758, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0534, | |
| "reward": 0.1970734715461731, | |
| "reward_std": 1.158833772689104, | |
| "rewards/reward_func": 0.1970734715461731, | |
| "step": 6540, | |
| "toxic_reward": 4.346637082099915 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.85, | |
| "epoch": 1.5477315689981097, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.7384082078933716, | |
| "image_reward": 0.245843505859375, | |
| "kl": 1.1669968128204347, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1047, | |
| "reward": 0.4790124922990799, | |
| "reward_std": 0.8679840985685587, | |
| "rewards/reward_func": 0.4790124922990799, | |
| "step": 6550, | |
| "toxic_reward": 4.215170729160309 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 57.025, | |
| "epoch": 1.5500945179584122, | |
| "format_reward": -0.25, | |
| "grad_norm": 19.237064361572266, | |
| "image_reward": 0.2605519607663155, | |
| "kl": 22.883435368537903, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0861, | |
| "reward": 0.3663973331451416, | |
| "reward_std": 1.3235621018335224, | |
| "rewards/reward_func": 0.3663973331451416, | |
| "step": 6560, | |
| "toxic_reward": 4.399778747558594 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.5, | |
| "epoch": 1.5524574669187146, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.169953346252441, | |
| "image_reward": 0.240032958984375, | |
| "kl": 2.908788651227951, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0407, | |
| "reward": 0.6662415623664856, | |
| "reward_std": 0.9049512568861247, | |
| "rewards/reward_func": 0.6662415623664856, | |
| "step": 6570, | |
| "toxic_reward": 4.262545752525329 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.225, | |
| "epoch": 1.5548204158790169, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.397954940795898, | |
| "image_reward": 0.2709014892578125, | |
| "kl": 1.8459113836288452, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0948, | |
| "reward": 0.4346597075462341, | |
| "reward_std": 1.024691704288125, | |
| "rewards/reward_func": 0.4346597075462341, | |
| "step": 6580, | |
| "toxic_reward": 3.837217903137207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.25, | |
| "epoch": 1.5571833648393194, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.823775768280029, | |
| "image_reward": 0.2421417236328125, | |
| "kl": 361.87849075496194, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1249, | |
| "reward": 0.7188747763633728, | |
| "reward_std": 0.7972227469086647, | |
| "rewards/reward_func": 0.7188747763633728, | |
| "step": 6590, | |
| "toxic_reward": 4.341918230056763 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.025, | |
| "epoch": 1.559546313799622, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.157111167907715, | |
| "image_reward": 0.2904388427734375, | |
| "kl": 2.8333058834075926, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0125, | |
| "reward": 0.1938968062400818, | |
| "reward_std": 0.5627395014278591, | |
| "rewards/reward_func": 0.1938968062400818, | |
| "step": 6600, | |
| "toxic_reward": 4.045072281360627 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.875, | |
| "epoch": 1.5619092627599245, | |
| "format_reward": -0.5, | |
| "grad_norm": 7.558686256408691, | |
| "image_reward": 0.28143310397863386, | |
| "kl": 5.3599341928958895, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0333, | |
| "reward": 0.3363319247961044, | |
| "reward_std": 1.8769858199171723, | |
| "rewards/reward_func": 0.3363319247961044, | |
| "step": 6610, | |
| "toxic_reward": 3.4191954016685484 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.3, | |
| "epoch": 1.5642722117202268, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.938777446746826, | |
| "image_reward": 0.2576054885983467, | |
| "kl": 3.9582558915019037, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0771, | |
| "reward": 0.785160881280899, | |
| "reward_std": 1.3131701787933707, | |
| "rewards/reward_func": 0.785160881280899, | |
| "step": 6620, | |
| "toxic_reward": 3.997890996932983 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.075, | |
| "epoch": 1.5666351606805293, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.617331027984619, | |
| "image_reward": 0.2779083251953125, | |
| "kl": 2.7307909965515136, | |
| "learning_rate": 5e-06, | |
| "loss": 0.052, | |
| "reward": 0.6236707329750061, | |
| "reward_std": 0.8986939422786235, | |
| "rewards/reward_func": 0.6236707329750061, | |
| "step": 6630, | |
| "toxic_reward": 4.061939382553101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.1, | |
| "epoch": 1.5689981096408316, | |
| "format_reward": -0.5, | |
| "grad_norm": 11.394710540771484, | |
| "image_reward": 0.27598063051700594, | |
| "kl": 4.903402748703956, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0366, | |
| "reward": 0.095058873295784, | |
| "reward_std": 1.6800902128219604, | |
| "rewards/reward_func": 0.095058873295784, | |
| "step": 6640, | |
| "toxic_reward": 3.813139808177948 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 59.8, | |
| "epoch": 1.5713610586011342, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.759659767150879, | |
| "image_reward": 0.23873443603515626, | |
| "kl": 7.589515461027622, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0834, | |
| "reward": 0.44450428485870364, | |
| "reward_std": 0.5481153151020408, | |
| "rewards/reward_func": 0.44450428485870364, | |
| "step": 6650, | |
| "toxic_reward": 3.8413244128227233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.3, | |
| "epoch": 1.5737240075614367, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.018331527709961, | |
| "image_reward": 0.2600331619381905, | |
| "kl": 3.1137637734413146, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0214, | |
| "reward": 0.7498049587011337, | |
| "reward_std": 0.7316715233027935, | |
| "rewards/reward_func": 0.7498049587011337, | |
| "step": 6660, | |
| "toxic_reward": 4.240022134780884 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.575, | |
| "epoch": 1.5760869565217392, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.951615333557129, | |
| "image_reward": 0.2636627197265625, | |
| "kl": 0.875077161192894, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1023, | |
| "reward": 0.6656131267547607, | |
| "reward_std": 0.6067664973437786, | |
| "rewards/reward_func": 0.6656131267547607, | |
| "step": 6670, | |
| "toxic_reward": 4.546977305412293 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.725, | |
| "epoch": 1.5784499054820416, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.168910026550293, | |
| "image_reward": 0.24169108122587205, | |
| "kl": 4.073548844456672, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0498, | |
| "reward": 0.5430697202682495, | |
| "reward_std": 1.2346604462713002, | |
| "rewards/reward_func": 0.5430697202682495, | |
| "step": 6680, | |
| "toxic_reward": 3.895166778564453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.275, | |
| "epoch": 1.580812854442344, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.9899535179138184, | |
| "image_reward": 0.23625640869140624, | |
| "kl": 6.596899893879891, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0148, | |
| "reward": 0.3857423186302185, | |
| "reward_std": 0.7419607482850552, | |
| "rewards/reward_func": 0.3857423186302185, | |
| "step": 6690, | |
| "toxic_reward": 4.416726422309876 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.475, | |
| "epoch": 1.5831758034026464, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.505062103271484, | |
| "image_reward": 0.24977264404296876, | |
| "kl": 130.44692096710205, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0063, | |
| "reward": 0.8495797365903854, | |
| "reward_std": 1.0700383991003037, | |
| "rewards/reward_func": 0.8495797365903854, | |
| "step": 6700, | |
| "toxic_reward": 3.7804057955741883 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.25, | |
| "epoch": 1.585538752362949, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.865876197814941, | |
| "image_reward": 0.2743357330560684, | |
| "kl": 1.342175406217575, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0144, | |
| "reward": -0.01569686532020569, | |
| "reward_std": 1.0583332434296608, | |
| "rewards/reward_func": -0.01569686532020569, | |
| "step": 6710, | |
| "toxic_reward": 4.605420160293579 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.65, | |
| "epoch": 1.5879017013232515, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.4694126844406128, | |
| "image_reward": 0.26331074982881547, | |
| "kl": 1.179875871539116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0439, | |
| "reward": 0.19556427299976348, | |
| "reward_std": 1.060202201642096, | |
| "rewards/reward_func": 0.19556427299976348, | |
| "step": 6720, | |
| "toxic_reward": 4.195696997642517 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.75, | |
| "epoch": 1.590264650283554, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.831318855285645, | |
| "image_reward": 0.26193084716796877, | |
| "kl": 3.8945027977228164, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0405, | |
| "reward": 0.8553763270378113, | |
| "reward_std": 0.7129356294870377, | |
| "rewards/reward_func": 0.8553763270378113, | |
| "step": 6730, | |
| "toxic_reward": 4.33803927898407 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.475, | |
| "epoch": 1.5926275992438563, | |
| "format_reward": -0.25, | |
| "grad_norm": 19.41834831237793, | |
| "image_reward": 0.25118509978055953, | |
| "kl": 3.402638703584671, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0046, | |
| "reward": 0.5174610838294029, | |
| "reward_std": 1.2720857471227647, | |
| "rewards/reward_func": 0.5174610838294029, | |
| "step": 6740, | |
| "toxic_reward": 3.870224565267563 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.025, | |
| "epoch": 1.5949905482041586, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.40997838973999, | |
| "image_reward": 0.258154296875, | |
| "kl": 1.6460766345262527, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0986, | |
| "reward": 1.0767779767513275, | |
| "reward_std": 1.5294719189405441, | |
| "rewards/reward_func": 1.0767779767513275, | |
| "step": 6750, | |
| "toxic_reward": 3.531558334827423 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.7, | |
| "epoch": 1.5973534971644612, | |
| "format_reward": -0.5, | |
| "grad_norm": 30.405113220214844, | |
| "image_reward": 0.25986429750919343, | |
| "kl": 3.0854232251644134, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0433, | |
| "reward": 0.2282954216003418, | |
| "reward_std": 2.2074968218803406, | |
| "rewards/reward_func": 0.2282954216003418, | |
| "step": 6760, | |
| "toxic_reward": 4.195013093948364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.3, | |
| "epoch": 1.5997164461247637, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.383143424987793, | |
| "image_reward": 0.26789347380399703, | |
| "kl": 3.230149340629578, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0221, | |
| "reward": 0.44291332364082336, | |
| "reward_std": 1.4428741056472063, | |
| "rewards/reward_func": 0.44291332364082336, | |
| "step": 6770, | |
| "toxic_reward": 3.8504308581352236 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.925, | |
| "epoch": 1.6020793950850662, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.7697906494140625, | |
| "image_reward": 0.2436859130859375, | |
| "kl": 3.0731608659029006, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0282, | |
| "reward": 0.9663064420223236, | |
| "reward_std": 0.8580235980451107, | |
| "rewards/reward_func": 0.9663064420223236, | |
| "step": 6780, | |
| "toxic_reward": 3.754929578304291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.725, | |
| "epoch": 1.6044423440453688, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.5446696281433105, | |
| "image_reward": 0.260052490234375, | |
| "kl": 763.1712962627411, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0939, | |
| "reward": 0.7344351947307587, | |
| "reward_std": 0.6753151521086693, | |
| "rewards/reward_func": 0.7344351947307587, | |
| "step": 6790, | |
| "toxic_reward": 4.139233088493347 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.6, | |
| "epoch": 1.606805293005671, | |
| "format_reward": 0.0, | |
| "grad_norm": 21.1129093170166, | |
| "image_reward": 0.27585601806640625, | |
| "kl": 2.870541882514954, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0466, | |
| "reward": 0.5618703544139863, | |
| "reward_std": 0.8203244937583805, | |
| "rewards/reward_func": 0.5618703544139863, | |
| "step": 6800, | |
| "toxic_reward": 4.3937297582626345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 57.125, | |
| "epoch": 1.6091682419659734, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.104684352874756, | |
| "image_reward": 0.280615234375, | |
| "kl": 3.3239043831825255, | |
| "learning_rate": 5e-06, | |
| "loss": 0.045, | |
| "reward": 0.7852797448635102, | |
| "reward_std": 0.610455094370991, | |
| "rewards/reward_func": 0.7852797448635102, | |
| "step": 6810, | |
| "toxic_reward": 4.593313884735108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.725, | |
| "epoch": 1.611531190926276, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.496898889541626, | |
| "image_reward": 0.2752532958984375, | |
| "kl": 2.6775636196136476, | |
| "learning_rate": 5e-06, | |
| "loss": -0.012, | |
| "reward": 0.7639135122299194, | |
| "reward_std": 0.9162261974066496, | |
| "rewards/reward_func": 0.7639135122299194, | |
| "step": 6820, | |
| "toxic_reward": 4.146224117279052 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.075, | |
| "epoch": 1.6138941398865785, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.608152866363525, | |
| "image_reward": 0.267572021484375, | |
| "kl": 3.533026337623596, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0572, | |
| "reward": 0.44893051087856295, | |
| "reward_std": 0.9419144628569484, | |
| "rewards/reward_func": 0.44893051087856295, | |
| "step": 6830, | |
| "toxic_reward": 4.1230400681495665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.925, | |
| "epoch": 1.616257088846881, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.28681755065918, | |
| "image_reward": 0.25560302734375, | |
| "kl": 7.369207835197448, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0152, | |
| "reward": 0.5668485701084137, | |
| "reward_std": 0.749977857619524, | |
| "rewards/reward_func": 0.5668485701084137, | |
| "step": 6840, | |
| "toxic_reward": 4.343744564056396 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.9, | |
| "epoch": 1.6186200378071833, | |
| "format_reward": -0.5, | |
| "grad_norm": 28.541820526123047, | |
| "image_reward": 0.2723876953125, | |
| "kl": 5.971449375152588, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0574, | |
| "reward": -0.5758611798286438, | |
| "reward_std": 1.3836607769131661, | |
| "rewards/reward_func": -0.5758611798286438, | |
| "step": 6850, | |
| "toxic_reward": 4.467629170417785 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.1, | |
| "epoch": 1.6209829867674859, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.005922317504883, | |
| "image_reward": 0.2653228759765625, | |
| "kl": 6.38155357837677, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0537, | |
| "reward": 0.6645367026329041, | |
| "reward_std": 0.6022280365228653, | |
| "rewards/reward_func": 0.6645367026329041, | |
| "step": 6860, | |
| "toxic_reward": 4.12990357875824 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.175, | |
| "epoch": 1.6233459357277882, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.00104284286499, | |
| "image_reward": 0.26532745361328125, | |
| "kl": 6.140709114074707, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0866, | |
| "reward": 0.5954837799072266, | |
| "reward_std": 0.9484383892267942, | |
| "rewards/reward_func": 0.5954837799072266, | |
| "step": 6870, | |
| "toxic_reward": 4.030814599990845 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.2, | |
| "epoch": 1.6257088846880907, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.69809627532959, | |
| "image_reward": 0.2781280517578125, | |
| "kl": 8.641590279340743, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0299, | |
| "reward": 0.35074634552001954, | |
| "reward_std": 0.5954089154489338, | |
| "rewards/reward_func": 0.35074634552001954, | |
| "step": 6880, | |
| "toxic_reward": 4.168118977546692 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.6, | |
| "epoch": 1.6280718336483933, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.882171154022217, | |
| "image_reward": 0.25705363005399706, | |
| "kl": 5.1897116780281065, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0134, | |
| "reward": -0.19078816771507262, | |
| "reward_std": 1.1844907969236373, | |
| "rewards/reward_func": -0.19078816771507262, | |
| "step": 6890, | |
| "toxic_reward": 4.343024659156799 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.925, | |
| "epoch": 1.6304347826086958, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.869507789611816, | |
| "image_reward": 0.2716888427734375, | |
| "kl": 6.5070923328399655, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0103, | |
| "reward": 0.15193371772766112, | |
| "reward_std": 1.270319462940097, | |
| "rewards/reward_func": 0.15193371772766112, | |
| "step": 6900, | |
| "toxic_reward": 4.229231309890747 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 61.3, | |
| "epoch": 1.632797731568998, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.519335746765137, | |
| "image_reward": 0.268505859375, | |
| "kl": 83.58075475692749, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0011, | |
| "reward": 0.7866749823093414, | |
| "reward_std": 1.1198090038727968, | |
| "rewards/reward_func": 0.7866749823093414, | |
| "step": 6910, | |
| "toxic_reward": 4.473654842376709 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.875, | |
| "epoch": 1.6351606805293004, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.124833583831787, | |
| "image_reward": 0.2610076904296875, | |
| "kl": 2.196775460243225, | |
| "learning_rate": 5e-06, | |
| "loss": 0.015, | |
| "reward": 1.0072305798530579, | |
| "reward_std": 1.1389783814549446, | |
| "rewards/reward_func": 1.0072305798530579, | |
| "step": 6920, | |
| "toxic_reward": 4.3077033996582035 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.975, | |
| "epoch": 1.637523629489603, | |
| "format_reward": -0.5, | |
| "grad_norm": 3.5923500061035156, | |
| "image_reward": 0.2612147033214569, | |
| "kl": 4.726305472850799, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0617, | |
| "reward": 0.24457889199256896, | |
| "reward_std": 1.512747337669134, | |
| "rewards/reward_func": 0.24457889199256896, | |
| "step": 6930, | |
| "toxic_reward": 4.234147024154663 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.425, | |
| "epoch": 1.6398865784499055, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.177937030792236, | |
| "image_reward": 0.2646942138671875, | |
| "kl": 5.225604176521301, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0462, | |
| "reward": 0.3636160969734192, | |
| "reward_std": 0.6955469690263272, | |
| "rewards/reward_func": 0.3636160969734192, | |
| "step": 6940, | |
| "toxic_reward": 4.153347599506378 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.25, | |
| "epoch": 1.642249527410208, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.053350448608398, | |
| "image_reward": 0.253045654296875, | |
| "kl": 5.69408215880394, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0027, | |
| "reward": 0.612578509747982, | |
| "reward_std": 0.6395491607487201, | |
| "rewards/reward_func": 0.612578509747982, | |
| "step": 6950, | |
| "toxic_reward": 3.9997507095336915 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.025, | |
| "epoch": 1.6446124763705106, | |
| "format_reward": 0.0, | |
| "grad_norm": 43.84629440307617, | |
| "image_reward": 0.2462066650390625, | |
| "kl": 391.4977917432785, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0408, | |
| "reward": 0.610540634393692, | |
| "reward_std": 1.4011766005307436, | |
| "rewards/reward_func": 0.610540634393692, | |
| "step": 6960, | |
| "toxic_reward": 3.513826107978821 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.7, | |
| "epoch": 1.6469754253308129, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5391600131988525, | |
| "image_reward": 0.263494873046875, | |
| "kl": 26.704900431632996, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0209, | |
| "reward": 0.6357394754886627, | |
| "reward_std": 0.9666919514536858, | |
| "rewards/reward_func": 0.6357394754886627, | |
| "step": 6970, | |
| "toxic_reward": 4.3967194080352785 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.275, | |
| "epoch": 1.6493383742911152, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.4004714488983154, | |
| "image_reward": 0.252850341796875, | |
| "kl": 5.729834485054016, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0379, | |
| "reward": 0.47990578413009644, | |
| "reward_std": 0.5631790950894355, | |
| "rewards/reward_func": 0.47990578413009644, | |
| "step": 6980, | |
| "toxic_reward": 4.208314228057861 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 57.5, | |
| "epoch": 1.6517013232514177, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.806096076965332, | |
| "image_reward": 0.256463623046875, | |
| "kl": 6.840502554178238, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0496, | |
| "reward": 0.16656889617443085, | |
| "reward_std": 0.9250041805207729, | |
| "rewards/reward_func": 0.16656889617443085, | |
| "step": 6990, | |
| "toxic_reward": 3.7445754587650297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.475, | |
| "epoch": 1.6540642722117203, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.5651546716690063, | |
| "image_reward": 0.2714019775390625, | |
| "kl": 4.0309244930744175, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0266, | |
| "reward": 0.3252350568771362, | |
| "reward_std": 1.123583555780351, | |
| "rewards/reward_func": 0.3252350568771362, | |
| "step": 7000, | |
| "toxic_reward": 4.437454390525818 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.225, | |
| "epoch": 1.6564272211720228, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.255953788757324, | |
| "image_reward": 0.2734893798828125, | |
| "kl": 13.629169458150864, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0185, | |
| "reward": 0.5219172418117524, | |
| "reward_std": 1.4155076075345279, | |
| "rewards/reward_func": 0.5219172418117524, | |
| "step": 7010, | |
| "toxic_reward": 3.9341206908226014 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.525, | |
| "epoch": 1.658790170132325, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.765683650970459, | |
| "image_reward": 0.2684661865234375, | |
| "kl": 4.484488549828529, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0198, | |
| "reward": 0.41308672428131105, | |
| "reward_std": 0.6728175904601812, | |
| "rewards/reward_func": 0.41308672428131105, | |
| "step": 7020, | |
| "toxic_reward": 4.680417871475219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.425, | |
| "epoch": 1.6611531190926276, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.050631523132324, | |
| "image_reward": 0.2446624755859375, | |
| "kl": 5.303134024143219, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0023, | |
| "reward": 0.7305093944072724, | |
| "reward_std": 0.8056725425645709, | |
| "rewards/reward_func": 0.7305093944072724, | |
| "step": 7030, | |
| "toxic_reward": 4.371766519546509 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.725, | |
| "epoch": 1.66351606805293, | |
| "format_reward": -0.25, | |
| "grad_norm": 36.67766189575195, | |
| "image_reward": 0.2738332122564316, | |
| "kl": 3.6509076714515687, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0104, | |
| "reward": 0.1336117923259735, | |
| "reward_std": 1.408228962123394, | |
| "rewards/reward_func": 0.1336117923259735, | |
| "step": 7040, | |
| "toxic_reward": 3.868592691421509 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.95, | |
| "epoch": 1.6658790170132325, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.2863216400146484, | |
| "image_reward": 0.2496002197265625, | |
| "kl": 47.7468825340271, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0431, | |
| "reward": -0.1912323772907257, | |
| "reward_std": 1.0762672819197179, | |
| "rewards/reward_func": -0.1912323772907257, | |
| "step": 7050, | |
| "toxic_reward": 4.286359405517578 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.8, | |
| "epoch": 1.668241965973535, | |
| "format_reward": -0.75, | |
| "grad_norm": 15.117477416992188, | |
| "image_reward": 0.2343353286385536, | |
| "kl": 12.957087469100951, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0128, | |
| "reward": -0.5250297307968139, | |
| "reward_std": 2.128708484955132, | |
| "rewards/reward_func": -0.5250297307968139, | |
| "step": 7060, | |
| "toxic_reward": 4.045247128605842 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.15, | |
| "epoch": 1.6706049149338376, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.236794471740723, | |
| "image_reward": 0.2446756988763809, | |
| "kl": 3.7372434973716735, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0289, | |
| "reward": 0.1568456247448921, | |
| "reward_std": 1.3125899083912373, | |
| "rewards/reward_func": 0.1568456247448921, | |
| "step": 7070, | |
| "toxic_reward": 4.527845191955566 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.0, | |
| "epoch": 1.6729678638941399, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.8276275396347046, | |
| "image_reward": 0.253338623046875, | |
| "kl": 18.982901883125304, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0591, | |
| "reward": 0.5796426713466645, | |
| "reward_std": 0.8607377586886287, | |
| "rewards/reward_func": 0.5796426713466645, | |
| "step": 7080, | |
| "toxic_reward": 4.365747809410095 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.0, | |
| "epoch": 1.6753308128544422, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.115592956542969, | |
| "image_reward": 0.25368804931640626, | |
| "kl": 8.384132671356202, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0358, | |
| "reward": 0.4894866108894348, | |
| "reward_std": 0.82001001983881, | |
| "rewards/reward_func": 0.4894866108894348, | |
| "step": 7090, | |
| "toxic_reward": 4.161544275283814 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.45, | |
| "epoch": 1.6776937618147447, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.421766757965088, | |
| "image_reward": 0.257010905444622, | |
| "kl": 1.5507995724678039, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0479, | |
| "reward": -0.053127193450927736, | |
| "reward_std": 0.8562082014977932, | |
| "rewards/reward_func": -0.053127193450927736, | |
| "step": 7100, | |
| "toxic_reward": 4.494407868385315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.125, | |
| "epoch": 1.6800567107750473, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.849198341369629, | |
| "image_reward": 0.254315185546875, | |
| "kl": 4.097546017169952, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0412, | |
| "reward": 0.42282047867774963, | |
| "reward_std": 0.9609952576458454, | |
| "rewards/reward_func": 0.42282047867774963, | |
| "step": 7110, | |
| "toxic_reward": 4.278821682929992 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.15, | |
| "epoch": 1.6824196597353498, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.184070587158203, | |
| "image_reward": 0.2508982330560684, | |
| "kl": 1.9086317151784897, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0676, | |
| "reward": -0.08927419185638427, | |
| "reward_std": 1.1106989961117506, | |
| "rewards/reward_func": -0.08927419185638427, | |
| "step": 7120, | |
| "toxic_reward": 4.535511326789856 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.45, | |
| "epoch": 1.6847826086956523, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.833540916442871, | |
| "image_reward": 0.2317718505859375, | |
| "kl": 1.5903507679700852, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0026, | |
| "reward": 0.5086119592189788, | |
| "reward_std": 0.610715470276773, | |
| "rewards/reward_func": 0.5086119592189788, | |
| "step": 7130, | |
| "toxic_reward": 4.333575582504272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.75, | |
| "epoch": 1.6871455576559546, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.887187480926514, | |
| "image_reward": 0.2499237060546875, | |
| "kl": 49.958091259002686, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0703, | |
| "reward": 0.7590021967887879, | |
| "reward_std": 0.8256058894097805, | |
| "rewards/reward_func": 0.7590021967887879, | |
| "step": 7140, | |
| "toxic_reward": 4.225302958488465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.325, | |
| "epoch": 1.689508506616257, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.265044212341309, | |
| "image_reward": 0.23530120849609376, | |
| "kl": 0.9182627111673355, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0673, | |
| "reward": 0.3405183613300323, | |
| "reward_std": 0.7431152425706387, | |
| "rewards/reward_func": 0.3405183613300323, | |
| "step": 7150, | |
| "toxic_reward": 4.184520816802978 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.175, | |
| "epoch": 1.6918714555765595, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.966953754425049, | |
| "image_reward": 0.267620849609375, | |
| "kl": 1.64820496737957, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1687, | |
| "reward": 0.01701483130455017, | |
| "reward_std": 1.4536124819889664, | |
| "rewards/reward_func": 0.01701483130455017, | |
| "step": 7160, | |
| "toxic_reward": 3.7502978086471557 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.15, | |
| "epoch": 1.694234404536862, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.9182000160217285, | |
| "image_reward": 0.25394287109375, | |
| "kl": 1.6780494809150697, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0719, | |
| "reward": 0.8605155050754547, | |
| "reward_std": 0.9149322494864464, | |
| "rewards/reward_func": 0.8605155050754547, | |
| "step": 7170, | |
| "toxic_reward": 3.9694084405899046 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.525, | |
| "epoch": 1.6965973534971646, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.476659059524536, | |
| "image_reward": 0.272882080078125, | |
| "kl": 3.32854140996933, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0742, | |
| "reward": 1.0633208215236665, | |
| "reward_std": 0.9789414823055267, | |
| "rewards/reward_func": 1.0633208215236665, | |
| "step": 7180, | |
| "toxic_reward": 4.391367101669312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.825, | |
| "epoch": 1.6989603024574669, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.7560040950775146, | |
| "image_reward": 0.269110107421875, | |
| "kl": 1.4420736670494079, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0345, | |
| "reward": 1.0360184490680695, | |
| "reward_std": 0.8136029925197363, | |
| "rewards/reward_func": 1.0360184490680695, | |
| "step": 7190, | |
| "toxic_reward": 3.99700380563736 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.925, | |
| "epoch": 1.7013232514177694, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.579362869262695, | |
| "image_reward": 0.2462371826171875, | |
| "kl": 0.9962957471609115, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0255, | |
| "reward": 0.2942840725183487, | |
| "reward_std": 0.3486198179423809, | |
| "rewards/reward_func": 0.2942840725183487, | |
| "step": 7200, | |
| "toxic_reward": 3.8329622387886046 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.825, | |
| "epoch": 1.7036862003780717, | |
| "format_reward": -0.5, | |
| "grad_norm": 15.74374008178711, | |
| "image_reward": 0.26438903957605364, | |
| "kl": 1.2382088035345078, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0735, | |
| "reward": 0.2413632392883301, | |
| "reward_std": 1.651388045027852, | |
| "rewards/reward_func": 0.2413632392883301, | |
| "step": 7210, | |
| "toxic_reward": 4.516292905807495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.625, | |
| "epoch": 1.7060491493383743, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.237770080566406, | |
| "image_reward": 0.2369354248046875, | |
| "kl": 1.5744222581386567, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0256, | |
| "reward": 0.6944510787725449, | |
| "reward_std": 1.11760393679142, | |
| "rewards/reward_func": 0.6944510787725449, | |
| "step": 7220, | |
| "toxic_reward": 3.7596142530441283 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.125, | |
| "epoch": 1.7084120982986768, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.7665228843688965, | |
| "image_reward": 0.2577423095703125, | |
| "kl": 0.7259436190128327, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0117, | |
| "reward": 0.5142745256423951, | |
| "reward_std": 0.6884998820722104, | |
| "rewards/reward_func": 0.5142745256423951, | |
| "step": 7230, | |
| "toxic_reward": 4.332102084159851 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.475, | |
| "epoch": 1.7107750472589793, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.795387268066406, | |
| "image_reward": 0.28794708251953127, | |
| "kl": 1.6049385368824005, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0341, | |
| "reward": 0.308843332529068, | |
| "reward_std": 0.4225019045174122, | |
| "rewards/reward_func": 0.308843332529068, | |
| "step": 7240, | |
| "toxic_reward": 4.501336789131164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.0, | |
| "epoch": 1.7131379962192816, | |
| "format_reward": -0.25, | |
| "grad_norm": 11.164639472961426, | |
| "image_reward": 0.25756022036075593, | |
| "kl": 0.43412337452173233, | |
| "learning_rate": 5e-06, | |
| "loss": -0.026, | |
| "reward": 0.46165032386779786, | |
| "reward_std": 0.9854918915778399, | |
| "rewards/reward_func": 0.46165032386779786, | |
| "step": 7250, | |
| "toxic_reward": 4.23072258234024 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.925, | |
| "epoch": 1.715500945179584, | |
| "format_reward": 0.0, | |
| "grad_norm": 26.601303100585938, | |
| "image_reward": 0.24893798828125, | |
| "kl": 3.482639339566231, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0419, | |
| "reward": 0.5657954633235931, | |
| "reward_std": 1.2434701435267925, | |
| "rewards/reward_func": 0.5657954633235931, | |
| "step": 7260, | |
| "toxic_reward": 4.052207565307617 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.85, | |
| "epoch": 1.7178638941398865, | |
| "format_reward": -0.25, | |
| "grad_norm": 19.468366622924805, | |
| "image_reward": 0.24361775815486908, | |
| "kl": 0.6207199424505234, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1011, | |
| "reward": 0.2289634108543396, | |
| "reward_std": 1.1521323285996914, | |
| "rewards/reward_func": 0.2289634108543396, | |
| "step": 7270, | |
| "toxic_reward": 4.243139553070068 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.475, | |
| "epoch": 1.720226843100189, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.427348136901855, | |
| "image_reward": 0.24371236115694045, | |
| "kl": 1.2423572808504104, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0055, | |
| "reward": 0.44162888526916505, | |
| "reward_std": 1.226283924281597, | |
| "rewards/reward_func": 0.44162888526916505, | |
| "step": 7280, | |
| "toxic_reward": 3.9729990482330324 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.725, | |
| "epoch": 1.7225897920604916, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.307239532470703, | |
| "image_reward": 0.2623565673828125, | |
| "kl": 0.795675303786993, | |
| "learning_rate": 5e-06, | |
| "loss": 0.005, | |
| "reward": 0.47723318338394166, | |
| "reward_std": 0.5881602220237255, | |
| "rewards/reward_func": 0.47723318338394166, | |
| "step": 7290, | |
| "toxic_reward": 4.611540603637695 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.125, | |
| "epoch": 1.724952741020794, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.304860591888428, | |
| "image_reward": 0.2538177490234375, | |
| "kl": 1.0193208366632462, | |
| "learning_rate": 5e-06, | |
| "loss": -0.023, | |
| "reward": 0.17551978230476378, | |
| "reward_std": 0.5646818313747645, | |
| "rewards/reward_func": 0.17551978230476378, | |
| "step": 7300, | |
| "toxic_reward": 4.499063897132873 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.275, | |
| "epoch": 1.7273156899810964, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.351771354675293, | |
| "image_reward": 0.2463592529296875, | |
| "kl": 1.9171950757503509, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0213, | |
| "reward": 0.466388076543808, | |
| "reward_std": 0.8451812721788883, | |
| "rewards/reward_func": 0.466388076543808, | |
| "step": 7310, | |
| "toxic_reward": 4.565359354019165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.925, | |
| "epoch": 1.7296786389413987, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.364166021347046, | |
| "image_reward": 0.25976969450712206, | |
| "kl": 0.5259292095899581, | |
| "learning_rate": 5e-06, | |
| "loss": -0.084, | |
| "reward": 0.45669102370738984, | |
| "reward_std": 1.098591622710228, | |
| "rewards/reward_func": 0.45669102370738984, | |
| "step": 7320, | |
| "toxic_reward": 4.627902317047119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.825, | |
| "epoch": 1.7320415879017013, | |
| "format_reward": 0.0, | |
| "grad_norm": 23.96133804321289, | |
| "image_reward": 0.2389556884765625, | |
| "kl": 1.1734901428222657, | |
| "learning_rate": 5e-06, | |
| "loss": 0.038, | |
| "reward": 0.7277517914772034, | |
| "reward_std": 0.8356013357639313, | |
| "rewards/reward_func": 0.7277517914772034, | |
| "step": 7330, | |
| "toxic_reward": 4.407384157180786 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.0, | |
| "epoch": 1.7344045368620038, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.774612426757812, | |
| "image_reward": 0.2680206298828125, | |
| "kl": 4.040656617283821, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0436, | |
| "reward": 0.2283779501914978, | |
| "reward_std": 0.34994165217503903, | |
| "rewards/reward_func": 0.2283779501914978, | |
| "step": 7340, | |
| "toxic_reward": 4.637366437911988 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.25, | |
| "epoch": 1.7367674858223063, | |
| "format_reward": -0.25, | |
| "grad_norm": 12.662446022033691, | |
| "image_reward": 0.24230550229549408, | |
| "kl": 0.5235348105430603, | |
| "learning_rate": 5e-06, | |
| "loss": -0.042, | |
| "reward": 0.8077804684638977, | |
| "reward_std": 1.315062115341425, | |
| "rewards/reward_func": 0.8077804684638977, | |
| "step": 7350, | |
| "toxic_reward": 4.6036452293396 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.75, | |
| "epoch": 1.7391304347826086, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.7947723865509033, | |
| "image_reward": 0.259271240234375, | |
| "kl": 0.5023418068885803, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0101, | |
| "reward": 0.47644210457801817, | |
| "reward_std": 0.6371240261942148, | |
| "rewards/reward_func": 0.47644210457801817, | |
| "step": 7360, | |
| "toxic_reward": 4.352305841445923 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.975, | |
| "epoch": 1.7414933837429112, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.967306137084961, | |
| "image_reward": 0.2540252685546875, | |
| "kl": 0.5360975474119186, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0708, | |
| "reward": 0.5753240287303925, | |
| "reward_std": 0.8622719066217541, | |
| "rewards/reward_func": 0.5753240287303925, | |
| "step": 7370, | |
| "toxic_reward": 4.0306689739227295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.75, | |
| "epoch": 1.7438563327032135, | |
| "format_reward": -0.5, | |
| "grad_norm": 31.72753143310547, | |
| "image_reward": 0.22639973908662797, | |
| "kl": 0.5255977511405945, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0039, | |
| "reward": -0.24822215884923934, | |
| "reward_std": 1.6855425260961057, | |
| "rewards/reward_func": -0.24822215884923934, | |
| "step": 7380, | |
| "toxic_reward": 3.752596640586853 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.375, | |
| "epoch": 1.746219281663516, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.6846818923950195, | |
| "image_reward": 0.2564056396484375, | |
| "kl": 3.591386225819588, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0212, | |
| "reward": 0.12304354310035706, | |
| "reward_std": 0.8115306086838245, | |
| "rewards/reward_func": 0.12304354310035706, | |
| "step": 7390, | |
| "toxic_reward": 3.613353615999222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.65, | |
| "epoch": 1.7485822306238186, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.726175308227539, | |
| "image_reward": 0.283404541015625, | |
| "kl": 0.9659576997160911, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0252, | |
| "reward": 0.3961315780878067, | |
| "reward_std": 1.0492550559341907, | |
| "rewards/reward_func": 0.3961315780878067, | |
| "step": 7400, | |
| "toxic_reward": 3.501691198348999 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.95, | |
| "epoch": 1.750945179584121, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.0125391483306885, | |
| "image_reward": 0.2607086181640625, | |
| "kl": 0.6532519310712814, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0276, | |
| "reward": 0.4769218623638153, | |
| "reward_std": 0.6247519843280316, | |
| "rewards/reward_func": 0.4769218623638153, | |
| "step": 7410, | |
| "toxic_reward": 4.560657954216003 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.425, | |
| "epoch": 1.7533081285444234, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.774812698364258, | |
| "image_reward": 0.24655609130859374, | |
| "kl": 1.901971572637558, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0145, | |
| "reward": 0.6345466494560241, | |
| "reward_std": 1.1331901341676711, | |
| "rewards/reward_func": 0.6345466494560241, | |
| "step": 7420, | |
| "toxic_reward": 4.449591946601868 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.375, | |
| "epoch": 1.755671077504726, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.1103057861328125, | |
| "image_reward": 0.265411376953125, | |
| "kl": 1.7676091372966767, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0034, | |
| "reward": 0.6921305894851685, | |
| "reward_std": 0.6238477535545826, | |
| "rewards/reward_func": 0.6921305894851685, | |
| "step": 7430, | |
| "toxic_reward": 3.859569197893143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.65, | |
| "epoch": 1.7580340264650283, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0048232078552246, | |
| "image_reward": 0.24298477172851562, | |
| "kl": 4.202221667766571, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1367, | |
| "reward": 0.9155125916004181, | |
| "reward_std": 0.7328770853579044, | |
| "rewards/reward_func": 0.9155125916004181, | |
| "step": 7440, | |
| "toxic_reward": 4.531428098678589 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.25, | |
| "epoch": 1.7603969754253308, | |
| "format_reward": 0.0, | |
| "grad_norm": 20.737003326416016, | |
| "image_reward": 0.25689697265625, | |
| "kl": 16.54909121990204, | |
| "learning_rate": 5e-06, | |
| "loss": 0.037, | |
| "reward": 0.8588055372238159, | |
| "reward_std": 0.9005012600682676, | |
| "rewards/reward_func": 0.8588055372238159, | |
| "step": 7450, | |
| "toxic_reward": 4.513736462593078 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.7, | |
| "epoch": 1.7627599243856333, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.5940968990325928, | |
| "image_reward": 0.26309814453125, | |
| "kl": 2.317558985948563, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0018, | |
| "reward": 0.20084644556045533, | |
| "reward_std": 0.7237232834100723, | |
| "rewards/reward_func": 0.20084644556045533, | |
| "step": 7460, | |
| "toxic_reward": 4.334891009330749 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.325, | |
| "epoch": 1.7651228733459359, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.07941722869873, | |
| "image_reward": 0.2528594970703125, | |
| "kl": 1.3212820410728454, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0444, | |
| "reward": 1.2387877494096755, | |
| "reward_std": 0.8179315060377121, | |
| "rewards/reward_func": 1.2387877494096755, | |
| "step": 7470, | |
| "toxic_reward": 4.363593196868896 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.825, | |
| "epoch": 1.7674858223062382, | |
| "format_reward": 0.0, | |
| "grad_norm": 28.392396926879883, | |
| "image_reward": 0.25749053955078127, | |
| "kl": 2.198029878735542, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0322, | |
| "reward": 0.1901194632053375, | |
| "reward_std": 0.5339192871004343, | |
| "rewards/reward_func": 0.1901194632053375, | |
| "step": 7480, | |
| "toxic_reward": 4.514597225189209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.7, | |
| "epoch": 1.7698487712665405, | |
| "format_reward": 0.0, | |
| "grad_norm": 26.77941131591797, | |
| "image_reward": 0.241033935546875, | |
| "kl": 6.588536351919174, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0218, | |
| "reward": 0.2174743801355362, | |
| "reward_std": 0.8413432762026787, | |
| "rewards/reward_func": 0.2174743801355362, | |
| "step": 7490, | |
| "toxic_reward": 4.284235906600952 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.55, | |
| "epoch": 1.772211720226843, | |
| "format_reward": -0.25, | |
| "grad_norm": 18.408794403076172, | |
| "image_reward": 0.2581207275390625, | |
| "kl": 3.106099420785904, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0257, | |
| "reward": 0.31152122020721434, | |
| "reward_std": 1.2936958684585989, | |
| "rewards/reward_func": 0.31152122020721434, | |
| "step": 7500, | |
| "toxic_reward": 4.310132288932801 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.475, | |
| "epoch": 1.7745746691871456, | |
| "format_reward": 0.0, | |
| "grad_norm": 32.10823440551758, | |
| "image_reward": 0.232781982421875, | |
| "kl": 11.768871355056763, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0201, | |
| "reward": 1.5193881750106812, | |
| "reward_std": 0.8748866233974695, | |
| "rewards/reward_func": 1.5193881750106812, | |
| "step": 7510, | |
| "toxic_reward": 4.612711477279663 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.575, | |
| "epoch": 1.776937618147448, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.912269592285156, | |
| "image_reward": 0.235693359375, | |
| "kl": 2.0526355147361754, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1284, | |
| "reward": 1.3539286196231841, | |
| "reward_std": 0.9052736334502697, | |
| "rewards/reward_func": 1.3539286196231841, | |
| "step": 7520, | |
| "toxic_reward": 4.487947154045105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.575, | |
| "epoch": 1.7793005671077504, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.928491592407227, | |
| "image_reward": 0.2556488037109375, | |
| "kl": 21.833010697364806, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1174, | |
| "reward": 0.5344179272651672, | |
| "reward_std": 0.7245766028761864, | |
| "rewards/reward_func": 0.5344179272651672, | |
| "step": 7530, | |
| "toxic_reward": 4.373207831382752 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.975, | |
| "epoch": 1.781663516068053, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.675307273864746, | |
| "image_reward": 0.24088897705078124, | |
| "kl": 1.3107800006866455, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0403, | |
| "reward": 0.04000200629234314, | |
| "reward_std": 1.0572677969932556, | |
| "rewards/reward_func": 0.04000200629234314, | |
| "step": 7540, | |
| "toxic_reward": 4.048869323730469 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.975, | |
| "epoch": 1.7840264650283553, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.656561851501465, | |
| "image_reward": 0.24349263608455657, | |
| "kl": 3.6083962321281433, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0326, | |
| "reward": 0.10396124720573426, | |
| "reward_std": 1.0819443106651305, | |
| "rewards/reward_func": 0.10396124720573426, | |
| "step": 7550, | |
| "toxic_reward": 4.45411868095398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.6, | |
| "epoch": 1.7863894139886578, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0053718090057373, | |
| "image_reward": 0.280938720703125, | |
| "kl": 1.8616322338581086, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0035, | |
| "reward": 0.602351513504982, | |
| "reward_std": 0.8774395015090704, | |
| "rewards/reward_func": 0.602351513504982, | |
| "step": 7560, | |
| "toxic_reward": 3.8221758723258974 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.15, | |
| "epoch": 1.7887523629489603, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.999305248260498, | |
| "image_reward": 0.24803619384765624, | |
| "kl": 1.7729626595973969, | |
| "learning_rate": 5e-06, | |
| "loss": -0.004, | |
| "reward": 0.33846797943115237, | |
| "reward_std": 0.587756198644638, | |
| "rewards/reward_func": 0.33846797943115237, | |
| "step": 7570, | |
| "toxic_reward": 4.2568159103393555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.375, | |
| "epoch": 1.7911153119092629, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.467576026916504, | |
| "image_reward": 0.2431640625, | |
| "kl": 1.3180940926074982, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0225, | |
| "reward": 0.6568324744701386, | |
| "reward_std": 0.5710492163896561, | |
| "rewards/reward_func": 0.6568324744701386, | |
| "step": 7580, | |
| "toxic_reward": 4.575870084762573 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.675, | |
| "epoch": 1.7934782608695652, | |
| "format_reward": 0.0, | |
| "grad_norm": 41.636165618896484, | |
| "image_reward": 0.2553070068359375, | |
| "kl": 1.2196908950805665, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0276, | |
| "reward": 0.9933471500873565, | |
| "reward_std": 0.8478576868772507, | |
| "rewards/reward_func": 0.9933471500873565, | |
| "step": 7590, | |
| "toxic_reward": 4.177789008617401 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.3, | |
| "epoch": 1.7958412098298677, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.115588188171387, | |
| "image_reward": 0.27226715087890624, | |
| "kl": 5.791901814937591, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0022, | |
| "reward": 0.3163196682929993, | |
| "reward_std": 0.8629786409437656, | |
| "rewards/reward_func": 0.3163196682929993, | |
| "step": 7600, | |
| "toxic_reward": 3.73489425778389 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.95, | |
| "epoch": 1.79820415879017, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3679516315460205, | |
| "image_reward": 0.2376190185546875, | |
| "kl": 4.311083900928497, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0175, | |
| "reward": 0.6290358543395996, | |
| "reward_std": 1.0244077319279312, | |
| "rewards/reward_func": 0.6290358543395996, | |
| "step": 7610, | |
| "toxic_reward": 4.054656505584717 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.35, | |
| "epoch": 1.8005671077504726, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.1850380897521973, | |
| "image_reward": 0.247137451171875, | |
| "kl": 3.278796100616455, | |
| "learning_rate": 5e-06, | |
| "loss": 0.056, | |
| "reward": 1.2004601210355759, | |
| "reward_std": 0.7055684822611511, | |
| "rewards/reward_func": 1.2004601210355759, | |
| "step": 7620, | |
| "toxic_reward": 3.5256235122680666 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.75, | |
| "epoch": 1.802930056710775, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.8605425357818604, | |
| "image_reward": 0.24492238312959672, | |
| "kl": 7.6126263558864595, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0959, | |
| "reward": 0.37777516841888426, | |
| "reward_std": 1.1775035494938493, | |
| "rewards/reward_func": 0.37777516841888426, | |
| "step": 7630, | |
| "toxic_reward": 4.522587513923645 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.375, | |
| "epoch": 1.8052930056710776, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.144404411315918, | |
| "image_reward": 0.241168212890625, | |
| "kl": 1.456436914205551, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0231, | |
| "reward": 0.36865578293800355, | |
| "reward_std": 1.7164668783545494, | |
| "rewards/reward_func": 0.36865578293800355, | |
| "step": 7640, | |
| "toxic_reward": 3.9745461702346803 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.775, | |
| "epoch": 1.80765595463138, | |
| "format_reward": 0.0, | |
| "grad_norm": 33.95363998413086, | |
| "image_reward": 0.2472625732421875, | |
| "kl": 2.2694355845451355, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1037, | |
| "reward": 0.8588967323303223, | |
| "reward_std": 1.019287913478911, | |
| "rewards/reward_func": 0.8588967323303223, | |
| "step": 7650, | |
| "toxic_reward": 4.213714742660523 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.35, | |
| "epoch": 1.8100189035916823, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.865695953369141, | |
| "image_reward": 0.25559844970703127, | |
| "kl": 6.3857537567615505, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0924, | |
| "reward": -0.07846069931983948, | |
| "reward_std": 1.1336833463981748, | |
| "rewards/reward_func": -0.07846069931983948, | |
| "step": 7660, | |
| "toxic_reward": 4.2933889627456665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.825, | |
| "epoch": 1.8123818525519848, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.160090923309326, | |
| "image_reward": 0.2841064453125, | |
| "kl": 5.202520692348481, | |
| "learning_rate": 5e-06, | |
| "loss": 0.006, | |
| "reward": 1.153634887933731, | |
| "reward_std": 1.2888424217700958, | |
| "rewards/reward_func": 1.153634887933731, | |
| "step": 7670, | |
| "toxic_reward": 3.994613242149353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.975, | |
| "epoch": 1.8147448015122873, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.903553009033203, | |
| "image_reward": 0.26880950927734376, | |
| "kl": 63.095464119315146, | |
| "learning_rate": 5e-06, | |
| "loss": 0.001, | |
| "reward": 1.0250155806541443, | |
| "reward_std": 0.7393251709640026, | |
| "rewards/reward_func": 1.0250155806541443, | |
| "step": 7680, | |
| "toxic_reward": 3.694820535182953 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.15, | |
| "epoch": 1.8171077504725899, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.986948013305664, | |
| "image_reward": 0.2573333740234375, | |
| "kl": 3.8690971970558166, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0365, | |
| "reward": 0.9120604813098907, | |
| "reward_std": 0.8725108332931996, | |
| "rewards/reward_func": 0.9120604813098907, | |
| "step": 7690, | |
| "toxic_reward": 4.068342316150665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.65, | |
| "epoch": 1.8194706994328924, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.521322727203369, | |
| "image_reward": 0.232586669921875, | |
| "kl": 1.3404993683099746, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0105, | |
| "reward": 0.08191419243812562, | |
| "reward_std": 0.6063120868057013, | |
| "rewards/reward_func": 0.08191419243812562, | |
| "step": 7700, | |
| "toxic_reward": 4.285334658622742 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.65, | |
| "epoch": 1.8218336483931947, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.563508033752441, | |
| "image_reward": 0.251318359375, | |
| "kl": 4.375722473859787, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0574, | |
| "reward": 0.7043181240558625, | |
| "reward_std": 0.5366579249501229, | |
| "rewards/reward_func": 0.7043181240558625, | |
| "step": 7710, | |
| "toxic_reward": 4.468820595741272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.175, | |
| "epoch": 1.824196597353497, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.306228160858154, | |
| "image_reward": 0.25533854216337204, | |
| "kl": 1.2862511157989502, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0064, | |
| "reward": 0.2715910017490387, | |
| "reward_std": 1.3802445553243161, | |
| "rewards/reward_func": 0.2715910017490387, | |
| "step": 7720, | |
| "toxic_reward": 4.128815948963165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.525, | |
| "epoch": 1.8265595463137996, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.18682336807251, | |
| "image_reward": 0.22406005859375, | |
| "kl": 13.262214809656143, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0483, | |
| "reward": 0.43178263306617737, | |
| "reward_std": 0.5340902636758983, | |
| "rewards/reward_func": 0.43178263306617737, | |
| "step": 7730, | |
| "toxic_reward": 4.167550274729729 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.15, | |
| "epoch": 1.8289224952741021, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.534120559692383, | |
| "image_reward": 0.2627288818359375, | |
| "kl": 4.888235807418823, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0231, | |
| "reward": 0.46792620718479155, | |
| "reward_std": 0.6471607919782401, | |
| "rewards/reward_func": 0.46792620718479155, | |
| "step": 7740, | |
| "toxic_reward": 4.068465518951416 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.225, | |
| "epoch": 1.8312854442344046, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.179228782653809, | |
| "image_reward": 0.249444580078125, | |
| "kl": 3.951664477586746, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0106, | |
| "reward": 1.0039419054985046, | |
| "reward_std": 0.8490265306085348, | |
| "rewards/reward_func": 1.0039419054985046, | |
| "step": 7750, | |
| "toxic_reward": 4.28922358751297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.0, | |
| "epoch": 1.833648393194707, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.3173015117645264, | |
| "image_reward": 0.2619903564453125, | |
| "kl": 3.221765196323395, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0057, | |
| "reward": 0.5499142289161683, | |
| "reward_std": 0.8114865634590387, | |
| "rewards/reward_func": 0.5499142289161683, | |
| "step": 7760, | |
| "toxic_reward": 4.202396821975708 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.95, | |
| "epoch": 1.8360113421550095, | |
| "format_reward": -0.25, | |
| "grad_norm": 9.477835655212402, | |
| "image_reward": 0.275567626953125, | |
| "kl": 7.138307851552963, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0182, | |
| "reward": 0.5576439201831818, | |
| "reward_std": 1.718572654016316, | |
| "rewards/reward_func": 0.5576439201831818, | |
| "step": 7770, | |
| "toxic_reward": 3.892837381362915 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.325, | |
| "epoch": 1.8383742911153118, | |
| "format_reward": -0.25, | |
| "grad_norm": 15.364556312561035, | |
| "image_reward": 0.22845306396484374, | |
| "kl": 5.752876976132393, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0724, | |
| "reward": 0.4619426131248474, | |
| "reward_std": 0.994839246571064, | |
| "rewards/reward_func": 0.4619426131248474, | |
| "step": 7780, | |
| "toxic_reward": 4.777106142044067 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 62.75, | |
| "epoch": 1.8407372400756143, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.07127571105957, | |
| "image_reward": 0.24772542268037795, | |
| "kl": 2.2956355273723603, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0286, | |
| "reward": 0.23627470731735228, | |
| "reward_std": 1.3513500357046724, | |
| "rewards/reward_func": 0.23627470731735228, | |
| "step": 7790, | |
| "toxic_reward": 3.998799777030945 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.675, | |
| "epoch": 1.8431001890359169, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.4357541799545288, | |
| "image_reward": 0.2586761474609375, | |
| "kl": 2.018620651960373, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0136, | |
| "reward": 0.5957891523838044, | |
| "reward_std": 1.3981972932815552, | |
| "rewards/reward_func": 0.5957891523838044, | |
| "step": 7800, | |
| "toxic_reward": 3.74977787733078 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.9, | |
| "epoch": 1.8454631379962194, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.382879257202148, | |
| "image_reward": 0.251861572265625, | |
| "kl": 2.0946659803390504, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0109, | |
| "reward": 0.2852811634540558, | |
| "reward_std": 0.7155913963913918, | |
| "rewards/reward_func": 0.2852811634540558, | |
| "step": 7810, | |
| "toxic_reward": 4.4501420021057125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.125, | |
| "epoch": 1.8478260869565217, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.438508987426758, | |
| "image_reward": 0.270166015625, | |
| "kl": 1.658120059967041, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0217, | |
| "reward": 0.8978000760078431, | |
| "reward_std": 1.2586904138326644, | |
| "rewards/reward_func": 0.8978000760078431, | |
| "step": 7820, | |
| "toxic_reward": 4.126551675796509 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.9, | |
| "epoch": 1.850189035916824, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.4302005767822266, | |
| "image_reward": 0.2738861083984375, | |
| "kl": 1.6736175537109375, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1503, | |
| "reward": 0.2234538435935974, | |
| "reward_std": 0.7356585245579481, | |
| "rewards/reward_func": 0.2234538435935974, | |
| "step": 7830, | |
| "toxic_reward": 3.9116656303405763 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.125, | |
| "epoch": 1.8525519848771266, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.846213340759277, | |
| "image_reward": 0.2658111572265625, | |
| "kl": 1.36658373773098, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0164, | |
| "reward": -0.04418985247611999, | |
| "reward_std": 0.827529611485079, | |
| "rewards/reward_func": -0.04418985247611999, | |
| "step": 7840, | |
| "toxic_reward": 3.892432355880737 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.475, | |
| "epoch": 1.8549149338374291, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.060561656951904, | |
| "image_reward": 0.23461151123046875, | |
| "kl": 0.5276786342263222, | |
| "learning_rate": 5e-06, | |
| "loss": -0.003, | |
| "reward": 0.7852385342121124, | |
| "reward_std": 0.9399228170514107, | |
| "rewards/reward_func": 0.7852385342121124, | |
| "step": 7850, | |
| "toxic_reward": 3.641209203004837 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.025, | |
| "epoch": 1.8572778827977316, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.505263566970825, | |
| "image_reward": 0.259368896484375, | |
| "kl": 1.0133032470941543, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0581, | |
| "reward": 0.8989585757255554, | |
| "reward_std": 0.917613423243165, | |
| "rewards/reward_func": 0.8989585757255554, | |
| "step": 7860, | |
| "toxic_reward": 4.13404905796051 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.775, | |
| "epoch": 1.8596408317580342, | |
| "format_reward": 0.0, | |
| "grad_norm": 37.166500091552734, | |
| "image_reward": 0.25547637939453127, | |
| "kl": 2.1411855638027193, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1396, | |
| "reward": 0.21031073927879335, | |
| "reward_std": 0.6978237416595221, | |
| "rewards/reward_func": 0.21031073927879335, | |
| "step": 7870, | |
| "toxic_reward": 4.086808681488037 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.05, | |
| "epoch": 1.8620037807183365, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.7183008193969727, | |
| "image_reward": 0.2409576416015625, | |
| "kl": 0.6873624622821808, | |
| "learning_rate": 5e-06, | |
| "loss": 0.033, | |
| "reward": 0.7567365884780883, | |
| "reward_std": 0.95932078063488, | |
| "rewards/reward_func": 0.7567365884780883, | |
| "step": 7880, | |
| "toxic_reward": 4.077835154533386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.95, | |
| "epoch": 1.8643667296786388, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.375571846961975, | |
| "image_reward": 0.251434326171875, | |
| "kl": 1.8803806602954865, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0674, | |
| "reward": 0.1468454658985138, | |
| "reward_std": 0.7339655995368958, | |
| "rewards/reward_func": 0.1468454658985138, | |
| "step": 7890, | |
| "toxic_reward": 4.2400289416313175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.625, | |
| "epoch": 1.8667296786389413, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.778831720352173, | |
| "image_reward": 0.273419189453125, | |
| "kl": 12.759307652711868, | |
| "learning_rate": 5e-06, | |
| "loss": 0.091, | |
| "reward": 0.2764736473560333, | |
| "reward_std": 0.6703889116644859, | |
| "rewards/reward_func": 0.2764736473560333, | |
| "step": 7900, | |
| "toxic_reward": 4.633634448051453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.525, | |
| "epoch": 1.8690926275992439, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.088724136352539, | |
| "image_reward": 0.23843803405761718, | |
| "kl": 2.752323019504547, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0523, | |
| "reward": 0.44507230520248414, | |
| "reward_std": 0.8451843298971653, | |
| "rewards/reward_func": 0.44507230520248414, | |
| "step": 7910, | |
| "toxic_reward": 3.7819975137710573 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.275, | |
| "epoch": 1.8714555765595464, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.696130752563477, | |
| "image_reward": 0.25413665771484373, | |
| "kl": 2.022602713108063, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0137, | |
| "reward": 0.6168730854988098, | |
| "reward_std": 1.198334063589573, | |
| "rewards/reward_func": 0.6168730854988098, | |
| "step": 7920, | |
| "toxic_reward": 3.9178677558898927 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.75, | |
| "epoch": 1.8738185255198487, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.441836357116699, | |
| "image_reward": 0.23479461669921875, | |
| "kl": 2.0815513670444488, | |
| "learning_rate": 5e-06, | |
| "loss": 0.099, | |
| "reward": 0.49921011328697207, | |
| "reward_std": 0.8878588248044252, | |
| "rewards/reward_func": 0.49921011328697207, | |
| "step": 7930, | |
| "toxic_reward": 4.21897873878479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.575, | |
| "epoch": 1.8761814744801513, | |
| "format_reward": -0.25, | |
| "grad_norm": 17.865110397338867, | |
| "image_reward": 0.2600982666015625, | |
| "kl": 2.150428944826126, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0837, | |
| "reward": 0.48849809169769287, | |
| "reward_std": 1.4677658422850073, | |
| "rewards/reward_func": 0.48849809169769287, | |
| "step": 7940, | |
| "toxic_reward": 4.1003117799758915 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.925, | |
| "epoch": 1.8785444234404536, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.697957992553711, | |
| "image_reward": 0.25945892333984377, | |
| "kl": 2.223458543419838, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0562, | |
| "reward": 0.20426468104124068, | |
| "reward_std": 0.5012361383065581, | |
| "rewards/reward_func": 0.20426468104124068, | |
| "step": 7950, | |
| "toxic_reward": 4.173866260051727 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.075, | |
| "epoch": 1.8809073724007561, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.58077335357666, | |
| "image_reward": 0.25069351196289064, | |
| "kl": 8.21664493083954, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0792, | |
| "reward": 0.7380830064415932, | |
| "reward_std": 1.2196707382798195, | |
| "rewards/reward_func": 0.7380830064415932, | |
| "step": 7960, | |
| "toxic_reward": 3.5894944429397584 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.475, | |
| "epoch": 1.8832703213610587, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.384510040283203, | |
| "image_reward": 0.23923797607421876, | |
| "kl": 21.914335840940474, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0204, | |
| "reward": 0.8768561869859696, | |
| "reward_std": 0.7653445459902286, | |
| "rewards/reward_func": 0.8768561869859696, | |
| "step": 7970, | |
| "toxic_reward": 3.7090541243553163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.075, | |
| "epoch": 1.8856332703213612, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.9271442890167236, | |
| "image_reward": 0.2266026809811592, | |
| "kl": 1.7329542875289916, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0735, | |
| "reward": 0.006925755739212036, | |
| "reward_std": 1.2168598100543022, | |
| "rewards/reward_func": 0.006925755739212036, | |
| "step": 7980, | |
| "toxic_reward": 4.598031067848206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.375, | |
| "epoch": 1.8879962192816635, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.941791534423828, | |
| "image_reward": 0.24617818146944045, | |
| "kl": 3.56347342133522, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1121, | |
| "reward": 0.15787817239761354, | |
| "reward_std": 0.5696724381297826, | |
| "rewards/reward_func": 0.15787817239761354, | |
| "step": 7990, | |
| "toxic_reward": 4.376770114898681 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.675, | |
| "epoch": 1.8903591682419658, | |
| "format_reward": 0.0, | |
| "grad_norm": 26.69174575805664, | |
| "image_reward": 0.2819636031985283, | |
| "kl": 7.143774968385697, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0619, | |
| "reward": 0.6548231065273284, | |
| "reward_std": 0.8737724728882312, | |
| "rewards/reward_func": 0.6548231065273284, | |
| "step": 8000, | |
| "toxic_reward": 4.582368350028991 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.625, | |
| "epoch": 1.8927221172022684, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.003530025482178, | |
| "image_reward": 0.261529541015625, | |
| "kl": 9.618525552749634, | |
| "learning_rate": 5e-06, | |
| "loss": 0.051, | |
| "reward": -0.017962449789047243, | |
| "reward_std": 0.5481395080685616, | |
| "rewards/reward_func": -0.017962449789047243, | |
| "step": 8010, | |
| "toxic_reward": 4.220958662033081 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.3, | |
| "epoch": 1.8950850661625709, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.718620777130127, | |
| "image_reward": 0.2414947509765625, | |
| "kl": 4.341373115777969, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0835, | |
| "reward": 0.38290356993675234, | |
| "reward_std": 0.9348091699182988, | |
| "rewards/reward_func": 0.38290356993675234, | |
| "step": 8020, | |
| "toxic_reward": 4.002734637260437 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.5, | |
| "epoch": 1.8974480151228734, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.61853313446045, | |
| "image_reward": 0.2405120849609375, | |
| "kl": 18.692966318130495, | |
| "learning_rate": 5e-06, | |
| "loss": 0.019, | |
| "reward": 0.6024147510528565, | |
| "reward_std": 0.8250786025077105, | |
| "rewards/reward_func": 0.6024147510528565, | |
| "step": 8030, | |
| "toxic_reward": 4.069397926330566 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.95, | |
| "epoch": 1.899810964083176, | |
| "format_reward": -0.5, | |
| "grad_norm": 4.87439489364624, | |
| "image_reward": 0.26594645231962205, | |
| "kl": 4.881083369255066, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0141, | |
| "reward": 0.1986172914505005, | |
| "reward_std": 1.8204052031040192, | |
| "rewards/reward_func": 0.1986172914505005, | |
| "step": 8040, | |
| "toxic_reward": 4.146627187728882 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.075, | |
| "epoch": 1.9021739130434783, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.0607852935791, | |
| "image_reward": 0.2584747314453125, | |
| "kl": 13.449040079116822, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0336, | |
| "reward": 0.09852480292320251, | |
| "reward_std": 0.37513242168352007, | |
| "rewards/reward_func": 0.09852480292320251, | |
| "step": 8050, | |
| "toxic_reward": 4.5937717914581295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.125, | |
| "epoch": 1.9045368620037806, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.807636260986328, | |
| "image_reward": 0.238970947265625, | |
| "kl": 9.84277012348175, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0099, | |
| "reward": 0.7841103792190551, | |
| "reward_std": 0.931809046678245, | |
| "rewards/reward_func": 0.7841103792190551, | |
| "step": 8060, | |
| "toxic_reward": 4.410308980941773 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.575, | |
| "epoch": 1.9068998109640831, | |
| "format_reward": 0.0, | |
| "grad_norm": 30.570436477661133, | |
| "image_reward": 0.2677642822265625, | |
| "kl": 11.538963747024535, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0571, | |
| "reward": 0.7513397336006165, | |
| "reward_std": 0.6926180317997932, | |
| "rewards/reward_func": 0.7513397336006165, | |
| "step": 8070, | |
| "toxic_reward": 4.325729882717132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.925, | |
| "epoch": 1.9092627599243857, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.387159824371338, | |
| "image_reward": 0.2416534423828125, | |
| "kl": 36.42685050964356, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0984, | |
| "reward": 1.0627863883972168, | |
| "reward_std": 0.9809991672635079, | |
| "rewards/reward_func": 1.0627863883972168, | |
| "step": 8080, | |
| "toxic_reward": 4.355536723136902 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.875, | |
| "epoch": 1.9116257088846882, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.823395729064941, | |
| "image_reward": 0.2377349853515625, | |
| "kl": 14.548263192176819, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0671, | |
| "reward": 0.2453417807817459, | |
| "reward_std": 0.8620891466736793, | |
| "rewards/reward_func": 0.2453417807817459, | |
| "step": 8090, | |
| "toxic_reward": 4.175139570236206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.7, | |
| "epoch": 1.9139886578449905, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.934446334838867, | |
| "image_reward": 0.2485321044921875, | |
| "kl": 476.8398398399353, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1697, | |
| "reward": -0.03726454377174378, | |
| "reward_std": 1.2227270498871803, | |
| "rewards/reward_func": -0.03726454377174378, | |
| "step": 8100, | |
| "toxic_reward": 4.573867344856263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.8, | |
| "epoch": 1.916351606805293, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.159741401672363, | |
| "image_reward": 0.2699676513671875, | |
| "kl": 5.77522222995758, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1138, | |
| "reward": 0.4612575590610504, | |
| "reward_std": 0.5476422467269003, | |
| "rewards/reward_func": 0.4612575590610504, | |
| "step": 8110, | |
| "toxic_reward": 4.567614626884461 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.575, | |
| "epoch": 1.9187145557655954, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.536759376525879, | |
| "image_reward": 0.24012298583984376, | |
| "kl": 6.318757677078247, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0103, | |
| "reward": 0.9109591245651245, | |
| "reward_std": 1.29407604560256, | |
| "rewards/reward_func": 0.9109591245651245, | |
| "step": 8120, | |
| "toxic_reward": 4.05263090133667 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.575, | |
| "epoch": 1.9210775047258979, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.833136558532715, | |
| "image_reward": 0.2688323974609375, | |
| "kl": 6.9808355331420895, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0913, | |
| "reward": 0.9232870817184449, | |
| "reward_std": 0.8357461627572775, | |
| "rewards/reward_func": 0.9232870817184449, | |
| "step": 8130, | |
| "toxic_reward": 4.430827951431274 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.9, | |
| "epoch": 1.9234404536862004, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.8239164352417, | |
| "image_reward": 0.2474365234375, | |
| "kl": 137.28185538053512, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0189, | |
| "reward": 0.401202654838562, | |
| "reward_std": 0.4000473257154226, | |
| "rewards/reward_func": 0.401202654838562, | |
| "step": 8140, | |
| "toxic_reward": 4.723700523376465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.175, | |
| "epoch": 1.925803402646503, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.823515772819519, | |
| "image_reward": 0.22316131591796876, | |
| "kl": 14.130688643455505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0239, | |
| "reward": 1.057025855779648, | |
| "reward_std": 0.9014536026865244, | |
| "rewards/reward_func": 1.057025855779648, | |
| "step": 8150, | |
| "toxic_reward": 4.387946319580078 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.875, | |
| "epoch": 1.9281663516068053, | |
| "format_reward": -0.25, | |
| "grad_norm": 46.756038665771484, | |
| "image_reward": 0.26631062775850295, | |
| "kl": 6.435283923149109, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0328, | |
| "reward": 0.22599496245384215, | |
| "reward_std": 1.4984263110905887, | |
| "rewards/reward_func": 0.22599496245384215, | |
| "step": 8160, | |
| "toxic_reward": 4.138309001922607 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.45, | |
| "epoch": 1.9305293005671076, | |
| "format_reward": 0.0, | |
| "grad_norm": 34.66867446899414, | |
| "image_reward": 0.25106658935546877, | |
| "kl": 1020.1139773368835, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0999, | |
| "reward": 0.7446302771568298, | |
| "reward_std": 0.906285472586751, | |
| "rewards/reward_func": 0.7446302771568298, | |
| "step": 8170, | |
| "toxic_reward": 4.375624704360962 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.475, | |
| "epoch": 1.9328922495274101, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.081218957901001, | |
| "image_reward": 0.242840576171875, | |
| "kl": 3.102721667289734, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0571, | |
| "reward": 0.5706271648406982, | |
| "reward_std": 0.9108416954986751, | |
| "rewards/reward_func": 0.5706271648406982, | |
| "step": 8180, | |
| "toxic_reward": 3.2474088430404664 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.475, | |
| "epoch": 1.9352551984877127, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.313660621643066, | |
| "image_reward": 0.27943929135799406, | |
| "kl": 9.811500716209412, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0091, | |
| "reward": -0.0842776358127594, | |
| "reward_std": 1.1166115825995804, | |
| "rewards/reward_func": -0.0842776358127594, | |
| "step": 8190, | |
| "toxic_reward": 4.493260765075684 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.225, | |
| "epoch": 1.9376181474480152, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.93384838104248, | |
| "image_reward": 0.2549346923828125, | |
| "kl": 13.695673048496246, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0083, | |
| "reward": 0.5832914412021637, | |
| "reward_std": 0.7408401468303054, | |
| "rewards/reward_func": 0.5832914412021637, | |
| "step": 8200, | |
| "toxic_reward": 4.143073153495789 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.125, | |
| "epoch": 1.9399810964083177, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.53907585144043, | |
| "image_reward": 0.2403350830078125, | |
| "kl": 6.522427618503571, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1325, | |
| "reward": 0.1342033863067627, | |
| "reward_std": 0.7933921405114234, | |
| "rewards/reward_func": 0.1342033863067627, | |
| "step": 8210, | |
| "toxic_reward": 4.601714444160462 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.95, | |
| "epoch": 1.94234404536862, | |
| "format_reward": 0.0, | |
| "grad_norm": 23.774093627929688, | |
| "image_reward": 0.25664520263671875, | |
| "kl": 5.8061746001243595, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1029, | |
| "reward": 0.6099749207496643, | |
| "reward_std": 1.0578389540314674, | |
| "rewards/reward_func": 0.6099749207496643, | |
| "step": 8220, | |
| "toxic_reward": 3.542074370384216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.525, | |
| "epoch": 1.9447069943289224, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.021333694458008, | |
| "image_reward": 0.25049285888671874, | |
| "kl": 4.400176310539246, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0805, | |
| "reward": 0.271647572517395, | |
| "reward_std": 0.8572761943563819, | |
| "rewards/reward_func": 0.271647572517395, | |
| "step": 8230, | |
| "toxic_reward": 4.576322746276856 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.4, | |
| "epoch": 1.947069943289225, | |
| "format_reward": -0.25, | |
| "grad_norm": 12.740744590759277, | |
| "image_reward": 0.2639495849609375, | |
| "kl": 53.72892454862595, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0517, | |
| "reward": 0.4684752345085144, | |
| "reward_std": 1.5598361855372787, | |
| "rewards/reward_func": 0.4684752345085144, | |
| "step": 8240, | |
| "toxic_reward": 4.280627131462097 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.05, | |
| "epoch": 1.9494328922495274, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.727499961853027, | |
| "image_reward": 0.273443603515625, | |
| "kl": 9.401781392097472, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0578, | |
| "reward": 0.6043965280056, | |
| "reward_std": 0.7762668525800109, | |
| "rewards/reward_func": 0.6043965280056, | |
| "step": 8250, | |
| "toxic_reward": 4.007175719738006 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.175, | |
| "epoch": 1.95179584120983, | |
| "format_reward": -0.25, | |
| "grad_norm": 21.95665740966797, | |
| "image_reward": 0.2783833831548691, | |
| "kl": 6.502747631072998, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1757, | |
| "reward": 0.7646288216114044, | |
| "reward_std": 1.2125793328508734, | |
| "rewards/reward_func": 0.7646288216114044, | |
| "step": 8260, | |
| "toxic_reward": 4.438870096206665 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.375, | |
| "epoch": 1.9541587901701323, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.78591537475586, | |
| "image_reward": 0.2660888671875, | |
| "kl": 101.96959731578826, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0584, | |
| "reward": 0.8457072794437408, | |
| "reward_std": 0.8602423138916493, | |
| "rewards/reward_func": 0.8457072794437408, | |
| "step": 8270, | |
| "toxic_reward": 4.274328458309173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.325, | |
| "epoch": 1.9565217391304348, | |
| "format_reward": -0.25, | |
| "grad_norm": 7.575157642364502, | |
| "image_reward": 0.26166178435087206, | |
| "kl": 7.8605184674263, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0592, | |
| "reward": 0.7780414521694183, | |
| "reward_std": 1.34521058909595, | |
| "rewards/reward_func": 0.7780414521694183, | |
| "step": 8280, | |
| "toxic_reward": 4.395621502399445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.0, | |
| "epoch": 1.9588846880907371, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.91838550567627, | |
| "image_reward": 0.2494293212890625, | |
| "kl": 3.4681380152702332, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0285, | |
| "reward": 1.0126874148845673, | |
| "reward_std": 0.884580178745091, | |
| "rewards/reward_func": 1.0126874148845673, | |
| "step": 8290, | |
| "toxic_reward": 4.259213161468506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.275, | |
| "epoch": 1.9612476370510397, | |
| "format_reward": -0.5, | |
| "grad_norm": 11.346104621887207, | |
| "image_reward": 0.24504598081111909, | |
| "kl": 17.73236060142517, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0501, | |
| "reward": -0.41139370799064634, | |
| "reward_std": 1.535068777576089, | |
| "rewards/reward_func": -0.41139370799064634, | |
| "step": 8300, | |
| "toxic_reward": 4.184125363826752 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.1, | |
| "epoch": 1.9636105860113422, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.8980631828308105, | |
| "image_reward": 0.2494049072265625, | |
| "kl": 1.3982277452945708, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0632, | |
| "reward": 0.7493218898773193, | |
| "reward_std": 0.7001253291964531, | |
| "rewards/reward_func": 0.7493218898773193, | |
| "step": 8310, | |
| "toxic_reward": 4.593434143066406 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.025, | |
| "epoch": 1.9659735349716447, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.629384994506836, | |
| "image_reward": 0.2574554443359375, | |
| "kl": 9.406988048553467, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0561, | |
| "reward": 0.6752925157546997, | |
| "reward_std": 1.2529858350753784, | |
| "rewards/reward_func": 0.6752925157546997, | |
| "step": 8320, | |
| "toxic_reward": 3.6643527030944822 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.725, | |
| "epoch": 1.968336483931947, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.693783283233643, | |
| "image_reward": 0.24935455322265626, | |
| "kl": 4.6708708822727205, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0369, | |
| "reward": 1.2317909479141236, | |
| "reward_std": 1.4201693460345268, | |
| "rewards/reward_func": 1.2317909479141236, | |
| "step": 8330, | |
| "toxic_reward": 3.705500102043152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.675, | |
| "epoch": 1.9706994328922496, | |
| "format_reward": 0.0, | |
| "grad_norm": 13.678855895996094, | |
| "image_reward": 0.26453857421875, | |
| "kl": 1.7596666514873505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0084, | |
| "reward": 0.6438661813735962, | |
| "reward_std": 0.5453263748437166, | |
| "rewards/reward_func": 0.6438661813735962, | |
| "step": 8340, | |
| "toxic_reward": 4.577846193313599 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.55, | |
| "epoch": 1.973062381852552, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.530174255371094, | |
| "image_reward": 0.2434234619140625, | |
| "kl": 16.00339319705963, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0858, | |
| "reward": 0.7399854481220245, | |
| "reward_std": 0.5954274158924818, | |
| "rewards/reward_func": 0.7399854481220245, | |
| "step": 8350, | |
| "toxic_reward": 4.567293620109558 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.8, | |
| "epoch": 1.9754253308128544, | |
| "format_reward": -0.25, | |
| "grad_norm": 23.65260124206543, | |
| "image_reward": 0.2397003173828125, | |
| "kl": 413.27391294240954, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0452, | |
| "reward": 0.21110110878944396, | |
| "reward_std": 1.2717279449105263, | |
| "rewards/reward_func": 0.21110110878944396, | |
| "step": 8360, | |
| "toxic_reward": 4.29474036693573 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.15, | |
| "epoch": 1.977788279773157, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.489328384399414, | |
| "image_reward": 0.2703460693359375, | |
| "kl": 11.292006134986877, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0396, | |
| "reward": 0.522923594713211, | |
| "reward_std": 0.6722989223897458, | |
| "rewards/reward_func": 0.522923594713211, | |
| "step": 8370, | |
| "toxic_reward": 4.362267994880677 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.7, | |
| "epoch": 1.9801512287334595, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.112800598144531, | |
| "image_reward": 0.2404998779296875, | |
| "kl": 5.943003642559051, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0066, | |
| "reward": 1.046756339073181, | |
| "reward_std": 1.401267148554325, | |
| "rewards/reward_func": 1.046756339073181, | |
| "step": 8380, | |
| "toxic_reward": 4.379712152481079 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.075, | |
| "epoch": 1.9825141776937618, | |
| "format_reward": 0.0, | |
| "grad_norm": 15.351452827453613, | |
| "image_reward": 0.2679229736328125, | |
| "kl": 2.1231451511383055, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0874, | |
| "reward": 0.044296592473983765, | |
| "reward_std": 0.7907688375562429, | |
| "rewards/reward_func": 0.044296592473983765, | |
| "step": 8390, | |
| "toxic_reward": 4.44194188117981 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.8, | |
| "epoch": 1.9848771266540641, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.493269920349121, | |
| "image_reward": 0.23359222412109376, | |
| "kl": 15.598973235487938, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0523, | |
| "reward": 0.6035852313041687, | |
| "reward_std": 0.7898097388446331, | |
| "rewards/reward_func": 0.6035852313041687, | |
| "step": 8400, | |
| "toxic_reward": 4.0595218420028685 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.025, | |
| "epoch": 1.9872400756143667, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.004755735397339, | |
| "image_reward": 0.23458099365234375, | |
| "kl": 13.407473123073578, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0054, | |
| "reward": 0.5494411200284958, | |
| "reward_std": 0.5586541540920734, | |
| "rewards/reward_func": 0.5494411200284958, | |
| "step": 8410, | |
| "toxic_reward": 4.175926774740219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.325, | |
| "epoch": 1.9896030245746692, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.598527908325195, | |
| "image_reward": 0.301202392578125, | |
| "kl": 4.9204403221607205, | |
| "learning_rate": 5e-06, | |
| "loss": -0.001, | |
| "reward": 0.4649462789297104, | |
| "reward_std": 0.7171205889433623, | |
| "rewards/reward_func": 0.4649462789297104, | |
| "step": 8420, | |
| "toxic_reward": 3.910860872268677 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 57.825, | |
| "epoch": 1.9919659735349717, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.607607841491699, | |
| "image_reward": 0.2745025634765625, | |
| "kl": 9.545298218727112, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0779, | |
| "reward": 0.43806184232234957, | |
| "reward_std": 0.8561135273426771, | |
| "rewards/reward_func": 0.43806184232234957, | |
| "step": 8430, | |
| "toxic_reward": 4.119659066200256 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.8, | |
| "epoch": 1.994328922495274, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.9090756177902222, | |
| "image_reward": 0.264605712890625, | |
| "kl": 1.142916288971901, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0035, | |
| "reward": -0.06725225448608399, | |
| "reward_std": 1.1679431475698947, | |
| "rewards/reward_func": -0.06725225448608399, | |
| "step": 8440, | |
| "toxic_reward": 4.506508493423462 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.625, | |
| "epoch": 1.9966918714555766, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.924688458442688, | |
| "image_reward": 0.247418212890625, | |
| "kl": 1.739441803097725, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0847, | |
| "reward": 0.2795759916305542, | |
| "reward_std": 1.532812624052167, | |
| "rewards/reward_func": 0.2795759916305542, | |
| "step": 8450, | |
| "toxic_reward": 3.7154327273368835 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.075, | |
| "epoch": 1.999054820415879, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.183807373046875, | |
| "image_reward": 0.259228515625, | |
| "kl": 1.071340024471283, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0299, | |
| "reward": 1.3993828475475312, | |
| "reward_std": 1.1979968290776015, | |
| "rewards/reward_func": 1.3993828475475312, | |
| "step": 8460, | |
| "toxic_reward": 4.236328482627869 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.325, | |
| "epoch": 2.0014177693761814, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.500320911407471, | |
| "image_reward": 0.2599090576171875, | |
| "kl": 1.2782041728496552, | |
| "learning_rate": 5e-06, | |
| "loss": 0.046, | |
| "reward": 1.2368434906005858, | |
| "reward_std": 1.188733378984034, | |
| "rewards/reward_func": 1.2368434906005858, | |
| "step": 8470, | |
| "toxic_reward": 3.8694416284561157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.575, | |
| "epoch": 2.003780718336484, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.4954817295074463, | |
| "image_reward": 0.25406494140625, | |
| "kl": 2.6761809453368186, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0561, | |
| "reward": 0.3607616722583771, | |
| "reward_std": 0.599818766117096, | |
| "rewards/reward_func": 0.3607616722583771, | |
| "step": 8480, | |
| "toxic_reward": 4.048572421073914 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.05, | |
| "epoch": 2.0061436672967865, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.18286657333374, | |
| "image_reward": 0.22822036743164062, | |
| "kl": 2.461097413301468, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0261, | |
| "reward": 0.2195432722568512, | |
| "reward_std": 0.7936036609113216, | |
| "rewards/reward_func": 0.2195432722568512, | |
| "step": 8490, | |
| "toxic_reward": 4.110178589820862 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.9, | |
| "epoch": 2.008506616257089, | |
| "format_reward": -0.75, | |
| "grad_norm": 2.6953821182250977, | |
| "image_reward": 0.238427734375, | |
| "kl": 1.0251432090997696, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0027, | |
| "reward": -0.4569409370422363, | |
| "reward_std": 1.0821652268990873, | |
| "rewards/reward_func": -0.4569409370422363, | |
| "step": 8500, | |
| "toxic_reward": 4.185848736763001 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.4, | |
| "epoch": 2.010869565217391, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.174482822418213, | |
| "image_reward": 0.245025634765625, | |
| "kl": 570.9768789380789, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0155, | |
| "reward": 0.634968101978302, | |
| "reward_std": 0.5698891028761863, | |
| "rewards/reward_func": 0.634968101978302, | |
| "step": 8510, | |
| "toxic_reward": 4.557809638977051 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.525, | |
| "epoch": 2.0132325141776937, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.716261863708496, | |
| "image_reward": 0.2716217041015625, | |
| "kl": 1.0744814962148665, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0371, | |
| "reward": 0.8971363306045532, | |
| "reward_std": 1.0540940549224616, | |
| "rewards/reward_func": 0.8971363306045532, | |
| "step": 8520, | |
| "toxic_reward": 4.03425624370575 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.725, | |
| "epoch": 2.015595463137996, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.573805809020996, | |
| "image_reward": 0.25701904296875, | |
| "kl": 1.1612621247768402, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0256, | |
| "reward": 0.4974235534667969, | |
| "reward_std": 0.7099893309175969, | |
| "rewards/reward_func": 0.4974235534667969, | |
| "step": 8530, | |
| "toxic_reward": 4.757012367248535 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.325, | |
| "epoch": 2.0179584120982987, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5829172134399414, | |
| "image_reward": 0.23183441162109375, | |
| "kl": 1.0122918039560318, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0224, | |
| "reward": 0.5489160656929016, | |
| "reward_std": 0.4481811560690403, | |
| "rewards/reward_func": 0.5489160656929016, | |
| "step": 8540, | |
| "toxic_reward": 4.330148541927338 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.6, | |
| "epoch": 2.0203213610586013, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.287252426147461, | |
| "image_reward": 0.2674835205078125, | |
| "kl": 1.138858178257942, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0238, | |
| "reward": -0.09747375845909119, | |
| "reward_std": 0.8301142632961274, | |
| "rewards/reward_func": -0.09747375845909119, | |
| "step": 8550, | |
| "toxic_reward": 4.7045900344848635 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.65, | |
| "epoch": 2.022684310018904, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.928176879882812, | |
| "image_reward": 0.2459930419921875, | |
| "kl": 1.701068675518036, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0446, | |
| "reward": 0.5473175823688508, | |
| "reward_std": 0.7223521884530782, | |
| "rewards/reward_func": 0.5473175823688508, | |
| "step": 8560, | |
| "toxic_reward": 4.571657824516296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.725, | |
| "epoch": 2.025047258979206, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.9600677490234375, | |
| "image_reward": 0.26257222443819045, | |
| "kl": 2.5904053121805193, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0163, | |
| "reward": -0.20251348614692688, | |
| "reward_std": 0.9303808398544788, | |
| "rewards/reward_func": -0.20251348614692688, | |
| "step": 8570, | |
| "toxic_reward": 4.581225419044495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.55, | |
| "epoch": 2.0274102079395084, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.309791088104248, | |
| "image_reward": 0.23095855712890626, | |
| "kl": 1.5135916233062745, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0068, | |
| "reward": 0.21151033639907837, | |
| "reward_std": 0.7603108703624457, | |
| "rewards/reward_func": 0.21151033639907837, | |
| "step": 8580, | |
| "toxic_reward": 4.328943312168121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.5, | |
| "epoch": 2.029773156899811, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.408251762390137, | |
| "image_reward": 0.2651763916015625, | |
| "kl": 0.6560351371765136, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0997, | |
| "reward": 0.012960964441299438, | |
| "reward_std": 0.35295800119638443, | |
| "rewards/reward_func": 0.012960964441299438, | |
| "step": 8590, | |
| "toxic_reward": 4.5852957487106325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.425, | |
| "epoch": 2.0321361058601135, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.234503746032715, | |
| "image_reward": 0.242572021484375, | |
| "kl": 1.6390519708395004, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0652, | |
| "reward": 0.49767774939537046, | |
| "reward_std": 0.8766103692352771, | |
| "rewards/reward_func": 0.49767774939537046, | |
| "step": 8600, | |
| "toxic_reward": 4.273276591300965 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.525, | |
| "epoch": 2.034499054820416, | |
| "format_reward": 0.0, | |
| "grad_norm": 31.749767303466797, | |
| "image_reward": 0.24173736572265625, | |
| "kl": 1.4096274197101593, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0081, | |
| "reward": 0.4087996184825897, | |
| "reward_std": 0.9022964790463448, | |
| "rewards/reward_func": 0.4087996184825897, | |
| "step": 8610, | |
| "toxic_reward": 4.324217915534973 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.825, | |
| "epoch": 2.036862003780718, | |
| "format_reward": -0.25, | |
| "grad_norm": 12.874961853027344, | |
| "image_reward": 0.2652323395013809, | |
| "kl": 1.0484755635261536, | |
| "learning_rate": 5e-06, | |
| "loss": 0.027, | |
| "reward": -0.2117618590593338, | |
| "reward_std": 1.249949687719345, | |
| "rewards/reward_func": -0.2117618590593338, | |
| "step": 8620, | |
| "toxic_reward": 3.5184057116508485 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.375, | |
| "epoch": 2.0392249527410207, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.5234144926071167, | |
| "image_reward": 0.2357269287109375, | |
| "kl": 1.2848842471837998, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0752, | |
| "reward": 0.716532975435257, | |
| "reward_std": 0.89201683960855, | |
| "rewards/reward_func": 0.716532975435257, | |
| "step": 8630, | |
| "toxic_reward": 4.4441750049591064 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.125, | |
| "epoch": 2.041587901701323, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.1807531118392944, | |
| "image_reward": 0.25293731689453125, | |
| "kl": 2.8808428183197976, | |
| "learning_rate": 5e-06, | |
| "loss": 0.023, | |
| "reward": 0.9091297924518585, | |
| "reward_std": 0.7464996237307787, | |
| "rewards/reward_func": 0.9091297924518585, | |
| "step": 8640, | |
| "toxic_reward": 4.224450874328613 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.15, | |
| "epoch": 2.0439508506616257, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.838650703430176, | |
| "image_reward": 0.23937225341796875, | |
| "kl": 2.5997998148202894, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0419, | |
| "reward": 0.3074700653553009, | |
| "reward_std": 0.8474891871213913, | |
| "rewards/reward_func": 0.3074700653553009, | |
| "step": 8650, | |
| "toxic_reward": 4.419002604484558 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.3, | |
| "epoch": 2.0463137996219283, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.063800811767578, | |
| "image_reward": 0.2351318359375, | |
| "kl": 2.429117688536644, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0507, | |
| "reward": 0.464035177230835, | |
| "reward_std": 0.8178490117192269, | |
| "rewards/reward_func": 0.464035177230835, | |
| "step": 8660, | |
| "toxic_reward": 4.122921991348266 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.725, | |
| "epoch": 2.048676748582231, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.179421424865723, | |
| "image_reward": 0.2340398147702217, | |
| "kl": 1.4141836494207383, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0321, | |
| "reward": 0.2651833713054657, | |
| "reward_std": 1.3055690463632346, | |
| "rewards/reward_func": 0.2651833713054657, | |
| "step": 8670, | |
| "toxic_reward": 4.099702596664429 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.0, | |
| "epoch": 2.051039697542533, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.729818344116211, | |
| "image_reward": 0.22962646484375, | |
| "kl": 1.0476927325129508, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0396, | |
| "reward": 0.5858624681830407, | |
| "reward_std": 0.8647454358637333, | |
| "rewards/reward_func": 0.5858624681830407, | |
| "step": 8680, | |
| "toxic_reward": 3.923676002025604 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.575, | |
| "epoch": 2.0534026465028354, | |
| "format_reward": -0.25, | |
| "grad_norm": 33.29255294799805, | |
| "image_reward": 0.23504893034696578, | |
| "kl": 0.5507122159004212, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0042, | |
| "reward": 0.5358553946018219, | |
| "reward_std": 1.371009534597397, | |
| "rewards/reward_func": 0.5358553946018219, | |
| "step": 8690, | |
| "toxic_reward": 4.1810872793197635 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.425, | |
| "epoch": 2.055765595463138, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.838844299316406, | |
| "image_reward": 0.2368377685546875, | |
| "kl": 0.8958913296461105, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0191, | |
| "reward": 0.8707101225852967, | |
| "reward_std": 1.2157209530472755, | |
| "rewards/reward_func": 0.8707101225852967, | |
| "step": 8700, | |
| "toxic_reward": 3.7892824172973634 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.275, | |
| "epoch": 2.0581285444234405, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.979665756225586, | |
| "image_reward": 0.259954833984375, | |
| "kl": 1.8286799043416977, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0646, | |
| "reward": 0.3829235196113586, | |
| "reward_std": 0.9690108880400657, | |
| "rewards/reward_func": 0.3829235196113586, | |
| "step": 8710, | |
| "toxic_reward": 4.264838469028473 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.35, | |
| "epoch": 2.060491493383743, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.838248252868652, | |
| "image_reward": 0.22672042846679688, | |
| "kl": 0.7338828861713409, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0185, | |
| "reward": 0.827715927362442, | |
| "reward_std": 0.9109129812568426, | |
| "rewards/reward_func": 0.827715927362442, | |
| "step": 8720, | |
| "toxic_reward": 4.2410869836807255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.725, | |
| "epoch": 2.0628544423440456, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.239810943603516, | |
| "image_reward": 0.25128173828125, | |
| "kl": 5.776644492149353, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0179, | |
| "reward": 0.5484015077352524, | |
| "reward_std": 1.311685237288475, | |
| "rewards/reward_func": 0.5484015077352524, | |
| "step": 8730, | |
| "toxic_reward": 4.056830906867981 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.75, | |
| "epoch": 2.0652173913043477, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.002195358276367, | |
| "image_reward": 0.229833984375, | |
| "kl": 1.1322214603424072, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1107, | |
| "reward": -0.03462121486663818, | |
| "reward_std": 0.43875638470053674, | |
| "rewards/reward_func": -0.03462121486663818, | |
| "step": 8740, | |
| "toxic_reward": 4.620968174934387 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.975, | |
| "epoch": 2.06758034026465, | |
| "format_reward": -0.25, | |
| "grad_norm": 11.761068344116211, | |
| "image_reward": 0.227862548828125, | |
| "kl": 7.682415267825126, | |
| "learning_rate": 5e-06, | |
| "loss": -0.09, | |
| "reward": 0.26341341733932494, | |
| "reward_std": 1.4870022028684615, | |
| "rewards/reward_func": 0.26341341733932494, | |
| "step": 8750, | |
| "toxic_reward": 4.433785676956177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.425, | |
| "epoch": 2.0699432892249527, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.119037628173828, | |
| "image_reward": 0.2395660400390625, | |
| "kl": 1.2481903672218322, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0746, | |
| "reward": 0.3276951313018799, | |
| "reward_std": 0.46017137840390204, | |
| "rewards/reward_func": 0.3276951313018799, | |
| "step": 8760, | |
| "toxic_reward": 4.593200016021728 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.775, | |
| "epoch": 2.0723062381852553, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.704590916633606, | |
| "image_reward": 0.2639801025390625, | |
| "kl": 2.778309851884842, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0417, | |
| "reward": 0.30720534920692444, | |
| "reward_std": 0.6144355796277523, | |
| "rewards/reward_func": 0.30720534920692444, | |
| "step": 8770, | |
| "toxic_reward": 4.481031203269959 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.75, | |
| "epoch": 2.074669187145558, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.392171859741211, | |
| "image_reward": 0.2711700439453125, | |
| "kl": 2.4740293115377425, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0428, | |
| "reward": 0.318191659450531, | |
| "reward_std": 0.6928373419679701, | |
| "rewards/reward_func": 0.318191659450531, | |
| "step": 8780, | |
| "toxic_reward": 4.502946138381958 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.85, | |
| "epoch": 2.07703213610586, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.103386878967285, | |
| "image_reward": 0.2586761474609375, | |
| "kl": 0.8142087966203689, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0231, | |
| "reward": 0.5202795565128326, | |
| "reward_std": 0.9188865400850773, | |
| "rewards/reward_func": 0.5202795565128326, | |
| "step": 8790, | |
| "toxic_reward": 4.268853735923767 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.825, | |
| "epoch": 2.0793950850661624, | |
| "format_reward": -0.5, | |
| "grad_norm": 3.831815719604492, | |
| "image_reward": 0.23363494873046875, | |
| "kl": 3.2880129516124725, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0507, | |
| "reward": -0.17698687314987183, | |
| "reward_std": 1.4326126247644424, | |
| "rewards/reward_func": -0.17698687314987183, | |
| "step": 8800, | |
| "toxic_reward": 4.581766486167908 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.95, | |
| "epoch": 2.081758034026465, | |
| "format_reward": -0.5, | |
| "grad_norm": 13.328118324279785, | |
| "image_reward": 0.283209228515625, | |
| "kl": 2.583532452583313, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1117, | |
| "reward": 0.2989026606082916, | |
| "reward_std": 2.0480130195617674, | |
| "rewards/reward_func": 0.2989026606082916, | |
| "step": 8810, | |
| "toxic_reward": 3.5884770512580872 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.125, | |
| "epoch": 2.0841209829867675, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.780765414237976, | |
| "image_reward": 0.242584228515625, | |
| "kl": 1.3252637952566146, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0244, | |
| "reward": 0.29446207284927367, | |
| "reward_std": 1.0302367629483342, | |
| "rewards/reward_func": 0.29446207284927367, | |
| "step": 8820, | |
| "toxic_reward": 4.3270234823226925 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.55, | |
| "epoch": 2.08648393194707, | |
| "format_reward": 0.0, | |
| "grad_norm": 0.9498596787452698, | |
| "image_reward": 0.25778045654296877, | |
| "kl": 1.5434826999902724, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1175, | |
| "reward": 0.414847657084465, | |
| "reward_std": 0.7169051881879568, | |
| "rewards/reward_func": 0.414847657084465, | |
| "step": 8830, | |
| "toxic_reward": 4.253011137247086 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.925, | |
| "epoch": 2.0888468809073726, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.519845008850098, | |
| "image_reward": 0.23509521484375, | |
| "kl": 0.5451234139502048, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1295, | |
| "reward": 0.510816776752472, | |
| "reward_std": 0.6249840931501239, | |
| "rewards/reward_func": 0.510816776752472, | |
| "step": 8840, | |
| "toxic_reward": 4.746308994293213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.225, | |
| "epoch": 2.0912098298676747, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.2223246097564697, | |
| "image_reward": 0.23397216796875, | |
| "kl": 1.196854567527771, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0121, | |
| "reward": 0.430766886472702, | |
| "reward_std": 1.1830935038626194, | |
| "rewards/reward_func": 0.430766886472702, | |
| "step": 8850, | |
| "toxic_reward": 4.010055112838745 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.1, | |
| "epoch": 2.093572778827977, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.516735553741455, | |
| "image_reward": 0.248101806640625, | |
| "kl": 1.3512360364198686, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0064, | |
| "reward": 0.4724120795726776, | |
| "reward_std": 1.2229724466800689, | |
| "rewards/reward_func": 0.4724120795726776, | |
| "step": 8860, | |
| "toxic_reward": 4.219368410110474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.45, | |
| "epoch": 2.0959357277882797, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.528098106384277, | |
| "image_reward": 0.2487030029296875, | |
| "kl": 0.8361154735088349, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0096, | |
| "reward": 0.481815043091774, | |
| "reward_std": 0.8121814839541912, | |
| "rewards/reward_func": 0.481815043091774, | |
| "step": 8870, | |
| "toxic_reward": 4.385925316810608 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.625, | |
| "epoch": 2.0982986767485823, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.012845993041992, | |
| "image_reward": 0.26658477783203127, | |
| "kl": 5.801792293787003, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0543, | |
| "reward": 0.8176810801029205, | |
| "reward_std": 0.9641741991043091, | |
| "rewards/reward_func": 0.8176810801029205, | |
| "step": 8880, | |
| "toxic_reward": 4.4868937015533445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 58.625, | |
| "epoch": 2.100661625708885, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.7836425304412842, | |
| "image_reward": 0.219366455078125, | |
| "kl": 2.3325648605823517, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0747, | |
| "reward": 0.07698584794998169, | |
| "reward_std": 0.7939232878386975, | |
| "rewards/reward_func": 0.07698584794998169, | |
| "step": 8890, | |
| "toxic_reward": 3.8968687295913695 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.1, | |
| "epoch": 2.1030245746691874, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.322965145111084, | |
| "image_reward": 0.2381072998046875, | |
| "kl": 1.5042289346456528, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0396, | |
| "reward": 0.6448795169591903, | |
| "reward_std": 1.071408730885014, | |
| "rewards/reward_func": 0.6448795169591903, | |
| "step": 8900, | |
| "toxic_reward": 4.1689093708992 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.6, | |
| "epoch": 2.1053875236294894, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.794384002685547, | |
| "image_reward": 0.2495122268795967, | |
| "kl": 1.5198310285806655, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0187, | |
| "reward": -0.018249320983886718, | |
| "reward_std": 0.9345291556790472, | |
| "rewards/reward_func": -0.018249320983886718, | |
| "step": 8910, | |
| "toxic_reward": 4.52795147895813 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.425, | |
| "epoch": 2.107750472589792, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.023624420166016, | |
| "image_reward": 0.2479766845703125, | |
| "kl": 0.8687845975160599, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0294, | |
| "reward": 0.188352632522583, | |
| "reward_std": 0.6677849385887384, | |
| "rewards/reward_func": 0.188352632522583, | |
| "step": 8920, | |
| "toxic_reward": 4.531054210662842 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.625, | |
| "epoch": 2.1101134215500945, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3595223426818848, | |
| "image_reward": 0.2230987548828125, | |
| "kl": 8.144508588314057, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0584, | |
| "reward": 0.1267090529203415, | |
| "reward_std": 0.4783048752695322, | |
| "rewards/reward_func": 0.1267090529203415, | |
| "step": 8930, | |
| "toxic_reward": 4.553759598731995 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.075, | |
| "epoch": 2.112476370510397, | |
| "format_reward": 0.0, | |
| "grad_norm": 27.809850692749023, | |
| "image_reward": 0.2523773193359375, | |
| "kl": 0.7018902823328972, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0582, | |
| "reward": 0.3417531728744507, | |
| "reward_std": 0.6842773109674454, | |
| "rewards/reward_func": 0.3417531728744507, | |
| "step": 8940, | |
| "toxic_reward": 4.307323157787323 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.875, | |
| "epoch": 2.1148393194706996, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.785470962524414, | |
| "image_reward": 0.2328338623046875, | |
| "kl": 0.866449561715126, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0235, | |
| "reward": 0.9232547760009766, | |
| "reward_std": 1.3794653311371803, | |
| "rewards/reward_func": 0.9232547760009766, | |
| "step": 8950, | |
| "toxic_reward": 4.619945740699768 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.5, | |
| "epoch": 2.1172022684310017, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.097494602203369, | |
| "image_reward": 0.2396942138671875, | |
| "kl": 2.0439702540636064, | |
| "learning_rate": 5e-06, | |
| "loss": 0.072, | |
| "reward": 0.26967796087265017, | |
| "reward_std": 0.541577224060893, | |
| "rewards/reward_func": 0.26967796087265017, | |
| "step": 8960, | |
| "toxic_reward": 4.525745010375976 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.5, | |
| "epoch": 2.119565217391304, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.064950942993164, | |
| "image_reward": 0.256915283203125, | |
| "kl": 1.2320655643939973, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0406, | |
| "reward": 0.2311327040195465, | |
| "reward_std": 0.5596455704420805, | |
| "rewards/reward_func": 0.2311327040195465, | |
| "step": 8970, | |
| "toxic_reward": 4.595108699798584 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.65, | |
| "epoch": 2.1219281663516067, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.589348793029785, | |
| "image_reward": 0.2637451171875, | |
| "kl": 3.0987811207771303, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0007, | |
| "reward": 0.11190776824951172, | |
| "reward_std": 0.6281056736595929, | |
| "rewards/reward_func": 0.11190776824951172, | |
| "step": 8980, | |
| "toxic_reward": 3.8689257740974425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.575, | |
| "epoch": 2.1242911153119093, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.932865142822266, | |
| "image_reward": 0.246466064453125, | |
| "kl": 0.9722367227077484, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0272, | |
| "reward": 0.507748281955719, | |
| "reward_std": 0.49981794953346254, | |
| "rewards/reward_func": 0.507748281955719, | |
| "step": 8990, | |
| "toxic_reward": 4.460081267356872 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.45, | |
| "epoch": 2.126654064272212, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.9373064041137695, | |
| "image_reward": 0.247686767578125, | |
| "kl": 1.1410144418478012, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0068, | |
| "reward": 0.7415260970592499, | |
| "reward_std": 0.7849105328321457, | |
| "rewards/reward_func": 0.7415260970592499, | |
| "step": 9000, | |
| "toxic_reward": 4.300376343727112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.45, | |
| "epoch": 2.1290170132325144, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5460894107818604, | |
| "image_reward": 0.253936767578125, | |
| "kl": 0.8742299884557724, | |
| "learning_rate": 5e-06, | |
| "loss": 0.009, | |
| "reward": 0.2949145630002022, | |
| "reward_std": 0.8127535484731198, | |
| "rewards/reward_func": 0.2949145630002022, | |
| "step": 9010, | |
| "toxic_reward": 4.208229756355285 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.875, | |
| "epoch": 2.1313799621928164, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.273791313171387, | |
| "image_reward": 0.23465576171875, | |
| "kl": 1.841402593255043, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0698, | |
| "reward": 0.2962026834487915, | |
| "reward_std": 0.5367021195590496, | |
| "rewards/reward_func": 0.2962026834487915, | |
| "step": 9020, | |
| "toxic_reward": 4.80813364982605 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.275, | |
| "epoch": 2.133742911153119, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.396345853805542, | |
| "image_reward": 0.25015411376953123, | |
| "kl": 2.9800774693489074, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0132, | |
| "reward": 0.2012641340494156, | |
| "reward_std": 0.9129719872027635, | |
| "rewards/reward_func": 0.2012641340494156, | |
| "step": 9030, | |
| "toxic_reward": 4.130698096752167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.85, | |
| "epoch": 2.1361058601134215, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.688182830810547, | |
| "image_reward": 0.27446441650390624, | |
| "kl": 3.037346550822258, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0086, | |
| "reward": 0.7237348094582557, | |
| "reward_std": 0.9079257231205702, | |
| "rewards/reward_func": 0.7237348094582557, | |
| "step": 9040, | |
| "toxic_reward": 4.051010203361511 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.55, | |
| "epoch": 2.138468809073724, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.8426876068115234, | |
| "image_reward": 0.26009623110294344, | |
| "kl": 234.93744373321533, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0741, | |
| "reward": 0.14779042601585388, | |
| "reward_std": 1.6020304949954152, | |
| "rewards/reward_func": 0.14779042601585388, | |
| "step": 9050, | |
| "toxic_reward": 4.297617936134339 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.55, | |
| "epoch": 2.1408317580340266, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.305941581726074, | |
| "image_reward": 0.23194732666015624, | |
| "kl": 1.907991024851799, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0095, | |
| "reward": 0.5170632779598237, | |
| "reward_std": 0.8214797399006784, | |
| "rewards/reward_func": 0.5170632779598237, | |
| "step": 9060, | |
| "toxic_reward": 4.2402391791343685 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.0, | |
| "epoch": 2.143194706994329, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.640130043029785, | |
| "image_reward": 0.2427520751953125, | |
| "kl": 7.810242688655853, | |
| "learning_rate": 5e-06, | |
| "loss": -0.032, | |
| "reward": 0.20102212131023406, | |
| "reward_std": 1.3208093732595443, | |
| "rewards/reward_func": 0.20102212131023406, | |
| "step": 9070, | |
| "toxic_reward": 3.7083510875701906 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.1, | |
| "epoch": 2.145557655954631, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.3630574941635132, | |
| "image_reward": 0.2402252197265625, | |
| "kl": 10.548876631259919, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0112, | |
| "reward": 0.7412046194076538, | |
| "reward_std": 0.9147299766540528, | |
| "rewards/reward_func": 0.7412046194076538, | |
| "step": 9080, | |
| "toxic_reward": 4.000400519371032 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.2, | |
| "epoch": 2.1479206049149338, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.9857964515686035, | |
| "image_reward": 0.2373077392578125, | |
| "kl": 11.153948432207107, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0525, | |
| "reward": 0.4043663561344147, | |
| "reward_std": 0.7483500481583178, | |
| "rewards/reward_func": 0.4043663561344147, | |
| "step": 9090, | |
| "toxic_reward": 4.85220890045166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.05, | |
| "epoch": 2.1502835538752363, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3352339267730713, | |
| "image_reward": 0.2479736328125, | |
| "kl": 3.4772801220417024, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0421, | |
| "reward": 0.5415297746658325, | |
| "reward_std": 0.90726547986269, | |
| "rewards/reward_func": 0.5415297746658325, | |
| "step": 9100, | |
| "toxic_reward": 4.584239768981933 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.125, | |
| "epoch": 2.152646502835539, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.961544990539551, | |
| "image_reward": 0.2632598876953125, | |
| "kl": 2.5744317561388015, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1083, | |
| "reward": 0.5891210317611695, | |
| "reward_std": 1.2839626222848892, | |
| "rewards/reward_func": 0.5891210317611695, | |
| "step": 9110, | |
| "toxic_reward": 4.181219959259034 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.075, | |
| "epoch": 2.1550094517958414, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.345954895019531, | |
| "image_reward": 0.238812255859375, | |
| "kl": 6.054638743400574, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1109, | |
| "reward": 0.06543984264135361, | |
| "reward_std": 0.519540898501873, | |
| "rewards/reward_func": 0.06543984264135361, | |
| "step": 9120, | |
| "toxic_reward": 4.270166897773743 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.875, | |
| "epoch": 2.1573724007561434, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.313584327697754, | |
| "image_reward": 0.24713134765625, | |
| "kl": 6.326075008511543, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0767, | |
| "reward": 0.8043414294719696, | |
| "reward_std": 1.0881578013300897, | |
| "rewards/reward_func": 0.8043414294719696, | |
| "step": 9130, | |
| "toxic_reward": 4.155464768409729 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.075, | |
| "epoch": 2.159735349716446, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.283196449279785, | |
| "image_reward": 0.2358612060546875, | |
| "kl": 2.606276285648346, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1205, | |
| "reward": 0.1571010023355484, | |
| "reward_std": 0.7534957839176059, | |
| "rewards/reward_func": 0.1571010023355484, | |
| "step": 9140, | |
| "toxic_reward": 4.338043940067291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.65, | |
| "epoch": 2.1620982986767485, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.468461036682129, | |
| "image_reward": 0.2525299072265625, | |
| "kl": 10.651741808652877, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0658, | |
| "reward": 0.499350106716156, | |
| "reward_std": 0.7922366757877171, | |
| "rewards/reward_func": 0.499350106716156, | |
| "step": 9150, | |
| "toxic_reward": 4.407905173301697 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.5, | |
| "epoch": 2.164461247637051, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.4810751676559448, | |
| "image_reward": 0.22776641845703124, | |
| "kl": 6.478110730648041, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0464, | |
| "reward": 0.9298590540885925, | |
| "reward_std": 0.8800611793994904, | |
| "rewards/reward_func": 0.9298590540885925, | |
| "step": 9160, | |
| "toxic_reward": 4.576443719863891 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.875, | |
| "epoch": 2.1668241965973536, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.652629852294922, | |
| "image_reward": 0.260015869140625, | |
| "kl": 1.726886612176895, | |
| "learning_rate": 5e-06, | |
| "loss": -0.127, | |
| "reward": 0.4531150579452515, | |
| "reward_std": 0.8976183220744133, | |
| "rewards/reward_func": 0.4531150579452515, | |
| "step": 9170, | |
| "toxic_reward": 4.432527303695679 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.0, | |
| "epoch": 2.169187145557656, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.5050952434539795, | |
| "image_reward": 0.2384307861328125, | |
| "kl": 4.27691433429718, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0311, | |
| "reward": 0.6826965510845184, | |
| "reward_std": 0.7877496212720871, | |
| "rewards/reward_func": 0.6826965510845184, | |
| "step": 9180, | |
| "toxic_reward": 4.4837501525878904 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 52.1, | |
| "epoch": 2.171550094517958, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.2137303352355957, | |
| "image_reward": 0.24414825439453125, | |
| "kl": 3.9786434292793276, | |
| "learning_rate": 5e-06, | |
| "loss": -0.048, | |
| "reward": 0.34388454258441925, | |
| "reward_std": 0.7642363490536809, | |
| "rewards/reward_func": 0.34388454258441925, | |
| "step": 9190, | |
| "toxic_reward": 4.380149924755097 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.8, | |
| "epoch": 2.1739130434782608, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.09927749633789, | |
| "image_reward": 0.2380889892578125, | |
| "kl": 3.893726623058319, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0829, | |
| "reward": 0.28469178080558777, | |
| "reward_std": 0.7502976493909955, | |
| "rewards/reward_func": 0.28469178080558777, | |
| "step": 9200, | |
| "toxic_reward": 4.405940270423889 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.25, | |
| "epoch": 2.1762759924385633, | |
| "format_reward": -0.25, | |
| "grad_norm": 36.55522537231445, | |
| "image_reward": 0.23165105208754538, | |
| "kl": 25.98146269917488, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0734, | |
| "reward": -0.05637494325637817, | |
| "reward_std": 1.5777033947408199, | |
| "rewards/reward_func": -0.05637494325637817, | |
| "step": 9210, | |
| "toxic_reward": 4.138173961639405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.45, | |
| "epoch": 2.178638941398866, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.0444726943969727, | |
| "image_reward": 0.2312713623046875, | |
| "kl": 2.639026927947998, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0598, | |
| "reward": 0.5707060933113098, | |
| "reward_std": 1.1906714523211122, | |
| "rewards/reward_func": 0.5707060933113098, | |
| "step": 9220, | |
| "toxic_reward": 3.9467220425605776 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.725, | |
| "epoch": 2.1810018903591684, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.821864128112793, | |
| "image_reward": 0.260614013671875, | |
| "kl": 2.6187705636024474, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0004, | |
| "reward": 0.776735657453537, | |
| "reward_std": 0.8302984148263931, | |
| "rewards/reward_func": 0.776735657453537, | |
| "step": 9230, | |
| "toxic_reward": 4.346326851844788 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.95, | |
| "epoch": 2.183364839319471, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.473363876342773, | |
| "image_reward": 0.227203369140625, | |
| "kl": 1.9701256573200225, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0091, | |
| "reward": 0.6466103255748749, | |
| "reward_std": 0.5622012199833989, | |
| "rewards/reward_func": 0.6466103255748749, | |
| "step": 9240, | |
| "toxic_reward": 4.649976348876953 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.9, | |
| "epoch": 2.185727788279773, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.42177391052246, | |
| "image_reward": 0.2678741455078125, | |
| "kl": 3.791227114200592, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0161, | |
| "reward": 0.5803273111581803, | |
| "reward_std": 0.8187548790127039, | |
| "rewards/reward_func": 0.5803273111581803, | |
| "step": 9250, | |
| "toxic_reward": 4.081698262691498 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.5, | |
| "epoch": 2.1880907372400755, | |
| "format_reward": 0.0, | |
| "grad_norm": 23.73421859741211, | |
| "image_reward": 0.227972412109375, | |
| "kl": 3.5688813447952272, | |
| "learning_rate": 5e-06, | |
| "loss": 0.052, | |
| "reward": 0.5074087619781494, | |
| "reward_std": 0.9598018784075976, | |
| "rewards/reward_func": 0.5074087619781494, | |
| "step": 9260, | |
| "toxic_reward": 4.401837420463562 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 60.425, | |
| "epoch": 2.190453686200378, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.969075202941895, | |
| "image_reward": 0.245391845703125, | |
| "kl": 2.1753955483436584, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0984, | |
| "reward": 0.9439354777336121, | |
| "reward_std": 0.7434614159166812, | |
| "rewards/reward_func": 0.9439354777336121, | |
| "step": 9270, | |
| "toxic_reward": 4.398157930374145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.325, | |
| "epoch": 2.1928166351606806, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.830771446228027, | |
| "image_reward": 0.23084869384765624, | |
| "kl": 1.2656208366155624, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0047, | |
| "reward": 0.31412020325660706, | |
| "reward_std": 0.8172964336816222, | |
| "rewards/reward_func": 0.31412020325660706, | |
| "step": 9280, | |
| "toxic_reward": 4.430534148216248 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.475, | |
| "epoch": 2.195179584120983, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.753904819488525, | |
| "image_reward": 0.24887237548828126, | |
| "kl": 1.3502902746200562, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0623, | |
| "reward": 0.7708447635173797, | |
| "reward_std": 0.9731228679418564, | |
| "rewards/reward_func": 0.7708447635173797, | |
| "step": 9290, | |
| "toxic_reward": 4.273838710784912 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.45, | |
| "epoch": 2.197542533081285, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.7253947257995605, | |
| "image_reward": 0.2331573486328125, | |
| "kl": 3.059584191441536, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0942, | |
| "reward": 0.43726455271244047, | |
| "reward_std": 1.3362455716356636, | |
| "rewards/reward_func": 0.43726455271244047, | |
| "step": 9300, | |
| "toxic_reward": 4.275756049156189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.525, | |
| "epoch": 2.1999054820415878, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.917332649230957, | |
| "image_reward": 0.2485260009765625, | |
| "kl": 3.154623621702194, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0028, | |
| "reward": 0.7619877219200134, | |
| "reward_std": 0.7480236226692796, | |
| "rewards/reward_func": 0.7619877219200134, | |
| "step": 9310, | |
| "toxic_reward": 4.5632892370224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.425, | |
| "epoch": 2.2022684310018903, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.2232089042663574, | |
| "image_reward": 0.272552490234375, | |
| "kl": 4.570386919379234, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0767, | |
| "reward": -0.32990662753582, | |
| "reward_std": 0.6452065747231245, | |
| "rewards/reward_func": -0.32990662753582, | |
| "step": 9320, | |
| "toxic_reward": 4.057078433036804 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.475, | |
| "epoch": 2.204631379962193, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.401063919067383, | |
| "image_reward": 0.22475687563419341, | |
| "kl": 2.3822293996810915, | |
| "learning_rate": 5e-06, | |
| "loss": -0.1368, | |
| "reward": 0.6720861852169037, | |
| "reward_std": 0.5467347849160433, | |
| "rewards/reward_func": 0.6720861852169037, | |
| "step": 9330, | |
| "toxic_reward": 4.748937749862671 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.725, | |
| "epoch": 2.2069943289224954, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.317251205444336, | |
| "image_reward": 0.2457794189453125, | |
| "kl": 2.1576267421245574, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0443, | |
| "reward": 0.4097582340240479, | |
| "reward_std": 0.6367886804975569, | |
| "rewards/reward_func": 0.4097582340240479, | |
| "step": 9340, | |
| "toxic_reward": 3.8079322576522827 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.0, | |
| "epoch": 2.209357277882798, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.418509483337402, | |
| "image_reward": 0.2345733642578125, | |
| "kl": 0.8293094992637634, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1013, | |
| "reward": 0.4199859380722046, | |
| "reward_std": 0.8710890758782626, | |
| "rewards/reward_func": 0.4199859380722046, | |
| "step": 9350, | |
| "toxic_reward": 4.7352869510650635 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.7, | |
| "epoch": 2.2117202268431, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.66223907470703, | |
| "image_reward": 0.25906219482421877, | |
| "kl": 1.3076835095882415, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1376, | |
| "reward": 0.2918867290019989, | |
| "reward_std": 0.2911624666303396, | |
| "rewards/reward_func": 0.2918867290019989, | |
| "step": 9360, | |
| "toxic_reward": 4.793551731109619 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.2, | |
| "epoch": 2.2140831758034025, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.431090354919434, | |
| "image_reward": 0.214971923828125, | |
| "kl": 2.7103491842746736, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0478, | |
| "reward": 0.3785900384187698, | |
| "reward_std": 0.8829892821609974, | |
| "rewards/reward_func": 0.3785900384187698, | |
| "step": 9370, | |
| "toxic_reward": 4.103424906730652 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.45, | |
| "epoch": 2.216446124763705, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.423946857452393, | |
| "image_reward": 0.2241668701171875, | |
| "kl": 9.351680633425712, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0282, | |
| "reward": 0.6931559234857559, | |
| "reward_std": 0.9827461183071137, | |
| "rewards/reward_func": 0.6931559234857559, | |
| "step": 9380, | |
| "toxic_reward": 4.327680516242981 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.75, | |
| "epoch": 2.2188090737240076, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.53814697265625, | |
| "image_reward": 0.24124603271484374, | |
| "kl": 1.539423054456711, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0074, | |
| "reward": 0.5662744238972663, | |
| "reward_std": 0.8970771560445427, | |
| "rewards/reward_func": 0.5662744238972663, | |
| "step": 9390, | |
| "toxic_reward": 3.676301693916321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.85, | |
| "epoch": 2.22117202268431, | |
| "format_reward": -0.25, | |
| "grad_norm": 27.224220275878906, | |
| "image_reward": 0.26278177797794344, | |
| "kl": 8.620309627056121, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0561, | |
| "reward": 0.45245649218559264, | |
| "reward_std": 1.51493993550539, | |
| "rewards/reward_func": 0.45245649218559264, | |
| "step": 9400, | |
| "toxic_reward": 4.195838165283203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.3, | |
| "epoch": 2.2235349716446127, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.83915901184082, | |
| "image_reward": 0.244732666015625, | |
| "kl": 7.121062386035919, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0277, | |
| "reward": 0.5056580305099487, | |
| "reward_std": 0.6380140800029039, | |
| "rewards/reward_func": 0.5056580305099487, | |
| "step": 9410, | |
| "toxic_reward": 4.542606806755066 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.525, | |
| "epoch": 2.2258979206049148, | |
| "format_reward": 0.0, | |
| "grad_norm": 14.892727851867676, | |
| "image_reward": 0.24930419921875, | |
| "kl": 5.096332561969757, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0329, | |
| "reward": 0.48427205085754393, | |
| "reward_std": 1.0285473830997944, | |
| "rewards/reward_func": 0.48427205085754393, | |
| "step": 9420, | |
| "toxic_reward": 4.446974515914917 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.75, | |
| "epoch": 2.2282608695652173, | |
| "format_reward": -0.25, | |
| "grad_norm": 1.2352709770202637, | |
| "image_reward": 0.24373575747013093, | |
| "kl": 16.825757110118865, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0367, | |
| "reward": 0.5291045546531677, | |
| "reward_std": 1.2605504954233766, | |
| "rewards/reward_func": 0.5291045546531677, | |
| "step": 9430, | |
| "toxic_reward": 4.275347375869751 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.875, | |
| "epoch": 2.23062381852552, | |
| "format_reward": 0.0, | |
| "grad_norm": 22.900882720947266, | |
| "image_reward": 0.25095672607421876, | |
| "kl": 10.578787690401077, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0032, | |
| "reward": 0.26428125500679017, | |
| "reward_std": 0.9156784310936927, | |
| "rewards/reward_func": 0.26428125500679017, | |
| "step": 9440, | |
| "toxic_reward": 3.866254734992981 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.5, | |
| "epoch": 2.2329867674858224, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.677875518798828, | |
| "image_reward": 0.238702392578125, | |
| "kl": 5.4229684472084045, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0035, | |
| "reward": 0.34162178039550783, | |
| "reward_std": 1.2561071523465217, | |
| "rewards/reward_func": 0.34162178039550783, | |
| "step": 9450, | |
| "toxic_reward": 4.54083218574524 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.25, | |
| "epoch": 2.235349716446125, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.939948081970215, | |
| "image_reward": 0.245355224609375, | |
| "kl": 2.677640450000763, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0006, | |
| "reward": 0.28101458549499514, | |
| "reward_std": 0.8911348965018988, | |
| "rewards/reward_func": 0.28101458549499514, | |
| "step": 9460, | |
| "toxic_reward": 4.145281267166138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.0, | |
| "epoch": 2.237712665406427, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.8240618705749512, | |
| "image_reward": 0.231634521484375, | |
| "kl": 1.528824520111084, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0875, | |
| "reward": 0.37455313801765444, | |
| "reward_std": 0.4142075888812542, | |
| "rewards/reward_func": 0.37455313801765444, | |
| "step": 9470, | |
| "toxic_reward": 4.762905406951904 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.325, | |
| "epoch": 2.2400756143667295, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.417304515838623, | |
| "image_reward": 0.2490814208984375, | |
| "kl": 1.3931379437446594, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0247, | |
| "reward": 1.1561188876628876, | |
| "reward_std": 0.7829106822609901, | |
| "rewards/reward_func": 1.1561188876628876, | |
| "step": 9480, | |
| "toxic_reward": 4.180247139930725 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.1, | |
| "epoch": 2.242438563327032, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.768500328063965, | |
| "image_reward": 0.2283935546875, | |
| "kl": 0.9197474420070648, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0378, | |
| "reward": 0.5479820281267166, | |
| "reward_std": 0.8298372723162174, | |
| "rewards/reward_func": 0.5479820281267166, | |
| "step": 9490, | |
| "toxic_reward": 4.205300378799438 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 55.375, | |
| "epoch": 2.2448015122873346, | |
| "format_reward": 0.0, | |
| "grad_norm": 26.945127487182617, | |
| "image_reward": 0.26320343017578124, | |
| "kl": 1.3565968126058578, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0841, | |
| "reward": 0.7786126613616944, | |
| "reward_std": 1.1838067084550858, | |
| "rewards/reward_func": 0.7786126613616944, | |
| "step": 9500, | |
| "toxic_reward": 3.8664269924163817 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 36.3, | |
| "epoch": 2.247164461247637, | |
| "format_reward": -0.25, | |
| "grad_norm": 5.876742839813232, | |
| "image_reward": 0.2632466644048691, | |
| "kl": 2.275825482606888, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0704, | |
| "reward": 0.1731979250907898, | |
| "reward_std": 1.4120358280837535, | |
| "rewards/reward_func": 0.1731979250907898, | |
| "step": 9510, | |
| "toxic_reward": 4.367373514175415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.525, | |
| "epoch": 2.2495274102079397, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.764988422393799, | |
| "image_reward": 0.2396881103515625, | |
| "kl": 5.604674518108368, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0467, | |
| "reward": 0.2715915977954865, | |
| "reward_std": 0.6032503295689822, | |
| "rewards/reward_func": 0.2715915977954865, | |
| "step": 9520, | |
| "toxic_reward": 4.3036177396774296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.15, | |
| "epoch": 2.251890359168242, | |
| "format_reward": 0.0, | |
| "grad_norm": 23.23556900024414, | |
| "image_reward": 0.2485931396484375, | |
| "kl": 2.390829586982727, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0824, | |
| "reward": 0.7335005760192871, | |
| "reward_std": 0.5389063037931919, | |
| "rewards/reward_func": 0.7335005760192871, | |
| "step": 9530, | |
| "toxic_reward": 4.7080058574676515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.55, | |
| "epoch": 2.2542533081285443, | |
| "format_reward": 0.0, | |
| "grad_norm": 20.620105743408203, | |
| "image_reward": 0.2229095458984375, | |
| "kl": 5.458765661716461, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1314, | |
| "reward": 0.5191788256168366, | |
| "reward_std": 1.1402710743248463, | |
| "rewards/reward_func": 0.5191788256168366, | |
| "step": 9540, | |
| "toxic_reward": 4.577926540374756 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.75, | |
| "epoch": 2.256616257088847, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.686956405639648, | |
| "image_reward": 0.231097412109375, | |
| "kl": 2.384101688861847, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0264, | |
| "reward": 0.3123217046260834, | |
| "reward_std": 1.1325789090245961, | |
| "rewards/reward_func": 0.3123217046260834, | |
| "step": 9550, | |
| "toxic_reward": 4.343110990524292 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.8, | |
| "epoch": 2.2589792060491494, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.559769630432129, | |
| "image_reward": 0.236077880859375, | |
| "kl": 2.987881660461426, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0371, | |
| "reward": 1.1903966188430786, | |
| "reward_std": 0.8922829747200012, | |
| "rewards/reward_func": 1.1903966188430786, | |
| "step": 9560, | |
| "toxic_reward": 4.265221381187439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.15, | |
| "epoch": 2.261342155009452, | |
| "format_reward": -0.25, | |
| "grad_norm": 8.45358657836914, | |
| "image_reward": 0.2397247314453125, | |
| "kl": 3.8612433314323424, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0161, | |
| "reward": -0.07919068932533264, | |
| "reward_std": 1.113222143240273, | |
| "rewards/reward_func": -0.07919068932533264, | |
| "step": 9570, | |
| "toxic_reward": 4.036798477172852 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.95, | |
| "epoch": 2.2637051039697544, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.02270221710205, | |
| "image_reward": 0.232049560546875, | |
| "kl": 4.92200248837471, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0855, | |
| "reward": 0.7455990195274353, | |
| "reward_std": 0.8981746949255467, | |
| "rewards/reward_func": 0.7455990195274353, | |
| "step": 9580, | |
| "toxic_reward": 4.544493341445923 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 67.2, | |
| "epoch": 2.2660680529300565, | |
| "format_reward": 0.0, | |
| "grad_norm": 19.980321884155273, | |
| "image_reward": 0.2468475341796875, | |
| "kl": 3.3157293617725374, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1177, | |
| "reward": 0.4287958800792694, | |
| "reward_std": 0.8264384102076292, | |
| "rewards/reward_func": 0.4287958800792694, | |
| "step": 9590, | |
| "toxic_reward": 4.492921185493469 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.425, | |
| "epoch": 2.268431001890359, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.6929293870925903, | |
| "image_reward": 0.2279388427734375, | |
| "kl": 11.748549377918243, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0227, | |
| "reward": 0.8450492799282074, | |
| "reward_std": 0.8821253469213843, | |
| "rewards/reward_func": 0.8450492799282074, | |
| "step": 9600, | |
| "toxic_reward": 4.329080724716187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.075, | |
| "epoch": 2.2707939508506616, | |
| "format_reward": -0.25, | |
| "grad_norm": 6.6792683601379395, | |
| "image_reward": 0.2432342529296875, | |
| "kl": 23.715504467487335, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0603, | |
| "reward": 0.6058377146720886, | |
| "reward_std": 1.8178761571645736, | |
| "rewards/reward_func": 0.6058377146720886, | |
| "step": 9610, | |
| "toxic_reward": 3.8193166494369506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.6, | |
| "epoch": 2.273156899810964, | |
| "format_reward": -0.25, | |
| "grad_norm": 4.806612014770508, | |
| "image_reward": 0.25201212614774704, | |
| "kl": 2.9662541508674622, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0205, | |
| "reward": 0.14691731929779053, | |
| "reward_std": 1.3215140633285045, | |
| "rewards/reward_func": 0.14691731929779053, | |
| "step": 9620, | |
| "toxic_reward": 4.118840670585632 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.875, | |
| "epoch": 2.2755198487712667, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.628203392028809, | |
| "image_reward": 0.2614166259765625, | |
| "kl": 3.4263065993785857, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0042, | |
| "reward": 0.7031525075435638, | |
| "reward_std": 0.9792011518031358, | |
| "rewards/reward_func": 0.7031525075435638, | |
| "step": 9630, | |
| "toxic_reward": 4.620864820480347 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.3, | |
| "epoch": 2.2778827977315688, | |
| "format_reward": 0.0, | |
| "grad_norm": 10.783760070800781, | |
| "image_reward": 0.2508331298828125, | |
| "kl": 12.492328238487243, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0118, | |
| "reward": 0.2551820993423462, | |
| "reward_std": 0.8312258010730147, | |
| "rewards/reward_func": 0.2551820993423462, | |
| "step": 9640, | |
| "toxic_reward": 4.169378912448883 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.475, | |
| "epoch": 2.2802457466918713, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.430181503295898, | |
| "image_reward": 0.256829833984375, | |
| "kl": 10.857812678813934, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0124, | |
| "reward": 0.6474542915821075, | |
| "reward_std": 1.0158755726995878, | |
| "rewards/reward_func": 0.6474542915821075, | |
| "step": 9650, | |
| "toxic_reward": 4.2302504777908325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.225, | |
| "epoch": 2.282608695652174, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.653324604034424, | |
| "image_reward": 0.258251953125, | |
| "kl": 4.948359310626984, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0303, | |
| "reward": 0.8631646454334259, | |
| "reward_std": 1.4457193814218043, | |
| "rewards/reward_func": 0.8631646454334259, | |
| "step": 9660, | |
| "toxic_reward": 4.028263640403748 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.475, | |
| "epoch": 2.2849716446124764, | |
| "format_reward": 0.0, | |
| "grad_norm": 46.17441940307617, | |
| "image_reward": 0.2468775436282158, | |
| "kl": 4.73446731865406, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0188, | |
| "reward": 0.47216950058937074, | |
| "reward_std": 0.5647860389202833, | |
| "rewards/reward_func": 0.47216950058937074, | |
| "step": 9670, | |
| "toxic_reward": 4.644181919097901 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.975, | |
| "epoch": 2.287334593572779, | |
| "format_reward": 0.0, | |
| "grad_norm": 6.452030181884766, | |
| "image_reward": 0.24481913298368455, | |
| "kl": 84.93760406374932, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0133, | |
| "reward": 0.45135449171066283, | |
| "reward_std": 0.9933142360066995, | |
| "rewards/reward_func": 0.45135449171066283, | |
| "step": 9680, | |
| "toxic_reward": 4.119644379615783 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 50.925, | |
| "epoch": 2.2896975425330814, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.41399621963501, | |
| "image_reward": 0.2643157958984375, | |
| "kl": 2.3054057717323304, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0556, | |
| "reward": 0.8660075426101684, | |
| "reward_std": 1.1269529208540916, | |
| "rewards/reward_func": 0.8660075426101684, | |
| "step": 9690, | |
| "toxic_reward": 4.305045056343078 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 56.95, | |
| "epoch": 2.292060491493384, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.1870198249816895, | |
| "image_reward": 0.255328369140625, | |
| "kl": 0.9636234432458878, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0444, | |
| "reward": 0.07925584316253662, | |
| "reward_std": 0.9312605137005449, | |
| "rewards/reward_func": 0.07925584316253662, | |
| "step": 9700, | |
| "toxic_reward": 4.480338978767395 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.175, | |
| "epoch": 2.294423440453686, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.8924617767334, | |
| "image_reward": 0.2297576904296875, | |
| "kl": 1.2910286843776704, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0594, | |
| "reward": 0.09658912420272828, | |
| "reward_std": 0.534552292432636, | |
| "rewards/reward_func": 0.09658912420272828, | |
| "step": 9710, | |
| "toxic_reward": 4.421128726005554 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.5, | |
| "epoch": 2.2967863894139886, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.845993995666504, | |
| "image_reward": 0.260223388671875, | |
| "kl": 11.969202554225921, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0984, | |
| "reward": 0.49135610461235046, | |
| "reward_std": 1.074414287507534, | |
| "rewards/reward_func": 0.49135610461235046, | |
| "step": 9720, | |
| "toxic_reward": 4.317939972877502 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 54.725, | |
| "epoch": 2.299149338374291, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5245182514190674, | |
| "image_reward": 0.2630401611328125, | |
| "kl": 18.936833548545838, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0013, | |
| "reward": 0.10787631869316101, | |
| "reward_std": 0.7801851622760296, | |
| "rewards/reward_func": 0.10787631869316101, | |
| "step": 9730, | |
| "toxic_reward": 4.423897337913513 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 47.875, | |
| "epoch": 2.3015122873345937, | |
| "format_reward": -0.25, | |
| "grad_norm": 3.0716590881347656, | |
| "image_reward": 0.25508524626493456, | |
| "kl": 2.160058504343033, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0232, | |
| "reward": 0.1063625156879425, | |
| "reward_std": 1.0759605418890714, | |
| "rewards/reward_func": 0.1063625156879425, | |
| "step": 9740, | |
| "toxic_reward": 4.3748561382293705 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.475, | |
| "epoch": 2.303875236294896, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.622436285018921, | |
| "image_reward": 0.23123779296875, | |
| "kl": 8.901539516448974, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0451, | |
| "reward": 0.41706631779670716, | |
| "reward_std": 0.8451563934795558, | |
| "rewards/reward_func": 0.41706631779670716, | |
| "step": 9750, | |
| "toxic_reward": 4.452003169059753 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.425, | |
| "epoch": 2.3062381852551983, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.1622514724731445, | |
| "image_reward": 0.2591522216796875, | |
| "kl": 17.092305302619934, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0045, | |
| "reward": 0.43682674169540403, | |
| "reward_std": 1.0151184625923633, | |
| "rewards/reward_func": 0.43682674169540403, | |
| "step": 9760, | |
| "toxic_reward": 3.4172345459461213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.35, | |
| "epoch": 2.308601134215501, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.564181327819824, | |
| "image_reward": 0.2551788330078125, | |
| "kl": 8.035814380645752, | |
| "learning_rate": 5e-06, | |
| "loss": -0.057, | |
| "reward": 0.47375474870204926, | |
| "reward_std": 0.6293097786605358, | |
| "rewards/reward_func": 0.47375474870204926, | |
| "step": 9770, | |
| "toxic_reward": 3.9829455375671388 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.425, | |
| "epoch": 2.3109640831758034, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.808835983276367, | |
| "image_reward": 0.2319427490234375, | |
| "kl": 3.5698849081993105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0338, | |
| "reward": 0.6212572991847992, | |
| "reward_std": 0.6545703388750553, | |
| "rewards/reward_func": 0.6212572991847992, | |
| "step": 9780, | |
| "toxic_reward": 4.615470266342163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.7, | |
| "epoch": 2.313327032136106, | |
| "format_reward": 0.0, | |
| "grad_norm": 11.370565414428711, | |
| "image_reward": 0.2495208740234375, | |
| "kl": 58.05081114768982, | |
| "learning_rate": 5e-06, | |
| "loss": -0.003, | |
| "reward": 0.3863606512546539, | |
| "reward_std": 0.7871608097106219, | |
| "rewards/reward_func": 0.3863606512546539, | |
| "step": 9790, | |
| "toxic_reward": 4.543632507324219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 53.225, | |
| "epoch": 2.3156899810964084, | |
| "format_reward": 0.0, | |
| "grad_norm": 17.273242950439453, | |
| "image_reward": 0.256707763671875, | |
| "kl": 63.41283442378044, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0867, | |
| "reward": -0.17932948917150499, | |
| "reward_std": 0.5697868175804615, | |
| "rewards/reward_func": -0.17932948917150499, | |
| "step": 9800, | |
| "toxic_reward": 3.8667405366897585 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.575, | |
| "epoch": 2.3180529300567105, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.3364224433898926, | |
| "image_reward": 0.272943115234375, | |
| "kl": 2.4312843918800353, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0565, | |
| "reward": 0.25352796316146853, | |
| "reward_std": 0.8136134160682559, | |
| "rewards/reward_func": 0.25352796316146853, | |
| "step": 9810, | |
| "toxic_reward": 4.419120264053345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.975, | |
| "epoch": 2.320415879017013, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.5479793548584, | |
| "image_reward": 0.2720245361328125, | |
| "kl": 4.018320089578628, | |
| "learning_rate": 5e-06, | |
| "loss": -0.052, | |
| "reward": 0.3462803453207016, | |
| "reward_std": 0.6793602051213383, | |
| "rewards/reward_func": 0.3462803453207016, | |
| "step": 9820, | |
| "toxic_reward": 4.1594162940979 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.5, | |
| "epoch": 2.3227788279773156, | |
| "format_reward": 0.0, | |
| "grad_norm": 16.928800582885742, | |
| "image_reward": 0.2370941162109375, | |
| "kl": 5.093863940238952, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0256, | |
| "reward": -0.17395999431610107, | |
| "reward_std": 0.733401482924819, | |
| "rewards/reward_func": -0.17395999431610107, | |
| "step": 9830, | |
| "toxic_reward": 4.316698336601258 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 40.3, | |
| "epoch": 2.325141776937618, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.420310020446777, | |
| "image_reward": 0.2193939208984375, | |
| "kl": 10.776195186376572, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0166, | |
| "reward": 0.9508269459009171, | |
| "reward_std": 1.237346090376377, | |
| "rewards/reward_func": 0.9508269459009171, | |
| "step": 9840, | |
| "toxic_reward": 4.61365122795105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 43.6, | |
| "epoch": 2.3275047258979207, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.147756576538086, | |
| "image_reward": 0.2236907958984375, | |
| "kl": 14.836266088485718, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0104, | |
| "reward": 1.2669459402561187, | |
| "reward_std": 0.8822499677538872, | |
| "rewards/reward_func": 1.2669459402561187, | |
| "step": 9850, | |
| "toxic_reward": 4.57221360206604 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 51.225, | |
| "epoch": 2.329867674858223, | |
| "format_reward": 0.0, | |
| "grad_norm": 9.47561264038086, | |
| "image_reward": 0.2329345703125, | |
| "kl": 7.03211784362793, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1103, | |
| "reward": 0.4148245692253113, | |
| "reward_std": 0.4656851476058364, | |
| "rewards/reward_func": 0.4148245692253113, | |
| "step": 9860, | |
| "toxic_reward": 4.482611513137817 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.45, | |
| "epoch": 2.3322306238185257, | |
| "format_reward": 0.0, | |
| "grad_norm": 5.070466995239258, | |
| "image_reward": 0.242041015625, | |
| "kl": 4.666208404302597, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0147, | |
| "reward": 0.29748362898826597, | |
| "reward_std": 0.7133689053356648, | |
| "rewards/reward_func": 0.29748362898826597, | |
| "step": 9870, | |
| "toxic_reward": 4.607629799842835 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 48.7, | |
| "epoch": 2.334593572778828, | |
| "format_reward": 0.0, | |
| "grad_norm": 18.918405532836914, | |
| "image_reward": 0.2441070556640625, | |
| "kl": 1.534792199730873, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0478, | |
| "reward": 1.092936259508133, | |
| "reward_std": 1.04658992420882, | |
| "rewards/reward_func": 1.092936259508133, | |
| "step": 9880, | |
| "toxic_reward": 4.236094212532043 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.525, | |
| "epoch": 2.3369565217391304, | |
| "format_reward": -0.25, | |
| "grad_norm": 2.751826524734497, | |
| "image_reward": 0.239617919921875, | |
| "kl": 4.680880203843117, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0816, | |
| "reward": 0.287129682302475, | |
| "reward_std": 1.596864845789969, | |
| "rewards/reward_func": 0.287129682302475, | |
| "step": 9890, | |
| "toxic_reward": 3.968970334529877 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 37.05, | |
| "epoch": 2.339319470699433, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.023763179779053, | |
| "image_reward": 0.22930908203125, | |
| "kl": 2.7311850488185883, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0339, | |
| "reward": 0.4986346364021301, | |
| "reward_std": 1.2056658655405044, | |
| "rewards/reward_func": 0.4986346364021301, | |
| "step": 9900, | |
| "toxic_reward": 4.340516233444214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 41.425, | |
| "epoch": 2.3416824196597354, | |
| "format_reward": -0.5, | |
| "grad_norm": 10.157588958740234, | |
| "image_reward": 0.24829813539981843, | |
| "kl": 2.7676683485507967, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0746, | |
| "reward": -0.3049712359905243, | |
| "reward_std": 1.7655695647001266, | |
| "rewards/reward_func": -0.3049712359905243, | |
| "step": 9910, | |
| "toxic_reward": 4.103359699249268 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.525, | |
| "epoch": 2.344045368620038, | |
| "format_reward": -0.25, | |
| "grad_norm": 13.108144760131836, | |
| "image_reward": 0.22231547087430953, | |
| "kl": 0.8436797827482223, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0158, | |
| "reward": 0.21243730187416077, | |
| "reward_std": 1.2439281724393367, | |
| "rewards/reward_func": 0.21243730187416077, | |
| "step": 9920, | |
| "toxic_reward": 4.508874106407165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 39.875, | |
| "epoch": 2.34640831758034, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.745576858520508, | |
| "image_reward": 0.2392669677734375, | |
| "kl": 2.106270205974579, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0696, | |
| "reward": 0.02986244559288025, | |
| "reward_std": 0.5784997101873159, | |
| "rewards/reward_func": 0.02986244559288025, | |
| "step": 9930, | |
| "toxic_reward": 4.542821955680847 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.5, | |
| "epoch": 2.3487712665406426, | |
| "format_reward": 0.0, | |
| "grad_norm": 12.980175018310547, | |
| "image_reward": 0.26973876953125, | |
| "kl": 6.715492057800293, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0504, | |
| "reward": 0.8542561173439026, | |
| "reward_std": 0.8472011580131948, | |
| "rewards/reward_func": 0.8542561173439026, | |
| "step": 9940, | |
| "toxic_reward": 4.496533703804016 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 45.775, | |
| "epoch": 2.351134215500945, | |
| "format_reward": 0.0, | |
| "grad_norm": 8.429265975952148, | |
| "image_reward": 0.245904541015625, | |
| "kl": 1.740691715478897, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0812, | |
| "reward": 0.524560397863388, | |
| "reward_std": 0.6684761707670986, | |
| "rewards/reward_func": 0.524560397863388, | |
| "step": 9950, | |
| "toxic_reward": 4.557698893547058 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 49.725, | |
| "epoch": 2.3534971644612477, | |
| "format_reward": 0.0, | |
| "grad_norm": 7.811772346496582, | |
| "image_reward": 0.23507537841796874, | |
| "kl": 4.798752707242966, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0502, | |
| "reward": 0.2622858464717865, | |
| "reward_std": 0.7538703501224517, | |
| "rewards/reward_func": 0.2622858464717865, | |
| "step": 9960, | |
| "toxic_reward": 4.338898825645447 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 35.6, | |
| "epoch": 2.35586011342155, | |
| "format_reward": 0.0, | |
| "grad_norm": 3.807326555252075, | |
| "image_reward": 0.227703857421875, | |
| "kl": 0.7807338133454322, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0267, | |
| "reward": 1.1823074579238892, | |
| "reward_std": 1.5363767087459563, | |
| "rewards/reward_func": 1.1823074579238892, | |
| "step": 9970, | |
| "toxic_reward": 4.390237951278687 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 44.225, | |
| "epoch": 2.3582230623818523, | |
| "format_reward": 0.0, | |
| "grad_norm": 1.2711127996444702, | |
| "image_reward": 0.23590087890625, | |
| "kl": 1.0028429985046388, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0221, | |
| "reward": 0.1531411349773407, | |
| "reward_std": 0.5583895549178124, | |
| "rewards/reward_func": 0.1531411349773407, | |
| "step": 9980, | |
| "toxic_reward": 4.677754878997803 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 42.775, | |
| "epoch": 2.360586011342155, | |
| "format_reward": 0.0, | |
| "grad_norm": 2.5527610778808594, | |
| "image_reward": 0.24173583984375, | |
| "kl": 1.1347097665071488, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0127, | |
| "reward": -0.07762867212295532, | |
| "reward_std": 0.6002773646265268, | |
| "rewards/reward_func": -0.07762867212295532, | |
| "step": 9990, | |
| "toxic_reward": 4.172585511207581 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.05, | |
| "epoch": 2.3629489603024574, | |
| "format_reward": 0.0, | |
| "grad_norm": 4.715011119842529, | |
| "image_reward": 0.23904571533203126, | |
| "kl": 7.07305488884449, | |
| "learning_rate": 5e-06, | |
| "loss": -0.021, | |
| "reward": 0.26526654958724977, | |
| "reward_std": 0.82782434374094, | |
| "rewards/reward_func": 0.26526654958724977, | |
| "step": 10000, | |
| "toxic_reward": 4.33233335018158 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 24, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |