| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.1813999342753863, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005915215248110417, |
| "grad_norm": 75.64228057861328, |
| "learning_rate": 6.535947712418302e-08, |
| "logits/chosen": 0.06831549108028412, |
| "logits/rejected": 0.22947487235069275, |
| "logps/chosen": -288.36907958984375, |
| "logps/rejected": -308.97442626953125, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.15740741789340973, |
| "rewards/chosen": 0.0018866577884182334, |
| "rewards/margins": 0.00013007604866288602, |
| "rewards/rejected": 0.0017565814778208733, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.011830430496220835, |
| "grad_norm": 85.5863265991211, |
| "learning_rate": 1.6339869281045755e-07, |
| "logits/chosen": 0.1193937212228775, |
| "logits/rejected": 0.14183634519577026, |
| "logps/chosen": -297.21484375, |
| "logps/rejected": -289.2657165527344, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.513888955116272, |
| "rewards/chosen": 0.00845375657081604, |
| "rewards/margins": 0.003605001140385866, |
| "rewards/rejected": 0.004848754033446312, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.017745645744331254, |
| "grad_norm": 92.01388549804688, |
| "learning_rate": 2.6143790849673207e-07, |
| "logits/chosen": 0.013294734060764313, |
| "logits/rejected": 0.10444601625204086, |
| "logps/chosen": -293.22174072265625, |
| "logps/rejected": -296.61444091796875, |
| "loss": 0.6879, |
| "rewards/accuracies": 0.5601852536201477, |
| "rewards/chosen": 0.0001924792304635048, |
| "rewards/margins": 0.014045190066099167, |
| "rewards/rejected": -0.013852710835635662, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02366086099244167, |
| "grad_norm": 63.192481994628906, |
| "learning_rate": 3.5947712418300653e-07, |
| "logits/chosen": 0.1236824318766594, |
| "logits/rejected": 0.056549232453107834, |
| "logps/chosen": -284.45782470703125, |
| "logps/rejected": -284.9747009277344, |
| "loss": 0.6899, |
| "rewards/accuracies": 0.5370370149612427, |
| "rewards/chosen": 0.007598253898322582, |
| "rewards/margins": 0.011467371135950089, |
| "rewards/rejected": -0.003869118168950081, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02957607624055209, |
| "grad_norm": 52.87683868408203, |
| "learning_rate": 4.5751633986928105e-07, |
| "logits/chosen": 0.1870713233947754, |
| "logits/rejected": 0.17200356721878052, |
| "logps/chosen": -278.11785888671875, |
| "logps/rejected": -288.7649841308594, |
| "loss": 0.6823, |
| "rewards/accuracies": 0.5694444179534912, |
| "rewards/chosen": 0.018766043707728386, |
| "rewards/margins": 0.029558217152953148, |
| "rewards/rejected": -0.010792172513902187, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03549129148866251, |
| "grad_norm": 47.12651443481445, |
| "learning_rate": 5.555555555555555e-07, |
| "logits/chosen": 0.2681354880332947, |
| "logits/rejected": 0.2608181834220886, |
| "logps/chosen": -299.3112487792969, |
| "logps/rejected": -293.26654052734375, |
| "loss": 0.672, |
| "rewards/accuracies": 0.5972222685813904, |
| "rewards/chosen": 0.013350310735404491, |
| "rewards/margins": 0.058473195880651474, |
| "rewards/rejected": -0.04512288421392441, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04140650673677292, |
| "grad_norm": 46.12940216064453, |
| "learning_rate": 6.535947712418302e-07, |
| "logits/chosen": 0.21857470273971558, |
| "logits/rejected": 0.1517491191625595, |
| "logps/chosen": -297.955078125, |
| "logps/rejected": -291.7802429199219, |
| "loss": 0.6583, |
| "rewards/accuracies": 0.5787036418914795, |
| "rewards/chosen": -0.0388781875371933, |
| "rewards/margins": 0.12246696650981903, |
| "rewards/rejected": -0.16134515404701233, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04732172198488334, |
| "grad_norm": 43.86560821533203, |
| "learning_rate": 7.516339869281046e-07, |
| "logits/chosen": 0.24428892135620117, |
| "logits/rejected": 0.2012861967086792, |
| "logps/chosen": -294.25177001953125, |
| "logps/rejected": -295.1514892578125, |
| "loss": 0.658, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": 0.02628326043486595, |
| "rewards/margins": 0.15634408593177795, |
| "rewards/rejected": -0.1300608068704605, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.053236937232993754, |
| "grad_norm": 34.92121124267578, |
| "learning_rate": 8.496732026143792e-07, |
| "logits/chosen": 0.19387498497962952, |
| "logits/rejected": 0.2192877233028412, |
| "logps/chosen": -297.76287841796875, |
| "logps/rejected": -298.34576416015625, |
| "loss": 0.6738, |
| "rewards/accuracies": 0.6064814925193787, |
| "rewards/chosen": -0.02720721624791622, |
| "rewards/margins": 0.20852135121822357, |
| "rewards/rejected": -0.23572856187820435, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05915215248110418, |
| "grad_norm": 35.40020751953125, |
| "learning_rate": 9.477124183006536e-07, |
| "logits/chosen": 0.0057902163825929165, |
| "logits/rejected": 0.068918377161026, |
| "logps/chosen": -284.17608642578125, |
| "logps/rejected": -289.56268310546875, |
| "loss": 0.5956, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": 0.17752066254615784, |
| "rewards/margins": 0.39666658639907837, |
| "rewards/rejected": -0.21914593875408173, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06506736772921459, |
| "grad_norm": 65.21533966064453, |
| "learning_rate": 1.0457516339869283e-06, |
| "logits/chosen": 0.17345206439495087, |
| "logits/rejected": 0.2315821498632431, |
| "logps/chosen": -274.51983642578125, |
| "logps/rejected": -297.6330261230469, |
| "loss": 0.6454, |
| "rewards/accuracies": 0.5601851940155029, |
| "rewards/chosen": 0.33469104766845703, |
| "rewards/margins": 0.3119283616542816, |
| "rewards/rejected": 0.022762654349207878, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07098258297732501, |
| "grad_norm": 52.888023376464844, |
| "learning_rate": 1.1437908496732026e-06, |
| "logits/chosen": 0.10116317868232727, |
| "logits/rejected": 0.12458281219005585, |
| "logps/chosen": -304.16064453125, |
| "logps/rejected": -330.84197998046875, |
| "loss": 0.6079, |
| "rewards/accuracies": 0.6342592835426331, |
| "rewards/chosen": 0.6159655451774597, |
| "rewards/margins": 0.4744771122932434, |
| "rewards/rejected": 0.14148837327957153, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07689779822543542, |
| "grad_norm": 44.073524475097656, |
| "learning_rate": 1.2418300653594772e-06, |
| "logits/chosen": 0.11387699842453003, |
| "logits/rejected": 0.11010300368070602, |
| "logps/chosen": -282.6150207519531, |
| "logps/rejected": -302.6578369140625, |
| "loss": 0.5832, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": 0.7451757192611694, |
| "rewards/margins": 0.5253991484642029, |
| "rewards/rejected": 0.21977655589580536, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08281301347354585, |
| "grad_norm": 45.03438186645508, |
| "learning_rate": 1.3398692810457518e-06, |
| "logits/chosen": 0.14107277989387512, |
| "logits/rejected": 0.08299855887889862, |
| "logps/chosen": -289.40655517578125, |
| "logps/rejected": -301.38641357421875, |
| "loss": 0.5929, |
| "rewards/accuracies": 0.6759259700775146, |
| "rewards/chosen": 0.8502761125564575, |
| "rewards/margins": 0.6023391485214233, |
| "rewards/rejected": 0.24793694913387299, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08872822872165625, |
| "grad_norm": 34.897117614746094, |
| "learning_rate": 1.4379084967320261e-06, |
| "logits/chosen": 0.11066042631864548, |
| "logits/rejected": 0.18711869418621063, |
| "logps/chosen": -277.82086181640625, |
| "logps/rejected": -311.96185302734375, |
| "loss": 0.574, |
| "rewards/accuracies": 0.6435185670852661, |
| "rewards/chosen": 0.7543072700500488, |
| "rewards/margins": 0.595453679561615, |
| "rewards/rejected": 0.15885356068611145, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09464344396976668, |
| "grad_norm": 52.84747314453125, |
| "learning_rate": 1.535947712418301e-06, |
| "logits/chosen": 0.07594814896583557, |
| "logits/rejected": 0.06705646216869354, |
| "logps/chosen": -288.2305603027344, |
| "logps/rejected": -294.79473876953125, |
| "loss": 0.6463, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": 0.5022045373916626, |
| "rewards/margins": 0.49619922041893005, |
| "rewards/rejected": 0.006005376577377319, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1005586592178771, |
| "grad_norm": 42.00156784057617, |
| "learning_rate": 1.6339869281045753e-06, |
| "logits/chosen": 0.28929704427719116, |
| "logits/rejected": 0.28439193964004517, |
| "logps/chosen": -295.6617126464844, |
| "logps/rejected": -297.2070617675781, |
| "loss": 0.6407, |
| "rewards/accuracies": 0.5925926566123962, |
| "rewards/chosen": 0.33414918184280396, |
| "rewards/margins": 0.5068655610084534, |
| "rewards/rejected": -0.17271637916564941, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10647387446598751, |
| "grad_norm": 41.05317687988281, |
| "learning_rate": 1.7320261437908499e-06, |
| "logits/chosen": 0.2613026797771454, |
| "logits/rejected": 0.25626230239868164, |
| "logps/chosen": -270.5616149902344, |
| "logps/rejected": -288.9526672363281, |
| "loss": 0.666, |
| "rewards/accuracies": 0.6111111044883728, |
| "rewards/chosen": 0.35354501008987427, |
| "rewards/margins": 0.516350507736206, |
| "rewards/rejected": -0.16280552744865417, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11238908971409793, |
| "grad_norm": 40.66948318481445, |
| "learning_rate": 1.8300653594771242e-06, |
| "logits/chosen": 0.14654496312141418, |
| "logits/rejected": 0.18361543118953705, |
| "logps/chosen": -276.3260498046875, |
| "logps/rejected": -286.315185546875, |
| "loss": 0.6593, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": 0.19915954768657684, |
| "rewards/margins": 0.5710794925689697, |
| "rewards/rejected": -0.3719198703765869, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11830430496220835, |
| "grad_norm": 39.70878601074219, |
| "learning_rate": 1.928104575163399e-06, |
| "logits/chosen": 0.26200026273727417, |
| "logits/rejected": 0.2716706097126007, |
| "logps/chosen": -296.445068359375, |
| "logps/rejected": -302.8128356933594, |
| "loss": 0.6215, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": 0.1488932967185974, |
| "rewards/margins": 0.6068000793457031, |
| "rewards/rejected": -0.4579067826271057, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12421952021031876, |
| "grad_norm": 45.6502571105957, |
| "learning_rate": 2.0261437908496734e-06, |
| "logits/chosen": 0.16293856501579285, |
| "logits/rejected": 0.16650280356407166, |
| "logps/chosen": -279.3722839355469, |
| "logps/rejected": -302.0152282714844, |
| "loss": 0.5946, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": 0.07076837122440338, |
| "rewards/margins": 0.6879870891571045, |
| "rewards/rejected": -0.6172187328338623, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.13013473545842918, |
| "grad_norm": 37.381980895996094, |
| "learning_rate": 2.1241830065359477e-06, |
| "logits/chosen": 0.1667034924030304, |
| "logits/rejected": 0.11124895513057709, |
| "logps/chosen": -286.760986328125, |
| "logps/rejected": -294.1531982421875, |
| "loss": 0.5771, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": 0.002402188954874873, |
| "rewards/margins": 0.7446907758712769, |
| "rewards/rejected": -0.7422885298728943, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1360499507065396, |
| "grad_norm": 40.16853713989258, |
| "learning_rate": 2.222222222222222e-06, |
| "logits/chosen": 0.1928640604019165, |
| "logits/rejected": 0.2365691065788269, |
| "logps/chosen": -289.02642822265625, |
| "logps/rejected": -299.3502197265625, |
| "loss": 0.599, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": 0.0025644637644290924, |
| "rewards/margins": 0.755738377571106, |
| "rewards/rejected": -0.7531739473342896, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14196516595465003, |
| "grad_norm": 61.28244400024414, |
| "learning_rate": 2.320261437908497e-06, |
| "logits/chosen": 0.13621635735034943, |
| "logits/rejected": 0.2548728287220001, |
| "logps/chosen": -289.6425476074219, |
| "logps/rejected": -316.8446350097656, |
| "loss": 0.5755, |
| "rewards/accuracies": 0.6944444179534912, |
| "rewards/chosen": 0.012163564562797546, |
| "rewards/margins": 0.7862691879272461, |
| "rewards/rejected": -0.7741057276725769, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.14788038120276042, |
| "grad_norm": 72.57213592529297, |
| "learning_rate": 2.4183006535947716e-06, |
| "logits/chosen": 0.05144810676574707, |
| "logits/rejected": 0.13899767398834229, |
| "logps/chosen": -280.32623291015625, |
| "logps/rejected": -301.4366760253906, |
| "loss": 0.5793, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": 0.18221376836299896, |
| "rewards/margins": 0.7852480411529541, |
| "rewards/rejected": -0.603034257888794, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.15379559645087085, |
| "grad_norm": 43.978668212890625, |
| "learning_rate": 2.516339869281046e-06, |
| "logits/chosen": 0.2053721845149994, |
| "logits/rejected": 0.301210880279541, |
| "logps/chosen": -293.8603820800781, |
| "logps/rejected": -311.3579406738281, |
| "loss": 0.5898, |
| "rewards/accuracies": 0.6712962985038757, |
| "rewards/chosen": 0.04886661097407341, |
| "rewards/margins": 0.7296194434165955, |
| "rewards/rejected": -0.6807528734207153, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.15971081169898127, |
| "grad_norm": 36.1211051940918, |
| "learning_rate": 2.6143790849673208e-06, |
| "logits/chosen": 0.0973750576376915, |
| "logits/rejected": 0.1262703239917755, |
| "logps/chosen": -286.0694274902344, |
| "logps/rejected": -307.6643371582031, |
| "loss": 0.5745, |
| "rewards/accuracies": 0.680555522441864, |
| "rewards/chosen": 0.17426416277885437, |
| "rewards/margins": 0.898255467414856, |
| "rewards/rejected": -0.7239912748336792, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1656260269470917, |
| "grad_norm": 34.10630416870117, |
| "learning_rate": 2.7124183006535947e-06, |
| "logits/chosen": 0.135942280292511, |
| "logits/rejected": 0.17899185419082642, |
| "logps/chosen": -282.0833740234375, |
| "logps/rejected": -294.80340576171875, |
| "loss": 0.5664, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": 0.24072617292404175, |
| "rewards/margins": 0.7590986490249634, |
| "rewards/rejected": -0.5183724761009216, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.17154124219520211, |
| "grad_norm": 101.0091323852539, |
| "learning_rate": 2.8104575163398695e-06, |
| "logits/chosen": 0.049379341304302216, |
| "logits/rejected": 0.22774741053581238, |
| "logps/chosen": -276.23651123046875, |
| "logps/rejected": -328.92730712890625, |
| "loss": 0.6363, |
| "rewards/accuracies": 0.6388888955116272, |
| "rewards/chosen": 0.24817490577697754, |
| "rewards/margins": 0.6880936026573181, |
| "rewards/rejected": -0.4399186372756958, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1774564574433125, |
| "grad_norm": 41.10043716430664, |
| "learning_rate": 2.9084967320261443e-06, |
| "logits/chosen": 0.04306400939822197, |
| "logits/rejected": 0.12012603133916855, |
| "logps/chosen": -273.14862060546875, |
| "logps/rejected": -297.67779541015625, |
| "loss": 0.6138, |
| "rewards/accuracies": 0.6388888955116272, |
| "rewards/chosen": 0.2495533525943756, |
| "rewards/margins": 0.97026127576828, |
| "rewards/rejected": -0.720707893371582, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18337167269142293, |
| "grad_norm": 36.96125030517578, |
| "learning_rate": 3.0065359477124182e-06, |
| "logits/chosen": -0.0033540725708007812, |
| "logits/rejected": 0.15706853568553925, |
| "logps/chosen": -280.9652099609375, |
| "logps/rejected": -311.3870544433594, |
| "loss": 0.5552, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": 0.41701826453208923, |
| "rewards/margins": 1.1586750745773315, |
| "rewards/rejected": -0.7416568994522095, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.18928688793953335, |
| "grad_norm": 34.02159118652344, |
| "learning_rate": 3.104575163398693e-06, |
| "logits/chosen": 0.055927395820617676, |
| "logits/rejected": 0.07919944822788239, |
| "logps/chosen": -277.48199462890625, |
| "logps/rejected": -295.92218017578125, |
| "loss": 0.5402, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": 0.7134115099906921, |
| "rewards/margins": 1.0553935766220093, |
| "rewards/rejected": -0.34198200702667236, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.19520210318764378, |
| "grad_norm": 30.828338623046875, |
| "learning_rate": 3.2026143790849674e-06, |
| "logits/chosen": 0.02216392755508423, |
| "logits/rejected": 0.17252111434936523, |
| "logps/chosen": -274.6871337890625, |
| "logps/rejected": -308.18707275390625, |
| "loss": 0.5325, |
| "rewards/accuracies": 0.6944444179534912, |
| "rewards/chosen": 0.7039467692375183, |
| "rewards/margins": 1.234229326248169, |
| "rewards/rejected": -0.5302824974060059, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2011173184357542, |
| "grad_norm": 55.14480972290039, |
| "learning_rate": 3.300653594771242e-06, |
| "logits/chosen": 0.0836678296327591, |
| "logits/rejected": 0.1754518747329712, |
| "logps/chosen": -285.959228515625, |
| "logps/rejected": -307.93658447265625, |
| "loss": 0.6502, |
| "rewards/accuracies": 0.6759259700775146, |
| "rewards/chosen": 0.4246281087398529, |
| "rewards/margins": 1.0537660121917725, |
| "rewards/rejected": -0.6291378736495972, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2070325336838646, |
| "grad_norm": 62.848453521728516, |
| "learning_rate": 3.398692810457517e-06, |
| "logits/chosen": 0.027146054431796074, |
| "logits/rejected": 0.0840519368648529, |
| "logps/chosen": -294.8350524902344, |
| "logps/rejected": -316.4007873535156, |
| "loss": 0.6039, |
| "rewards/accuracies": 0.6712962985038757, |
| "rewards/chosen": 0.3796502947807312, |
| "rewards/margins": 1.3719719648361206, |
| "rewards/rejected": -0.9923217296600342, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.21294774893197502, |
| "grad_norm": 34.59516143798828, |
| "learning_rate": 3.496732026143791e-06, |
| "logits/chosen": 0.18716061115264893, |
| "logits/rejected": 0.2005024403333664, |
| "logps/chosen": -294.5157470703125, |
| "logps/rejected": -318.09368896484375, |
| "loss": 0.646, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": 0.1613771915435791, |
| "rewards/margins": 1.098291277885437, |
| "rewards/rejected": -0.9369141459465027, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.21886296418008544, |
| "grad_norm": 59.11061096191406, |
| "learning_rate": 3.5947712418300657e-06, |
| "logits/chosen": 0.02729523926973343, |
| "logits/rejected": 0.18032339215278625, |
| "logps/chosen": -279.9372863769531, |
| "logps/rejected": -314.1539306640625, |
| "loss": 0.6169, |
| "rewards/accuracies": 0.6944445371627808, |
| "rewards/chosen": 0.2143605351448059, |
| "rewards/margins": 1.1479460000991821, |
| "rewards/rejected": -0.933585524559021, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.22477817942819586, |
| "grad_norm": 54.83635711669922, |
| "learning_rate": 3.6928104575163404e-06, |
| "logits/chosen": -0.07722613215446472, |
| "logits/rejected": 0.056545909494161606, |
| "logps/chosen": -265.40985107421875, |
| "logps/rejected": -308.90350341796875, |
| "loss": 0.5748, |
| "rewards/accuracies": 0.6944445371627808, |
| "rewards/chosen": 0.5514373183250427, |
| "rewards/margins": 1.499307632446289, |
| "rewards/rejected": -0.9478704333305359, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.23069339467630628, |
| "grad_norm": 82.6873779296875, |
| "learning_rate": 3.7908496732026144e-06, |
| "logits/chosen": -0.05033176392316818, |
| "logits/rejected": 0.08841504901647568, |
| "logps/chosen": -284.0753479003906, |
| "logps/rejected": -315.85931396484375, |
| "loss": 0.6855, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": 0.38630211353302, |
| "rewards/margins": 1.3165416717529297, |
| "rewards/rejected": -0.9302395582199097, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2366086099244167, |
| "grad_norm": 34.905941009521484, |
| "learning_rate": 3.88888888888889e-06, |
| "logits/chosen": -0.014662293717265129, |
| "logits/rejected": 0.03569987416267395, |
| "logps/chosen": -269.83001708984375, |
| "logps/rejected": -294.5672912597656, |
| "loss": 0.5944, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": 0.29097482562065125, |
| "rewards/margins": 1.4026880264282227, |
| "rewards/rejected": -1.111713171005249, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2425238251725271, |
| "grad_norm": 34.07733917236328, |
| "learning_rate": 3.986928104575164e-06, |
| "logits/chosen": -0.08946999907493591, |
| "logits/rejected": 0.01624571532011032, |
| "logps/chosen": -283.3293151855469, |
| "logps/rejected": -312.3072509765625, |
| "loss": 0.554, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": 0.07897276431322098, |
| "rewards/margins": 1.4951574802398682, |
| "rewards/rejected": -1.416184663772583, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.24843904042063752, |
| "grad_norm": 47.81401824951172, |
| "learning_rate": 4.084967320261438e-06, |
| "logits/chosen": -0.08563312888145447, |
| "logits/rejected": -0.123930923640728, |
| "logps/chosen": -308.0613098144531, |
| "logps/rejected": -300.32025146484375, |
| "loss": 0.6619, |
| "rewards/accuracies": 0.6342592835426331, |
| "rewards/chosen": -0.285559743642807, |
| "rewards/margins": 1.018710970878601, |
| "rewards/rejected": -1.304270625114441, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2543542556687479, |
| "grad_norm": 40.03066635131836, |
| "learning_rate": 4.183006535947713e-06, |
| "logits/chosen": 0.0017192339291796088, |
| "logits/rejected": 0.14679786562919617, |
| "logps/chosen": -316.32379150390625, |
| "logps/rejected": -339.078125, |
| "loss": 0.6979, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": -0.6922714710235596, |
| "rewards/margins": 0.9672110080718994, |
| "rewards/rejected": -1.659482717514038, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.26026947091685837, |
| "grad_norm": 53.74517822265625, |
| "learning_rate": 4.281045751633987e-06, |
| "logits/chosen": -0.05079513043165207, |
| "logits/rejected": 0.07216000556945801, |
| "logps/chosen": -295.9747314453125, |
| "logps/rejected": -332.39013671875, |
| "loss": 0.5792, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -0.8141295909881592, |
| "rewards/margins": 1.5237984657287598, |
| "rewards/rejected": -2.337928295135498, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.26618468616496876, |
| "grad_norm": 35.11404800415039, |
| "learning_rate": 4.379084967320262e-06, |
| "logits/chosen": -0.046733301132917404, |
| "logits/rejected": 0.0873221755027771, |
| "logps/chosen": -289.7781982421875, |
| "logps/rejected": -330.5262756347656, |
| "loss": 0.5458, |
| "rewards/accuracies": 0.7129629850387573, |
| "rewards/chosen": -0.5395014882087708, |
| "rewards/margins": 1.484311819076538, |
| "rewards/rejected": -2.023813247680664, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2720999014130792, |
| "grad_norm": 46.060089111328125, |
| "learning_rate": 4.477124183006537e-06, |
| "logits/chosen": -0.10985089838504791, |
| "logits/rejected": 0.007513361983001232, |
| "logps/chosen": -291.6156311035156, |
| "logps/rejected": -330.0778503417969, |
| "loss": 0.6452, |
| "rewards/accuracies": 0.6712963581085205, |
| "rewards/chosen": -0.8152337074279785, |
| "rewards/margins": 1.0877575874328613, |
| "rewards/rejected": -1.9029912948608398, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2780151166611896, |
| "grad_norm": 36.288475036621094, |
| "learning_rate": 4.5751633986928105e-06, |
| "logits/chosen": -0.12672923505306244, |
| "logits/rejected": -0.05195396766066551, |
| "logps/chosen": -291.0050048828125, |
| "logps/rejected": -328.9648742675781, |
| "loss": 0.622, |
| "rewards/accuracies": 0.6990740299224854, |
| "rewards/chosen": -0.6529079675674438, |
| "rewards/margins": 1.3228920698165894, |
| "rewards/rejected": -1.9758000373840332, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.28393033190930006, |
| "grad_norm": 61.34006118774414, |
| "learning_rate": 4.673202614379085e-06, |
| "logits/chosen": -0.09269940853118896, |
| "logits/rejected": -0.09529760479927063, |
| "logps/chosen": -308.4407958984375, |
| "logps/rejected": -327.4217224121094, |
| "loss": 0.7128, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -0.8733962774276733, |
| "rewards/margins": 1.3924760818481445, |
| "rewards/rejected": -2.2658724784851074, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.28984554715741045, |
| "grad_norm": 60.66278839111328, |
| "learning_rate": 4.77124183006536e-06, |
| "logits/chosen": -0.21165470778942108, |
| "logits/rejected": -0.08071193099021912, |
| "logps/chosen": -292.4040832519531, |
| "logps/rejected": -327.4679260253906, |
| "loss": 0.6824, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -1.0551072359085083, |
| "rewards/margins": 1.499154806137085, |
| "rewards/rejected": -2.5542619228363037, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.29576076240552085, |
| "grad_norm": 37.254608154296875, |
| "learning_rate": 4.869281045751634e-06, |
| "logits/chosen": -0.09953123331069946, |
| "logits/rejected": -0.10298528522253036, |
| "logps/chosen": -307.6355285644531, |
| "logps/rejected": -334.2861328125, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.1909699440002441, |
| "rewards/margins": 1.456664800643921, |
| "rewards/rejected": -2.647634506225586, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3016759776536313, |
| "grad_norm": 29.970460891723633, |
| "learning_rate": 4.967320261437909e-06, |
| "logits/chosen": -0.16488048434257507, |
| "logits/rejected": -0.026979412883520126, |
| "logps/chosen": -292.1322021484375, |
| "logps/rejected": -334.13330078125, |
| "loss": 0.7198, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -1.4055306911468506, |
| "rewards/margins": 1.0318067073822021, |
| "rewards/rejected": -2.4373371601104736, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3075911929017417, |
| "grad_norm": 102.86775970458984, |
| "learning_rate": 4.999973746084687e-06, |
| "logits/chosen": -0.2688429355621338, |
| "logits/rejected": -0.08179165422916412, |
| "logps/chosen": -292.56201171875, |
| "logps/rejected": -334.35443115234375, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.6712962985038757, |
| "rewards/chosen": -1.556567907333374, |
| "rewards/margins": 1.6510968208312988, |
| "rewards/rejected": -3.2076644897460938, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.31350640814985214, |
| "grad_norm": 35.398048400878906, |
| "learning_rate": 4.999835914537063e-06, |
| "logits/chosen": -0.1668413281440735, |
| "logits/rejected": -0.13912776112556458, |
| "logps/chosen": -304.44842529296875, |
| "logps/rejected": -323.0757141113281, |
| "loss": 0.5734, |
| "rewards/accuracies": 0.7546296119689941, |
| "rewards/chosen": -1.2236417531967163, |
| "rewards/margins": 1.9698877334594727, |
| "rewards/rejected": -3.1935291290283203, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.31942162339796254, |
| "grad_norm": 54.54057312011719, |
| "learning_rate": 4.999579948383184e-06, |
| "logits/chosen": -0.15461772680282593, |
| "logits/rejected": -0.03271166980266571, |
| "logps/chosen": -295.79632568359375, |
| "logps/rejected": -327.92584228515625, |
| "loss": 0.7708, |
| "rewards/accuracies": 0.6620371341705322, |
| "rewards/chosen": -1.315285086631775, |
| "rewards/margins": 1.5617358684539795, |
| "rewards/rejected": -2.877020835876465, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.32533683864607293, |
| "grad_norm": 34.624603271484375, |
| "learning_rate": 4.9992058597192255e-06, |
| "logits/chosen": -0.10605038702487946, |
| "logits/rejected": 0.021997269243001938, |
| "logps/chosen": -306.4567565917969, |
| "logps/rejected": -336.64208984375, |
| "loss": 0.6976, |
| "rewards/accuracies": 0.6620370745658875, |
| "rewards/chosen": -1.345373511314392, |
| "rewards/margins": 1.6246647834777832, |
| "rewards/rejected": -2.9700381755828857, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3312520538941834, |
| "grad_norm": 67.67698669433594, |
| "learning_rate": 4.9987136662234764e-06, |
| "logits/chosen": -0.0949287861585617, |
| "logits/rejected": -0.04236632585525513, |
| "logps/chosen": -310.03973388671875, |
| "logps/rejected": -338.93768310546875, |
| "loss": 0.7999, |
| "rewards/accuracies": 0.6851851940155029, |
| "rewards/chosen": -2.0794289112091064, |
| "rewards/margins": 1.357082724571228, |
| "rewards/rejected": -3.436511516571045, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3371672691422938, |
| "grad_norm": 32.37373733520508, |
| "learning_rate": 4.998103391155496e-06, |
| "logits/chosen": -0.0922163650393486, |
| "logits/rejected": 0.0038010727148503065, |
| "logps/chosen": -299.1915283203125, |
| "logps/rejected": -340.01202392578125, |
| "loss": 0.7605, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": -2.1031405925750732, |
| "rewards/margins": 1.5288575887680054, |
| "rewards/rejected": -3.631998062133789, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.34308248439040423, |
| "grad_norm": 41.414024353027344, |
| "learning_rate": 4.997375063355021e-06, |
| "logits/chosen": -0.14530247449874878, |
| "logits/rejected": -0.017910713329911232, |
| "logps/chosen": -294.3675537109375, |
| "logps/rejected": -331.98138427734375, |
| "loss": 0.6571, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": -2.397458791732788, |
| "rewards/margins": 1.8552653789520264, |
| "rewards/rejected": -4.252723693847656, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3489976996385146, |
| "grad_norm": 48.10707473754883, |
| "learning_rate": 4.996528717240595e-06, |
| "logits/chosen": -0.05039427801966667, |
| "logits/rejected": -0.03388180956244469, |
| "logps/chosen": -334.423583984375, |
| "logps/rejected": -366.33087158203125, |
| "loss": 0.6121, |
| "rewards/accuracies": 0.7129630446434021, |
| "rewards/chosen": -2.6137490272521973, |
| "rewards/margins": 2.267016887664795, |
| "rewards/rejected": -4.880765438079834, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.354912914886625, |
| "grad_norm": 48.403987884521484, |
| "learning_rate": 4.995564392807951e-06, |
| "logits/chosen": -0.12557795643806458, |
| "logits/rejected": -0.08240347355604172, |
| "logps/chosen": -307.76190185546875, |
| "logps/rejected": -336.8500671386719, |
| "loss": 0.8248, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": -2.706207036972046, |
| "rewards/margins": 1.6851190328598022, |
| "rewards/rejected": -4.391325950622559, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.36082813013473547, |
| "grad_norm": 33.078346252441406, |
| "learning_rate": 4.994482135628115e-06, |
| "logits/chosen": -0.0540686696767807, |
| "logits/rejected": -0.02901211380958557, |
| "logps/chosen": -308.53997802734375, |
| "logps/rejected": -325.751220703125, |
| "loss": 0.6532, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -2.590834617614746, |
| "rewards/margins": 2.104125738143921, |
| "rewards/rejected": -4.694960594177246, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.36674334538284586, |
| "grad_norm": 48.71003723144531, |
| "learning_rate": 4.993281996845253e-06, |
| "logits/chosen": -0.12156584858894348, |
| "logits/rejected": -0.009240781888365746, |
| "logps/chosen": -309.54833984375, |
| "logps/rejected": -357.3154296875, |
| "loss": 0.7973, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -3.1666088104248047, |
| "rewards/margins": 1.5774611234664917, |
| "rewards/rejected": -4.744070053100586, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3726585606309563, |
| "grad_norm": 34.78226089477539, |
| "learning_rate": 4.991964033174257e-06, |
| "logits/chosen": 0.03807515650987625, |
| "logits/rejected": 0.03681405261158943, |
| "logps/chosen": -311.2495422363281, |
| "logps/rejected": -324.69183349609375, |
| "loss": 0.7562, |
| "rewards/accuracies": 0.6388888359069824, |
| "rewards/chosen": -2.8808019161224365, |
| "rewards/margins": 1.3406963348388672, |
| "rewards/rejected": -4.221498489379883, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3785737758790667, |
| "grad_norm": 40.5612678527832, |
| "learning_rate": 4.990528306898062e-06, |
| "logits/chosen": -0.02013694867491722, |
| "logits/rejected": 0.06969591975212097, |
| "logps/chosen": -311.6338806152344, |
| "logps/rejected": -357.86932373046875, |
| "loss": 0.6847, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": -2.982748031616211, |
| "rewards/margins": 1.80600905418396, |
| "rewards/rejected": -4.78875732421875, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3844889911271771, |
| "grad_norm": 40.58036804199219, |
| "learning_rate": 4.988974885864706e-06, |
| "logits/chosen": -0.07019342482089996, |
| "logits/rejected": 0.034122247248888016, |
| "logps/chosen": -311.8460388183594, |
| "logps/rejected": -335.7353515625, |
| "loss": 0.6818, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -2.6682796478271484, |
| "rewards/margins": 2.0244059562683105, |
| "rewards/rejected": -4.692685127258301, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.39040420637528755, |
| "grad_norm": 40.306243896484375, |
| "learning_rate": 4.987303843484119e-06, |
| "logits/chosen": -0.04457690566778183, |
| "logits/rejected": -0.04778536409139633, |
| "logps/chosen": -323.8233337402344, |
| "logps/rejected": -337.99658203125, |
| "loss": 0.8443, |
| "rewards/accuracies": 0.6712962985038757, |
| "rewards/chosen": -2.713296413421631, |
| "rewards/margins": 2.1648170948028564, |
| "rewards/rejected": -4.878113269805908, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.39631942162339795, |
| "grad_norm": 40.987098693847656, |
| "learning_rate": 4.985515258724657e-06, |
| "logits/chosen": -0.1199549064040184, |
| "logits/rejected": 0.04760899394750595, |
| "logps/chosen": -315.40380859375, |
| "logps/rejected": -365.968017578125, |
| "loss": 0.8342, |
| "rewards/accuracies": 0.6296296119689941, |
| "rewards/chosen": -2.873556137084961, |
| "rewards/margins": 1.9774025678634644, |
| "rewards/rejected": -4.850958824157715, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4022346368715084, |
| "grad_norm": 63.48135757446289, |
| "learning_rate": 4.983609216109371e-06, |
| "logits/chosen": -0.11053924262523651, |
| "logits/rejected": 0.05218297988176346, |
| "logps/chosen": -315.01361083984375, |
| "logps/rejected": -348.6982421875, |
| "loss": 0.8018, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": -3.1666953563690186, |
| "rewards/margins": 2.3253214359283447, |
| "rewards/rejected": -5.492016792297363, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4081498521196188, |
| "grad_norm": 38.437744140625, |
| "learning_rate": 4.981585805712011e-06, |
| "logits/chosen": 0.056118495762348175, |
| "logits/rejected": 0.09087176620960236, |
| "logps/chosen": -318.7930603027344, |
| "logps/rejected": -362.77825927734375, |
| "loss": 0.8394, |
| "rewards/accuracies": 0.638888955116272, |
| "rewards/chosen": -3.5864510536193848, |
| "rewards/margins": 2.024658203125, |
| "rewards/rejected": -5.611109733581543, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4140650673677292, |
| "grad_norm": 38.63540267944336, |
| "learning_rate": 4.979445123152767e-06, |
| "logits/chosen": 0.05023570358753204, |
| "logits/rejected": 0.07232505083084106, |
| "logps/chosen": -325.7311096191406, |
| "logps/rejected": -351.2863464355469, |
| "loss": 0.8521, |
| "rewards/accuracies": 0.6574074625968933, |
| "rewards/chosen": -3.6044719219207764, |
| "rewards/margins": 1.671539306640625, |
| "rewards/rejected": -5.2760114669799805, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.41998028261583964, |
| "grad_norm": 41.875938415527344, |
| "learning_rate": 4.977187269593758e-06, |
| "logits/chosen": -0.009280918166041374, |
| "logits/rejected": 0.08061732351779938, |
| "logps/chosen": -309.8787841796875, |
| "logps/rejected": -338.63531494140625, |
| "loss": 0.9004, |
| "rewards/accuracies": 0.6481481790542603, |
| "rewards/chosen": -3.5854623317718506, |
| "rewards/margins": 1.1396470069885254, |
| "rewards/rejected": -4.725109100341797, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.42589549786395003, |
| "grad_norm": 103.75460815429688, |
| "learning_rate": 4.974812351734241e-06, |
| "logits/chosen": -0.02067667804658413, |
| "logits/rejected": 0.05178874731063843, |
| "logps/chosen": -312.7908630371094, |
| "logps/rejected": -342.73443603515625, |
| "loss": 0.802, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -3.1982085704803467, |
| "rewards/margins": 1.6781682968139648, |
| "rewards/rejected": -4.876377105712891, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4318107131120605, |
| "grad_norm": 45.85707092285156, |
| "learning_rate": 4.972320481805578e-06, |
| "logits/chosen": -0.1770055741071701, |
| "logits/rejected": -0.07678120583295822, |
| "logps/chosen": -323.437744140625, |
| "logps/rejected": -376.6873474121094, |
| "loss": 0.7821, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -3.227814197540283, |
| "rewards/margins": 1.796374797821045, |
| "rewards/rejected": -5.024188995361328, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4377259283601709, |
| "grad_norm": 61.05466079711914, |
| "learning_rate": 4.969711777565928e-06, |
| "logits/chosen": -0.05364451929926872, |
| "logits/rejected": -0.02637811005115509, |
| "logps/chosen": -334.20367431640625, |
| "logps/rejected": -368.54693603515625, |
| "loss": 0.8648, |
| "rewards/accuracies": 0.6759259104728699, |
| "rewards/chosen": -3.613854169845581, |
| "rewards/margins": 1.6240665912628174, |
| "rewards/rejected": -5.237921237945557, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4436411436082813, |
| "grad_norm": 34.13490295410156, |
| "learning_rate": 4.96698636229468e-06, |
| "logits/chosen": -0.005055941641330719, |
| "logits/rejected": 0.06123928725719452, |
| "logps/chosen": -324.4971923828125, |
| "logps/rejected": -349.1205749511719, |
| "loss": 0.7293, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -3.4245200157165527, |
| "rewards/margins": 1.7457599639892578, |
| "rewards/rejected": -5.1702799797058105, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4495563588563917, |
| "grad_norm": 83.93833923339844, |
| "learning_rate": 4.964144364786632e-06, |
| "logits/chosen": 0.13881027698516846, |
| "logits/rejected": 0.12519869208335876, |
| "logps/chosen": -348.6081237792969, |
| "logps/rejected": -361.4942321777344, |
| "loss": 1.0233, |
| "rewards/accuracies": 0.6851851940155029, |
| "rewards/chosen": -4.027976036071777, |
| "rewards/margins": 1.528003454208374, |
| "rewards/rejected": -5.555978775024414, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4554715741045021, |
| "grad_norm": 97.00470733642578, |
| "learning_rate": 4.9611859193459015e-06, |
| "logits/chosen": 0.07493604719638824, |
| "logits/rejected": 0.14453333616256714, |
| "logps/chosen": -326.7818298339844, |
| "logps/rejected": -349.25189208984375, |
| "loss": 0.92, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -4.245251655578613, |
| "rewards/margins": 1.4085248708724976, |
| "rewards/rejected": -5.653777122497559, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.46138678935261257, |
| "grad_norm": 42.000885009765625, |
| "learning_rate": 4.958111165779579e-06, |
| "logits/chosen": 0.0968238115310669, |
| "logits/rejected": 0.17590413987636566, |
| "logps/chosen": -324.4273376464844, |
| "logps/rejected": -368.86004638671875, |
| "loss": 0.8419, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -4.32670259475708, |
| "rewards/margins": 1.9024198055267334, |
| "rewards/rejected": -6.229121685028076, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.46730200460072296, |
| "grad_norm": 34.4752197265625, |
| "learning_rate": 4.954920249391123e-06, |
| "logits/chosen": 0.035372521728277206, |
| "logits/rejected": 0.07403655350208282, |
| "logps/chosen": -336.0986633300781, |
| "logps/rejected": -353.7882995605469, |
| "loss": 0.6037, |
| "rewards/accuracies": 0.7546296119689941, |
| "rewards/chosen": -3.8972387313842773, |
| "rewards/margins": 1.817379355430603, |
| "rewards/rejected": -5.714618682861328, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4732172198488334, |
| "grad_norm": 28.94403648376465, |
| "learning_rate": 4.951613320973491e-06, |
| "logits/chosen": 0.0323605015873909, |
| "logits/rejected": 0.0774674192070961, |
| "logps/chosen": -324.99774169921875, |
| "logps/rejected": -344.53985595703125, |
| "loss": 0.7093, |
| "rewards/accuracies": 0.7268518805503845, |
| "rewards/chosen": -4.354706764221191, |
| "rewards/margins": 1.9214775562286377, |
| "rewards/rejected": -6.276185035705566, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4791324350969438, |
| "grad_norm": 30.506528854370117, |
| "learning_rate": 4.948190536802015e-06, |
| "logits/chosen": -0.06049029156565666, |
| "logits/rejected": 0.006028448697179556, |
| "logps/chosen": -324.240234375, |
| "logps/rejected": -350.5492858886719, |
| "loss": 0.7427, |
| "rewards/accuracies": 0.6388888955116272, |
| "rewards/chosen": -4.099154472351074, |
| "rewards/margins": 1.713746428489685, |
| "rewards/rejected": -5.812901496887207, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4850476503450542, |
| "grad_norm": 73.70082092285156, |
| "learning_rate": 4.944652058627013e-06, |
| "logits/chosen": -0.08537846058607101, |
| "logits/rejected": -0.032611675560474396, |
| "logps/chosen": -336.5609130859375, |
| "logps/rejected": -372.4656677246094, |
| "loss": 0.6616, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": -4.300870895385742, |
| "rewards/margins": 1.9437074661254883, |
| "rewards/rejected": -6.2445783615112305, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.49096286559316465, |
| "grad_norm": 34.85869598388672, |
| "learning_rate": 4.9409980536661535e-06, |
| "logits/chosen": -0.06217961013317108, |
| "logits/rejected": -0.004529049154371023, |
| "logps/chosen": -340.4526062011719, |
| "logps/rejected": -369.7755126953125, |
| "loss": 0.8365, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": -4.246068000793457, |
| "rewards/margins": 1.704252004623413, |
| "rewards/rejected": -5.950319290161133, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.49687808084127505, |
| "grad_norm": 39.66143035888672, |
| "learning_rate": 4.937228694596545e-06, |
| "logits/chosen": -0.12100633233785629, |
| "logits/rejected": -0.006573869846761227, |
| "logps/chosen": -320.8197021484375, |
| "logps/rejected": -355.7471923828125, |
| "loss": 0.6241, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -3.746490478515625, |
| "rewards/margins": 2.363032579421997, |
| "rewards/rejected": -6.109523296356201, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5027932960893855, |
| "grad_norm": 33.68574142456055, |
| "learning_rate": 4.933344159546577e-06, |
| "logits/chosen": -0.18371449410915375, |
| "logits/rejected": -0.022255782037973404, |
| "logps/chosen": -333.9938049316406, |
| "logps/rejected": -384.731689453125, |
| "loss": 0.8266, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -4.545222759246826, |
| "rewards/margins": 2.332798719406128, |
| "rewards/rejected": -6.878022193908691, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5087085113374958, |
| "grad_norm": 45.985260009765625, |
| "learning_rate": 4.929344632087506e-06, |
| "logits/chosen": -0.14562034606933594, |
| "logits/rejected": -0.04126621410250664, |
| "logps/chosen": -327.22882080078125, |
| "logps/rejected": -382.4744873046875, |
| "loss": 0.6009, |
| "rewards/accuracies": 0.7777777910232544, |
| "rewards/chosen": -4.554243564605713, |
| "rewards/margins": 3.3480947017669678, |
| "rewards/rejected": -7.902338027954102, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5146237265856063, |
| "grad_norm": 59.11403274536133, |
| "learning_rate": 4.9252303012247775e-06, |
| "logits/chosen": -0.11811560392379761, |
| "logits/rejected": -0.01293177530169487, |
| "logps/chosen": -351.2400817871094, |
| "logps/rejected": -404.04693603515625, |
| "loss": 0.7836, |
| "rewards/accuracies": 0.7592592239379883, |
| "rewards/chosen": -5.45390510559082, |
| "rewards/margins": 3.5169780254364014, |
| "rewards/rejected": -8.970884323120117, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5205389418337167, |
| "grad_norm": 38.673831939697266, |
| "learning_rate": 4.921001361389096e-06, |
| "logits/chosen": -0.05326487869024277, |
| "logits/rejected": 0.009572159498929977, |
| "logps/chosen": -333.938232421875, |
| "logps/rejected": -371.80816650390625, |
| "loss": 0.7732, |
| "rewards/accuracies": 0.7175925970077515, |
| "rewards/chosen": -5.093016624450684, |
| "rewards/margins": 2.952821731567383, |
| "rewards/rejected": -8.045838356018066, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5264541570818272, |
| "grad_norm": 33.54100036621094, |
| "learning_rate": 4.916658012427235e-06, |
| "logits/chosen": -0.037946220487356186, |
| "logits/rejected": 0.06312233209609985, |
| "logps/chosen": -342.2742614746094, |
| "logps/rejected": -391.18646240234375, |
| "loss": 0.7494, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -5.872312545776367, |
| "rewards/margins": 2.313776969909668, |
| "rewards/rejected": -8.186089515686035, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5323693723299375, |
| "grad_norm": 37.88241195678711, |
| "learning_rate": 4.912200459592595e-06, |
| "logits/chosen": -0.06072680652141571, |
| "logits/rejected": 0.09704061597585678, |
| "logps/chosen": -343.1576843261719, |
| "logps/rejected": -407.43798828125, |
| "loss": 0.8544, |
| "rewards/accuracies": 0.6620370149612427, |
| "rewards/chosen": -6.514297008514404, |
| "rewards/margins": 2.3704113960266113, |
| "rewards/rejected": -8.884708404541016, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.538284587578048, |
| "grad_norm": 71.56549072265625, |
| "learning_rate": 4.9076289135355e-06, |
| "logits/chosen": 0.02188403531908989, |
| "logits/rejected": 0.1214473694562912, |
| "logps/chosen": -355.06793212890625, |
| "logps/rejected": -388.5225830078125, |
| "loss": 0.873, |
| "rewards/accuracies": 0.6342592835426331, |
| "rewards/chosen": -6.959141731262207, |
| "rewards/margins": 1.8934102058410645, |
| "rewards/rejected": -8.852551460266113, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5441998028261584, |
| "grad_norm": 55.54237365722656, |
| "learning_rate": 4.902943590293245e-06, |
| "logits/chosen": 0.0230946596711874, |
| "logits/rejected": 0.08508029580116272, |
| "logps/chosen": -349.1761169433594, |
| "logps/rejected": -398.710693359375, |
| "loss": 0.8547, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -6.52820348739624, |
| "rewards/margins": 2.2471137046813965, |
| "rewards/rejected": -8.775317192077637, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5501150180742688, |
| "grad_norm": 48.57643127441406, |
| "learning_rate": 4.898144711279894e-06, |
| "logits/chosen": -0.07424692809581757, |
| "logits/rejected": 0.09649358689785004, |
| "logps/chosen": -339.84686279296875, |
| "logps/rejected": -393.98016357421875, |
| "loss": 0.8443, |
| "rewards/accuracies": 0.5972222685813904, |
| "rewards/chosen": -6.690239906311035, |
| "rewards/margins": 2.0843098163604736, |
| "rewards/rejected": -8.77454948425293, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5560302333223792, |
| "grad_norm": 28.025590896606445, |
| "learning_rate": 4.8932325032758006e-06, |
| "logits/chosen": -0.13962030410766602, |
| "logits/rejected": 0.05175274237990379, |
| "logps/chosen": -335.58966064453125, |
| "logps/rejected": -378.72833251953125, |
| "loss": 0.6595, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": -5.493226528167725, |
| "rewards/margins": 2.881283760070801, |
| "rewards/rejected": -8.374510765075684, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5619454485704897, |
| "grad_norm": 42.00820541381836, |
| "learning_rate": 4.8882071984169055e-06, |
| "logits/chosen": 0.007744944654405117, |
| "logits/rejected": 0.10258468985557556, |
| "logps/chosen": -363.2281494140625, |
| "logps/rejected": -410.42266845703125, |
| "loss": 0.7705, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -6.42072868347168, |
| "rewards/margins": 2.2050187587738037, |
| "rewards/rejected": -8.625746726989746, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5678606638186001, |
| "grad_norm": 54.86595916748047, |
| "learning_rate": 4.8830690341837596e-06, |
| "logits/chosen": -0.032913923263549805, |
| "logits/rejected": 0.1072501391172409, |
| "logps/chosen": -358.8460693359375, |
| "logps/rejected": -409.0872802734375, |
| "loss": 0.9018, |
| "rewards/accuracies": 0.6944444179534912, |
| "rewards/chosen": -6.712845802307129, |
| "rewards/margins": 2.6474528312683105, |
| "rewards/rejected": -9.360298156738281, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5737758790667105, |
| "grad_norm": 63.118587493896484, |
| "learning_rate": 4.877818253390303e-06, |
| "logits/chosen": -0.04406512528657913, |
| "logits/rejected": 0.006794461514800787, |
| "logps/chosen": -367.21063232421875, |
| "logps/rejected": -415.6984558105469, |
| "loss": 0.866, |
| "rewards/accuracies": 0.6944445371627808, |
| "rewards/chosen": -7.632047176361084, |
| "rewards/margins": 2.7398147583007812, |
| "rewards/rejected": -10.371862411499023, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5796910943148209, |
| "grad_norm": 35.43265914916992, |
| "learning_rate": 4.872455104172392e-06, |
| "logits/chosen": -0.018917741253972054, |
| "logits/rejected": 0.09038470685482025, |
| "logps/chosen": -353.57666015625, |
| "logps/rejected": -384.8489990234375, |
| "loss": 0.8107, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": -7.3933916091918945, |
| "rewards/margins": 2.474519729614258, |
| "rewards/rejected": -9.867910385131836, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5856063095629314, |
| "grad_norm": 44.21418762207031, |
| "learning_rate": 4.866979839976068e-06, |
| "logits/chosen": -0.0317760705947876, |
| "logits/rejected": 0.05773278325796127, |
| "logps/chosen": -358.10247802734375, |
| "logps/rejected": -407.8479919433594, |
| "loss": 0.8849, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -8.095900535583496, |
| "rewards/margins": 2.4276022911071777, |
| "rewards/rejected": -10.523502349853516, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5915215248110417, |
| "grad_norm": 63.40266799926758, |
| "learning_rate": 4.861392719545586e-06, |
| "logits/chosen": -0.07806281745433807, |
| "logits/rejected": -0.02447107993066311, |
| "logps/chosen": -356.18218994140625, |
| "logps/rejected": -387.41180419921875, |
| "loss": 0.9313, |
| "rewards/accuracies": 0.6388888955116272, |
| "rewards/chosen": -7.356019020080566, |
| "rewards/margins": 1.7391393184661865, |
| "rewards/rejected": -9.095157623291016, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5974367400591521, |
| "grad_norm": 66.6312484741211, |
| "learning_rate": 4.855694006911184e-06, |
| "logits/chosen": -0.06037697196006775, |
| "logits/rejected": -0.07680558413267136, |
| "logps/chosen": -368.1618347167969, |
| "logps/rejected": -388.21026611328125, |
| "loss": 0.9553, |
| "rewards/accuracies": 0.6435185670852661, |
| "rewards/chosen": -7.906184673309326, |
| "rewards/margins": 1.781913161277771, |
| "rewards/rejected": -9.688097953796387, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6033519553072626, |
| "grad_norm": 50.65395736694336, |
| "learning_rate": 4.849883971376608e-06, |
| "logits/chosen": -0.036385513842105865, |
| "logits/rejected": 0.02312180958688259, |
| "logps/chosen": -360.32763671875, |
| "logps/rejected": -385.574462890625, |
| "loss": 0.8076, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -7.673541069030762, |
| "rewards/margins": 2.30534291267395, |
| "rewards/rejected": -9.97888469696045, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6092671705553729, |
| "grad_norm": 37.83553695678711, |
| "learning_rate": 4.843962887506382e-06, |
| "logits/chosen": -0.007246436085551977, |
| "logits/rejected": 0.10245460271835327, |
| "logps/chosen": -360.2029724121094, |
| "logps/rejected": -388.7747497558594, |
| "loss": 1.0146, |
| "rewards/accuracies": 0.638888955116272, |
| "rewards/chosen": -7.557069301605225, |
| "rewards/margins": 2.0892889499664307, |
| "rewards/rejected": -9.646357536315918, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6151823858034834, |
| "grad_norm": 67.90631103515625, |
| "learning_rate": 4.837931035112836e-06, |
| "logits/chosen": -0.021308597177267075, |
| "logits/rejected": 0.05145422741770744, |
| "logps/chosen": -340.68328857421875, |
| "logps/rejected": -396.016845703125, |
| "loss": 0.8041, |
| "rewards/accuracies": 0.6712962985038757, |
| "rewards/chosen": -6.7121500968933105, |
| "rewards/margins": 3.0467376708984375, |
| "rewards/rejected": -9.758888244628906, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6210976010515938, |
| "grad_norm": 38.1522331237793, |
| "learning_rate": 4.831788699242882e-06, |
| "logits/chosen": 0.12459397315979004, |
| "logits/rejected": 0.07747067511081696, |
| "logps/chosen": -381.19317626953125, |
| "logps/rejected": -386.43023681640625, |
| "loss": 0.8322, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -7.670474529266357, |
| "rewards/margins": 2.657536268234253, |
| "rewards/rejected": -10.328010559082031, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6270128162997043, |
| "grad_norm": 37.3199462890625, |
| "learning_rate": 4.825536170164543e-06, |
| "logits/chosen": 0.0364363007247448, |
| "logits/rejected": 0.1072683334350586, |
| "logps/chosen": -384.99658203125, |
| "logps/rejected": -423.906494140625, |
| "loss": 0.8372, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": -8.085029602050781, |
| "rewards/margins": 2.9126205444335938, |
| "rewards/rejected": -10.997650146484375, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6329280315478146, |
| "grad_norm": 84.70304870605469, |
| "learning_rate": 4.819173743353237e-06, |
| "logits/chosen": 0.07106878608465195, |
| "logits/rejected": 0.07161180675029755, |
| "logps/chosen": -353.46929931640625, |
| "logps/rejected": -382.072998046875, |
| "loss": 0.9152, |
| "rewards/accuracies": 0.6620370745658875, |
| "rewards/chosen": -8.362825393676758, |
| "rewards/margins": 2.396571636199951, |
| "rewards/rejected": -10.759397506713867, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6388432467959251, |
| "grad_norm": 41.621517181396484, |
| "learning_rate": 4.812701719477813e-06, |
| "logits/chosen": -0.042387984693050385, |
| "logits/rejected": -0.006728718988597393, |
| "logps/chosen": -397.9143981933594, |
| "logps/rejected": -417.5738220214844, |
| "loss": 0.7585, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -8.613710403442383, |
| "rewards/margins": 3.0014796257019043, |
| "rewards/rejected": -11.615188598632812, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6447584620440355, |
| "grad_norm": 34.132080078125, |
| "learning_rate": 4.80612040438634e-06, |
| "logits/chosen": -0.005274191033095121, |
| "logits/rejected": -0.0017657628050073981, |
| "logps/chosen": -391.5089111328125, |
| "logps/rejected": -413.4745788574219, |
| "loss": 0.8454, |
| "rewards/accuracies": 0.6759259700775146, |
| "rewards/chosen": -8.282991409301758, |
| "rewards/margins": 2.736060619354248, |
| "rewards/rejected": -11.019050598144531, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6506736772921459, |
| "grad_norm": 42.51496505737305, |
| "learning_rate": 4.799430109091659e-06, |
| "logits/chosen": -0.15326707065105438, |
| "logits/rejected": -0.08302909135818481, |
| "logps/chosen": -363.5447998046875, |
| "logps/rejected": -425.05645751953125, |
| "loss": 0.8109, |
| "rewards/accuracies": 0.7314814925193787, |
| "rewards/chosen": -8.056258201599121, |
| "rewards/margins": 3.1323180198669434, |
| "rewards/rejected": -11.188575744628906, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6565888925402563, |
| "grad_norm": 38.95637512207031, |
| "learning_rate": 4.792631149756683e-06, |
| "logits/chosen": -0.10734808444976807, |
| "logits/rejected": -0.13398586213588715, |
| "logps/chosen": -383.37957763671875, |
| "logps/rejected": -396.33050537109375, |
| "loss": 0.9087, |
| "rewards/accuracies": 0.6805557012557983, |
| "rewards/chosen": -9.402881622314453, |
| "rewards/margins": 2.247579574584961, |
| "rewards/rejected": -11.65046215057373, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6625041077883668, |
| "grad_norm": 30.90215301513672, |
| "learning_rate": 4.785723847679451e-06, |
| "logits/chosen": -0.17347650229930878, |
| "logits/rejected": -0.04830838367342949, |
| "logps/chosen": -365.1563415527344, |
| "logps/rejected": -424.2984619140625, |
| "loss": 0.7836, |
| "rewards/accuracies": 0.7222222089767456, |
| "rewards/chosen": -9.815483093261719, |
| "rewards/margins": 2.839183807373047, |
| "rewards/rejected": -12.654666900634766, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6684193230364771, |
| "grad_norm": 47.471736907958984, |
| "learning_rate": 4.778708529277954e-06, |
| "logits/chosen": -0.1100246012210846, |
| "logits/rejected": -0.06617899239063263, |
| "logps/chosen": -385.6170654296875, |
| "logps/rejected": -420.5422058105469, |
| "loss": 0.9052, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": -9.508901596069336, |
| "rewards/margins": 3.112328052520752, |
| "rewards/rejected": -12.621230125427246, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6743345382845876, |
| "grad_norm": 47.64808654785156, |
| "learning_rate": 4.7715855260747e-06, |
| "logits/chosen": -0.1708156168460846, |
| "logits/rejected": -0.07380948960781097, |
| "logps/chosen": -389.4483337402344, |
| "logps/rejected": -430.76080322265625, |
| "loss": 0.7454, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": -9.387763023376465, |
| "rewards/margins": 3.1104159355163574, |
| "rewards/rejected": -12.498178482055664, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.680249753532698, |
| "grad_norm": 34.76079559326172, |
| "learning_rate": 4.764355174681056e-06, |
| "logits/chosen": -0.12020980566740036, |
| "logits/rejected": -0.09629341214895248, |
| "logps/chosen": -386.8775329589844, |
| "logps/rejected": -407.75299072265625, |
| "loss": 0.7826, |
| "rewards/accuracies": 0.7129629850387573, |
| "rewards/chosen": -9.380858421325684, |
| "rewards/margins": 2.675083637237549, |
| "rewards/rejected": -12.055941581726074, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6861649687808085, |
| "grad_norm": 31.153663635253906, |
| "learning_rate": 4.757017816781331e-06, |
| "logits/chosen": -0.16364365816116333, |
| "logits/rejected": -0.0642888993024826, |
| "logps/chosen": -379.285400390625, |
| "logps/rejected": -452.3611755371094, |
| "loss": 0.6606, |
| "rewards/accuracies": 0.7314814925193787, |
| "rewards/chosen": -9.602463722229004, |
| "rewards/margins": 3.611215591430664, |
| "rewards/rejected": -13.213679313659668, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6920801840289188, |
| "grad_norm": 49.72859191894531, |
| "learning_rate": 4.74957379911664e-06, |
| "logits/chosen": -0.07502593845129013, |
| "logits/rejected": 0.00437380513176322, |
| "logps/chosen": -395.549560546875, |
| "logps/rejected": -437.9383544921875, |
| "loss": 0.8434, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": -10.370527267456055, |
| "rewards/margins": 2.701253652572632, |
| "rewards/rejected": -13.07178020477295, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6979953992770292, |
| "grad_norm": 56.92310333251953, |
| "learning_rate": 4.7420234734685104e-06, |
| "logits/chosen": -0.03868694603443146, |
| "logits/rejected": -0.0014290250837802887, |
| "logps/chosen": -401.3062744140625, |
| "logps/rejected": -445.33807373046875, |
| "loss": 0.8181, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -10.017228126525879, |
| "rewards/margins": 3.0577263832092285, |
| "rewards/rejected": -13.07495403289795, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7039106145251397, |
| "grad_norm": 28.037378311157227, |
| "learning_rate": 4.7343671966422584e-06, |
| "logits/chosen": -0.19204241037368774, |
| "logits/rejected": -0.08173765987157822, |
| "logps/chosen": -388.4847106933594, |
| "logps/rejected": -435.093505859375, |
| "loss": 0.8228, |
| "rewards/accuracies": 0.6944444179534912, |
| "rewards/chosen": -9.829904556274414, |
| "rewards/margins": 2.792029857635498, |
| "rewards/rejected": -12.621932983398438, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.70982582977325, |
| "grad_norm": 40.521690368652344, |
| "learning_rate": 4.726605330450132e-06, |
| "logits/chosen": -0.13022971153259277, |
| "logits/rejected": -0.08149293065071106, |
| "logps/chosen": -379.46185302734375, |
| "logps/rejected": -414.7354736328125, |
| "loss": 0.7553, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -8.846694946289062, |
| "rewards/margins": 2.8018383979797363, |
| "rewards/rejected": -11.64853286743164, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7157410450213605, |
| "grad_norm": 58.53022003173828, |
| "learning_rate": 4.718738241694207e-06, |
| "logits/chosen": -0.1846962869167328, |
| "logits/rejected": -0.10722313821315765, |
| "logps/chosen": -353.771484375, |
| "logps/rejected": -411.9970703125, |
| "loss": 0.7709, |
| "rewards/accuracies": 0.708333432674408, |
| "rewards/chosen": -8.93757152557373, |
| "rewards/margins": 2.895968198776245, |
| "rewards/rejected": -11.833539962768555, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7216562602694709, |
| "grad_norm": 62.94578552246094, |
| "learning_rate": 4.710766302149059e-06, |
| "logits/chosen": -0.11727502197027206, |
| "logits/rejected": -0.07848824560642242, |
| "logps/chosen": -386.8691101074219, |
| "logps/rejected": -423.58203125, |
| "loss": 0.9148, |
| "rewards/accuracies": 0.6990741491317749, |
| "rewards/chosen": -9.200393676757812, |
| "rewards/margins": 2.5602023601531982, |
| "rewards/rejected": -11.76059627532959, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7275714755175814, |
| "grad_norm": 41.92021560668945, |
| "learning_rate": 4.7026898885441895e-06, |
| "logits/chosen": -0.23892198503017426, |
| "logits/rejected": -0.19355922937393188, |
| "logps/chosen": -371.76507568359375, |
| "logps/rejected": -428.6576843261719, |
| "loss": 0.5582, |
| "rewards/accuracies": 0.7685184478759766, |
| "rewards/chosen": -8.767149925231934, |
| "rewards/margins": 3.590496063232422, |
| "rewards/rejected": -12.357645988464355, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7334866907656917, |
| "grad_norm": 49.8127555847168, |
| "learning_rate": 4.694509382546225e-06, |
| "logits/chosen": -0.15272876620292664, |
| "logits/rejected": -0.16361907124519348, |
| "logps/chosen": -391.66729736328125, |
| "logps/rejected": -436.54638671875, |
| "loss": 0.7923, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -9.770153045654297, |
| "rewards/margins": 3.136183738708496, |
| "rewards/rejected": -12.906336784362793, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7394019060138022, |
| "grad_norm": 58.40044403076172, |
| "learning_rate": 4.686225170740881e-06, |
| "logits/chosen": -0.18054398894309998, |
| "logits/rejected": -0.10335493087768555, |
| "logps/chosen": -412.2997741699219, |
| "logps/rejected": -459.0295104980469, |
| "loss": 0.8095, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -9.97874641418457, |
| "rewards/margins": 2.9409613609313965, |
| "rewards/rejected": -12.919708251953125, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7453171212619126, |
| "grad_norm": 39.62379837036133, |
| "learning_rate": 4.677837644614692e-06, |
| "logits/chosen": -0.0909600779414177, |
| "logits/rejected": -0.023966720327734947, |
| "logps/chosen": -404.3166198730469, |
| "logps/rejected": -445.68218994140625, |
| "loss": 0.8851, |
| "rewards/accuracies": 0.6712962985038757, |
| "rewards/chosen": -10.392790794372559, |
| "rewards/margins": 2.6830623149871826, |
| "rewards/rejected": -13.07585334777832, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.751232336510023, |
| "grad_norm": 70.16593170166016, |
| "learning_rate": 4.669347200536513e-06, |
| "logits/chosen": -0.22100476920604706, |
| "logits/rejected": -0.07667340338230133, |
| "logps/chosen": -388.3753662109375, |
| "logps/rejected": -459.80645751953125, |
| "loss": 0.9195, |
| "rewards/accuracies": 0.6944444179534912, |
| "rewards/chosen": -9.879241943359375, |
| "rewards/margins": 3.15310001373291, |
| "rewards/rejected": -13.032341003417969, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7571475517581334, |
| "grad_norm": 31.03739356994629, |
| "learning_rate": 4.660754239738784e-06, |
| "logits/chosen": -0.13154415786266327, |
| "logits/rejected": -0.14076797664165497, |
| "logps/chosen": -357.6185302734375, |
| "logps/rejected": -386.7919921875, |
| "loss": 0.7967, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -8.652280807495117, |
| "rewards/margins": 3.0989902019500732, |
| "rewards/rejected": -11.751270294189453, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7630627670062439, |
| "grad_norm": 36.11872482299805, |
| "learning_rate": 4.652059168298575e-06, |
| "logits/chosen": -0.1265685260295868, |
| "logits/rejected": -0.16448134183883667, |
| "logps/chosen": -372.2834167480469, |
| "logps/rejected": -404.4194030761719, |
| "loss": 0.8985, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": -8.796895980834961, |
| "rewards/margins": 2.566009521484375, |
| "rewards/rejected": -11.362905502319336, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7689779822543542, |
| "grad_norm": 46.44245910644531, |
| "learning_rate": 4.6432623971183914e-06, |
| "logits/chosen": -0.14210101962089539, |
| "logits/rejected": -0.10977351665496826, |
| "logps/chosen": -394.84918212890625, |
| "logps/rejected": -433.3570556640625, |
| "loss": 0.9522, |
| "rewards/accuracies": 0.6759259104728699, |
| "rewards/chosen": -9.386205673217773, |
| "rewards/margins": 2.8492612838745117, |
| "rewards/rejected": -12.235466003417969, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7748931975024647, |
| "grad_norm": 97.72257232666016, |
| "learning_rate": 4.634364341906758e-06, |
| "logits/chosen": -0.0367339588701725, |
| "logits/rejected": -0.006284890230745077, |
| "logps/chosen": -384.237548828125, |
| "logps/rejected": -411.18560791015625, |
| "loss": 0.9734, |
| "rewards/accuracies": 0.6435185670852661, |
| "rewards/chosen": -9.642322540283203, |
| "rewards/margins": 2.240882635116577, |
| "rewards/rejected": -11.88320541381836, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7808084127505751, |
| "grad_norm": 31.60021209716797, |
| "learning_rate": 4.6253654231585724e-06, |
| "logits/chosen": -0.04863632842898369, |
| "logits/rejected": -0.05471419543027878, |
| "logps/chosen": -393.5486755371094, |
| "logps/rejected": -420.154541015625, |
| "loss": 0.8569, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -10.027898788452148, |
| "rewards/margins": 2.8087966442108154, |
| "rewards/rejected": -12.83669662475586, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7867236279986856, |
| "grad_norm": 34.62734603881836, |
| "learning_rate": 4.616266066135236e-06, |
| "logits/chosen": 0.012388413771986961, |
| "logits/rejected": 0.08139034360647202, |
| "logps/chosen": -403.1490478515625, |
| "logps/rejected": -445.5652770996094, |
| "loss": 0.8866, |
| "rewards/accuracies": 0.7175925970077515, |
| "rewards/chosen": -10.999832153320312, |
| "rewards/margins": 2.9154164791107178, |
| "rewards/rejected": -13.915247917175293, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7926388432467959, |
| "grad_norm": 45.868507385253906, |
| "learning_rate": 4.6070667008445565e-06, |
| "logits/chosen": -0.1295236349105835, |
| "logits/rejected": -0.05852815508842468, |
| "logps/chosen": -410.78717041015625, |
| "logps/rejected": -466.6341247558594, |
| "loss": 0.8873, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": -11.442038536071777, |
| "rewards/margins": 2.919829845428467, |
| "rewards/rejected": -14.361867904663086, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7985540584949063, |
| "grad_norm": 57.16217041015625, |
| "learning_rate": 4.597767762020425e-06, |
| "logits/chosen": -0.09274892508983612, |
| "logits/rejected": -0.0731249749660492, |
| "logps/chosen": -414.71905517578125, |
| "logps/rejected": -454.8731384277344, |
| "loss": 0.8367, |
| "rewards/accuracies": 0.6759259104728699, |
| "rewards/chosen": -12.529696464538574, |
| "rewards/margins": 2.8657846450805664, |
| "rewards/rejected": -15.395480155944824, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8044692737430168, |
| "grad_norm": 40.70981979370117, |
| "learning_rate": 4.588369689102275e-06, |
| "logits/chosen": -0.19083881378173828, |
| "logits/rejected": -0.22435228526592255, |
| "logps/chosen": -420.8725280761719, |
| "logps/rejected": -443.5750732421875, |
| "loss": 0.8609, |
| "rewards/accuracies": 0.7037036418914795, |
| "rewards/chosen": -12.435295104980469, |
| "rewards/margins": 2.047469139099121, |
| "rewards/rejected": -14.48276424407959, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8103844889911271, |
| "grad_norm": 81.51207733154297, |
| "learning_rate": 4.578872926214312e-06, |
| "logits/chosen": -0.1143086701631546, |
| "logits/rejected": -0.12217384576797485, |
| "logps/chosen": -426.69769287109375, |
| "logps/rejected": -465.9218444824219, |
| "loss": 0.8227, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": -11.509513854980469, |
| "rewards/margins": 3.481245756149292, |
| "rewards/rejected": -14.990760803222656, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8162997042392376, |
| "grad_norm": 37.07810974121094, |
| "learning_rate": 4.569277922144531e-06, |
| "logits/chosen": -0.07632291316986084, |
| "logits/rejected": -0.09633226692676544, |
| "logps/chosen": -380.8441162109375, |
| "logps/rejected": -426.43780517578125, |
| "loss": 0.7403, |
| "rewards/accuracies": 0.7175925970077515, |
| "rewards/chosen": -10.021100997924805, |
| "rewards/margins": 3.8166158199310303, |
| "rewards/rejected": -13.83771800994873, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.822214919487348, |
| "grad_norm": 40.48078155517578, |
| "learning_rate": 4.559585130323503e-06, |
| "logits/chosen": -0.11609819531440735, |
| "logits/rejected": -0.11777342855930328, |
| "logps/chosen": -382.35833740234375, |
| "logps/rejected": -425.1037292480469, |
| "loss": 0.7272, |
| "rewards/accuracies": 0.7453703880310059, |
| "rewards/chosen": -10.085898399353027, |
| "rewards/margins": 4.251326560974121, |
| "rewards/rejected": -14.337224960327148, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8281301347354584, |
| "grad_norm": 23.162534713745117, |
| "learning_rate": 4.549795008802951e-06, |
| "logits/chosen": -0.09667672216892242, |
| "logits/rejected": -0.16936007142066956, |
| "logps/chosen": -414.39794921875, |
| "logps/rejected": -467.24224853515625, |
| "loss": 0.71, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -10.526082992553711, |
| "rewards/margins": 5.361236572265625, |
| "rewards/rejected": -15.887319564819336, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8340453499835688, |
| "grad_norm": 46.23252868652344, |
| "learning_rate": 4.539908020234101e-06, |
| "logits/chosen": -0.18942461907863617, |
| "logits/rejected": -0.20741616189479828, |
| "logps/chosen": -395.4661560058594, |
| "logps/rejected": -422.1636962890625, |
| "loss": 0.8233, |
| "rewards/accuracies": 0.7361111044883728, |
| "rewards/chosen": -11.337644577026367, |
| "rewards/margins": 3.6029138565063477, |
| "rewards/rejected": -14.940558433532715, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8399605652316793, |
| "grad_norm": 39.3061637878418, |
| "learning_rate": 4.529924631845819e-06, |
| "logits/chosen": -0.22194555401802063, |
| "logits/rejected": -0.2846095860004425, |
| "logps/chosen": -391.3856201171875, |
| "logps/rejected": -436.1163024902344, |
| "loss": 0.7741, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -11.072221755981445, |
| "rewards/margins": 3.993955612182617, |
| "rewards/rejected": -15.066177368164062, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8458757804797897, |
| "grad_norm": 35.3477783203125, |
| "learning_rate": 4.5198453154225336e-06, |
| "logits/chosen": -0.20290356874465942, |
| "logits/rejected": -0.22691264748573303, |
| "logps/chosen": -401.7808532714844, |
| "logps/rejected": -431.0494384765625, |
| "loss": 0.9883, |
| "rewards/accuracies": 0.6527777910232544, |
| "rewards/chosen": -11.68575668334961, |
| "rewards/margins": 3.1858057975769043, |
| "rewards/rejected": -14.871562957763672, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8517909957279001, |
| "grad_norm": 45.111228942871094, |
| "learning_rate": 4.509670547281938e-06, |
| "logits/chosen": -0.1648390144109726, |
| "logits/rejected": -0.15016193687915802, |
| "logps/chosen": -408.5751647949219, |
| "logps/rejected": -455.25286865234375, |
| "loss": 0.685, |
| "rewards/accuracies": 0.7129630446434021, |
| "rewards/chosen": -11.243772506713867, |
| "rewards/margins": 4.181289196014404, |
| "rewards/rejected": -15.425060272216797, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.8577062109760105, |
| "grad_norm": 46.299076080322266, |
| "learning_rate": 4.499400808252481e-06, |
| "logits/chosen": -0.17850446701049805, |
| "logits/rejected": -0.17678868770599365, |
| "logps/chosen": -426.147216796875, |
| "logps/rejected": -462.2786560058594, |
| "loss": 0.8077, |
| "rewards/accuracies": 0.7453703880310059, |
| "rewards/chosen": -12.385601043701172, |
| "rewards/margins": 3.7832705974578857, |
| "rewards/rejected": -16.168874740600586, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.863621426224121, |
| "grad_norm": 33.022735595703125, |
| "learning_rate": 4.489036583650649e-06, |
| "logits/chosen": -0.23058825731277466, |
| "logits/rejected": -0.1485554724931717, |
| "logps/chosen": -404.13458251953125, |
| "logps/rejected": -459.26434326171875, |
| "loss": 0.7394, |
| "rewards/accuracies": 0.6851851940155029, |
| "rewards/chosen": -12.277857780456543, |
| "rewards/margins": 4.416906833648682, |
| "rewards/rejected": -16.694765090942383, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8695366414722313, |
| "grad_norm": 35.275081634521484, |
| "learning_rate": 4.478578363258023e-06, |
| "logits/chosen": -0.16853290796279907, |
| "logits/rejected": -0.1521489918231964, |
| "logps/chosen": -415.5618591308594, |
| "logps/rejected": -451.77178955078125, |
| "loss": 0.8398, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": -12.673078536987305, |
| "rewards/margins": 3.461433172225952, |
| "rewards/rejected": -16.134510040283203, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8754518567203418, |
| "grad_norm": 33.70681381225586, |
| "learning_rate": 4.468026641298142e-06, |
| "logits/chosen": -0.1421818733215332, |
| "logits/rejected": -0.08376338332891464, |
| "logps/chosen": -427.12335205078125, |
| "logps/rejected": -484.59649658203125, |
| "loss": 0.7882, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -13.382842063903809, |
| "rewards/margins": 3.6166725158691406, |
| "rewards/rejected": -16.999515533447266, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8813670719684522, |
| "grad_norm": 33.35516357421875, |
| "learning_rate": 4.457381916413141e-06, |
| "logits/chosen": -0.13292686641216278, |
| "logits/rejected": -0.18141326308250427, |
| "logps/chosen": -425.69287109375, |
| "logps/rejected": -463.3936767578125, |
| "loss": 0.8456, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -12.859245300292969, |
| "rewards/margins": 3.692777156829834, |
| "rewards/rejected": -16.55202293395996, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8872822872165625, |
| "grad_norm": 35.195640563964844, |
| "learning_rate": 4.4466446916401895e-06, |
| "logits/chosen": -0.22234384715557098, |
| "logits/rejected": -0.08050793409347534, |
| "logps/chosen": -418.515869140625, |
| "logps/rejected": -479.6258239746094, |
| "loss": 0.754, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -13.117304801940918, |
| "rewards/margins": 4.073483467102051, |
| "rewards/rejected": -17.19078826904297, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.893197502464673, |
| "grad_norm": 57.9023551940918, |
| "learning_rate": 4.435815474387719e-06, |
| "logits/chosen": -0.117046058177948, |
| "logits/rejected": -0.09925241768360138, |
| "logps/chosen": -431.378662109375, |
| "logps/rejected": -481.478271484375, |
| "loss": 0.9017, |
| "rewards/accuracies": 0.6898148059844971, |
| "rewards/chosen": -13.18567943572998, |
| "rewards/margins": 3.816985845565796, |
| "rewards/rejected": -17.002666473388672, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8991127177127834, |
| "grad_norm": 32.576194763183594, |
| "learning_rate": 4.424894776411445e-06, |
| "logits/chosen": -0.14799581468105316, |
| "logits/rejected": -0.14989186823368073, |
| "logps/chosen": -423.6829833984375, |
| "logps/rejected": -472.8582458496094, |
| "loss": 0.751, |
| "rewards/accuracies": 0.7222222685813904, |
| "rewards/chosen": -12.279951095581055, |
| "rewards/margins": 4.995446681976318, |
| "rewards/rejected": -17.27539825439453, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9050279329608939, |
| "grad_norm": 42.71744155883789, |
| "learning_rate": 4.413883113790183e-06, |
| "logits/chosen": -0.1738191843032837, |
| "logits/rejected": -0.13758057355880737, |
| "logps/chosen": -426.9272155761719, |
| "logps/rejected": -479.89434814453125, |
| "loss": 0.9077, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -13.401716232299805, |
| "rewards/margins": 3.820394277572632, |
| "rewards/rejected": -17.22211265563965, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9109431482090042, |
| "grad_norm": 39.175025939941406, |
| "learning_rate": 4.402781006901457e-06, |
| "logits/chosen": -0.2070755809545517, |
| "logits/rejected": -0.14179669320583344, |
| "logps/chosen": -414.93804931640625, |
| "logps/rejected": -477.036376953125, |
| "loss": 0.8665, |
| "rewards/accuracies": 0.6805555820465088, |
| "rewards/chosen": -13.253950119018555, |
| "rewards/margins": 3.051947593688965, |
| "rewards/rejected": -16.305896759033203, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9168583634571147, |
| "grad_norm": 22.947998046875, |
| "learning_rate": 4.391588980396913e-06, |
| "logits/chosen": -0.1404464989900589, |
| "logits/rejected": -0.181797593832016, |
| "logps/chosen": -406.57586669921875, |
| "logps/rejected": -447.8778076171875, |
| "loss": 0.6584, |
| "rewards/accuracies": 0.7407407760620117, |
| "rewards/chosen": -11.844066619873047, |
| "rewards/margins": 4.259424209594727, |
| "rewards/rejected": -16.103490829467773, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9227735787052251, |
| "grad_norm": 27.205076217651367, |
| "learning_rate": 4.380307563177523e-06, |
| "logits/chosen": -0.27725404500961304, |
| "logits/rejected": -0.20193539559841156, |
| "logps/chosen": -409.8758239746094, |
| "logps/rejected": -500.9091491699219, |
| "loss": 0.7579, |
| "rewards/accuracies": 0.7361111640930176, |
| "rewards/chosen": -12.209704399108887, |
| "rewards/margins": 5.026104927062988, |
| "rewards/rejected": -17.235809326171875, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9286887939533355, |
| "grad_norm": 28.20140266418457, |
| "learning_rate": 4.36893728836859e-06, |
| "logits/chosen": -0.2689639925956726, |
| "logits/rejected": -0.20782801508903503, |
| "logps/chosen": -396.9151611328125, |
| "logps/rejected": -470.01593017578125, |
| "loss": 0.5835, |
| "rewards/accuracies": 0.7731481790542603, |
| "rewards/chosen": -12.019521713256836, |
| "rewards/margins": 5.0072855949401855, |
| "rewards/rejected": -17.02680778503418, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9346040092014459, |
| "grad_norm": 32.47650909423828, |
| "learning_rate": 4.357478693294557e-06, |
| "logits/chosen": -0.3439037799835205, |
| "logits/rejected": -0.28947803378105164, |
| "logps/chosen": -415.56072998046875, |
| "logps/rejected": -484.7381286621094, |
| "loss": 0.7827, |
| "rewards/accuracies": 0.763888955116272, |
| "rewards/chosen": -12.406390190124512, |
| "rewards/margins": 5.7553629875183105, |
| "rewards/rejected": -18.161752700805664, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9405192244495564, |
| "grad_norm": 37.89875793457031, |
| "learning_rate": 4.345932319453612e-06, |
| "logits/chosen": -0.3605978488922119, |
| "logits/rejected": -0.29322177171707153, |
| "logps/chosen": -429.87091064453125, |
| "logps/rejected": -483.867431640625, |
| "loss": 0.8029, |
| "rewards/accuracies": 0.703703761100769, |
| "rewards/chosen": -13.3162841796875, |
| "rewards/margins": 4.183923721313477, |
| "rewards/rejected": -17.500207901000977, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9464344396976668, |
| "grad_norm": 50.86394119262695, |
| "learning_rate": 4.334298712492098e-06, |
| "logits/chosen": -0.2936496138572693, |
| "logits/rejected": -0.32563602924346924, |
| "logps/chosen": -436.94317626953125, |
| "logps/rejected": -472.5697937011719, |
| "loss": 0.9131, |
| "rewards/accuracies": 0.6574074029922485, |
| "rewards/chosen": -13.809501647949219, |
| "rewards/margins": 3.8074746131896973, |
| "rewards/rejected": -17.61697769165039, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9523496549457772, |
| "grad_norm": 52.91617202758789, |
| "learning_rate": 4.32257842217873e-06, |
| "logits/chosen": -0.2928038239479065, |
| "logits/rejected": -0.25982213020324707, |
| "logps/chosen": -438.8931579589844, |
| "logps/rejected": -474.4143981933594, |
| "loss": 0.8308, |
| "rewards/accuracies": 0.7314814925193787, |
| "rewards/chosen": -14.169482231140137, |
| "rewards/margins": 3.389918327331543, |
| "rewards/rejected": -17.559402465820312, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9582648701938876, |
| "grad_norm": 40.86037826538086, |
| "learning_rate": 4.310772002378613e-06, |
| "logits/chosen": -0.33903825283050537, |
| "logits/rejected": -0.27782881259918213, |
| "logps/chosen": -430.27288818359375, |
| "logps/rejected": -489.8445739746094, |
| "loss": 0.7149, |
| "rewards/accuracies": 0.7407407164573669, |
| "rewards/chosen": -14.304798126220703, |
| "rewards/margins": 4.365303993225098, |
| "rewards/rejected": -18.670101165771484, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9641800854419981, |
| "grad_norm": 38.9904899597168, |
| "learning_rate": 4.298880011027067e-06, |
| "logits/chosen": -0.3663506507873535, |
| "logits/rejected": -0.2783759832382202, |
| "logps/chosen": -428.8545837402344, |
| "logps/rejected": -498.8543701171875, |
| "loss": 0.54, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -14.275480270385742, |
| "rewards/margins": 4.776096343994141, |
| "rewards/rejected": -19.051578521728516, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.9700953006901084, |
| "grad_norm": 33.28893280029297, |
| "learning_rate": 4.286903010103267e-06, |
| "logits/chosen": -0.34440621733665466, |
| "logits/rejected": -0.3880541920661926, |
| "logps/chosen": -448.9291687011719, |
| "logps/rejected": -492.80609130859375, |
| "loss": 0.7484, |
| "rewards/accuracies": 0.7546297311782837, |
| "rewards/chosen": -13.652152061462402, |
| "rewards/margins": 4.3886871337890625, |
| "rewards/rejected": -18.04084014892578, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9760105159382189, |
| "grad_norm": 23.393043518066406, |
| "learning_rate": 4.274841565603674e-06, |
| "logits/chosen": -0.3807776868343353, |
| "logits/rejected": -0.37542426586151123, |
| "logps/chosen": -420.848388671875, |
| "logps/rejected": -468.35150146484375, |
| "loss": 0.6653, |
| "rewards/accuracies": 0.7361111044883728, |
| "rewards/chosen": -13.749267578125, |
| "rewards/margins": 3.9116599559783936, |
| "rewards/rejected": -17.660926818847656, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9819257311863293, |
| "grad_norm": 71.44178771972656, |
| "learning_rate": 4.262696247515298e-06, |
| "logits/chosen": -0.33630847930908203, |
| "logits/rejected": -0.3011205196380615, |
| "logps/chosen": -427.74493408203125, |
| "logps/rejected": -486.2608337402344, |
| "loss": 0.79, |
| "rewards/accuracies": 0.7453703284263611, |
| "rewards/chosen": -13.645870208740234, |
| "rewards/margins": 3.291267156600952, |
| "rewards/rejected": -16.937137603759766, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.9878409464344396, |
| "grad_norm": 38.925453186035156, |
| "learning_rate": 4.250467629788758e-06, |
| "logits/chosen": -0.3359528183937073, |
| "logits/rejected": -0.32171425223350525, |
| "logps/chosen": -416.31011962890625, |
| "logps/rejected": -453.65667724609375, |
| "loss": 0.8185, |
| "rewards/accuracies": 0.7592593431472778, |
| "rewards/chosen": -13.421798706054688, |
| "rewards/margins": 3.002066135406494, |
| "rewards/rejected": -16.423866271972656, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.9937561616825501, |
| "grad_norm": 43.50007247924805, |
| "learning_rate": 4.238156290311159e-06, |
| "logits/chosen": -0.2119762897491455, |
| "logits/rejected": -0.21785835921764374, |
| "logps/chosen": -419.2802734375, |
| "logps/rejected": -464.8529052734375, |
| "loss": 0.8978, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -12.489054679870605, |
| "rewards/margins": 2.8680572509765625, |
| "rewards/rejected": -15.357111930847168, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9996713769306605, |
| "grad_norm": 28.424711227416992, |
| "learning_rate": 4.2257628108787855e-06, |
| "logits/chosen": -0.3750268220901489, |
| "logits/rejected": -0.3536463975906372, |
| "logps/chosen": -406.7919921875, |
| "logps/rejected": -456.8607177734375, |
| "loss": 0.8355, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -11.890769004821777, |
| "rewards/margins": 3.3902645111083984, |
| "rewards/rejected": -15.281034469604492, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.0039434768320736, |
| "grad_norm": 9.685483932495117, |
| "learning_rate": 4.2132877771696e-06, |
| "logits/chosen": -0.44336915016174316, |
| "logits/rejected": -0.3426854610443115, |
| "logps/chosen": -401.211669921875, |
| "logps/rejected": -507.76446533203125, |
| "loss": 0.2311, |
| "rewards/accuracies": 0.9487179517745972, |
| "rewards/chosen": -8.906108856201172, |
| "rewards/margins": 9.99924087524414, |
| "rewards/rejected": -18.905351638793945, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.009858692080184, |
| "grad_norm": 15.767539978027344, |
| "learning_rate": 4.200731778715575e-06, |
| "logits/chosen": -0.3226369619369507, |
| "logits/rejected": -0.23430070281028748, |
| "logps/chosen": -386.5018005371094, |
| "logps/rejected": -498.0735778808594, |
| "loss": 0.0674, |
| "rewards/accuracies": 0.9722223281860352, |
| "rewards/chosen": -8.576701164245605, |
| "rewards/margins": 10.921747207641602, |
| "rewards/rejected": -19.49844741821289, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.0157739073282945, |
| "grad_norm": 8.739374160766602, |
| "learning_rate": 4.188095408874829e-06, |
| "logits/chosen": -0.3742499053478241, |
| "logits/rejected": -0.33725112676620483, |
| "logps/chosen": -354.2779235839844, |
| "logps/rejected": -456.7283935546875, |
| "loss": 0.0407, |
| "rewards/accuracies": 0.9907407760620117, |
| "rewards/chosen": -8.695253372192383, |
| "rewards/margins": 9.780828475952148, |
| "rewards/rejected": -18.47608184814453, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.0216891225764049, |
| "grad_norm": 13.139928817749023, |
| "learning_rate": 4.175379264803587e-06, |
| "logits/chosen": -0.37535345554351807, |
| "logits/rejected": -0.31396183371543884, |
| "logps/chosen": -375.81396484375, |
| "logps/rejected": -471.1697692871094, |
| "loss": 0.0762, |
| "rewards/accuracies": 0.9768518805503845, |
| "rewards/chosen": -8.316873550415039, |
| "rewards/margins": 9.90269660949707, |
| "rewards/rejected": -18.219572067260742, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.0276043378245152, |
| "grad_norm": 3.5331549644470215, |
| "learning_rate": 4.162583947427958e-06, |
| "logits/chosen": -0.40028223395347595, |
| "logits/rejected": -0.22156159579753876, |
| "logps/chosen": -378.267822265625, |
| "logps/rejected": -504.5182800292969, |
| "loss": 0.1258, |
| "rewards/accuracies": 0.953703761100769, |
| "rewards/chosen": -9.443648338317871, |
| "rewards/margins": 11.473689079284668, |
| "rewards/rejected": -20.917339324951172, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.0335195530726258, |
| "grad_norm": 10.87333869934082, |
| "learning_rate": 4.149710061415542e-06, |
| "logits/chosen": -0.3893941044807434, |
| "logits/rejected": -0.22475658357143402, |
| "logps/chosen": -389.7325134277344, |
| "logps/rejected": -521.1325073242188, |
| "loss": 0.0873, |
| "rewards/accuracies": 0.9675926566123962, |
| "rewards/chosen": -9.745861053466797, |
| "rewards/margins": 12.743724822998047, |
| "rewards/rejected": -22.489585876464844, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.0394347683207361, |
| "grad_norm": 10.995318412780762, |
| "learning_rate": 4.13675821514685e-06, |
| "logits/chosen": -0.39965152740478516, |
| "logits/rejected": -0.252058744430542, |
| "logps/chosen": -410.03411865234375, |
| "logps/rejected": -544.2578735351562, |
| "loss": 0.0969, |
| "rewards/accuracies": 0.9675926566123962, |
| "rewards/chosen": -11.346731185913086, |
| "rewards/margins": 11.341825485229492, |
| "rewards/rejected": -22.688552856445312, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0453499835688465, |
| "grad_norm": 8.572622299194336, |
| "learning_rate": 4.12372902068656e-06, |
| "logits/chosen": -0.4358088970184326, |
| "logits/rejected": -0.2031504511833191, |
| "logps/chosen": -403.5755310058594, |
| "logps/rejected": -527.644775390625, |
| "loss": 0.1168, |
| "rewards/accuracies": 0.9444444179534912, |
| "rewards/chosen": -11.759601593017578, |
| "rewards/margins": 10.76253890991211, |
| "rewards/rejected": -22.522140502929688, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.051265198816957, |
| "grad_norm": 7.874849796295166, |
| "learning_rate": 4.110623093754585e-06, |
| "logits/chosen": -0.38974201679229736, |
| "logits/rejected": -0.25777122378349304, |
| "logps/chosen": -408.3241882324219, |
| "logps/rejected": -516.1395263671875, |
| "loss": 0.1062, |
| "rewards/accuracies": 0.9629629850387573, |
| "rewards/chosen": -11.630317687988281, |
| "rewards/margins": 10.122415542602539, |
| "rewards/rejected": -21.752735137939453, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.0571804140650674, |
| "grad_norm": 7.267045974731445, |
| "learning_rate": 4.097441053696985e-06, |
| "logits/chosen": -0.44127148389816284, |
| "logits/rejected": -0.32113516330718994, |
| "logps/chosen": -430.0728759765625, |
| "logps/rejected": -561.5933837890625, |
| "loss": 0.0811, |
| "rewards/accuracies": 0.9722222685813904, |
| "rewards/chosen": -11.996893882751465, |
| "rewards/margins": 12.880267143249512, |
| "rewards/rejected": -24.87716293334961, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.0630956293131777, |
| "grad_norm": 26.037012100219727, |
| "learning_rate": 4.08418352345669e-06, |
| "logits/chosen": -0.3767847418785095, |
| "logits/rejected": -0.31423503160476685, |
| "logps/chosen": -406.0875244140625, |
| "logps/rejected": -526.44482421875, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.9768518805503845, |
| "rewards/chosen": -10.038455963134766, |
| "rewards/margins": 13.710319519042969, |
| "rewards/rejected": -23.748777389526367, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.0690108445612883, |
| "grad_norm": 13.05281925201416, |
| "learning_rate": 4.070851129544065e-06, |
| "logits/chosen": -0.465512216091156, |
| "logits/rejected": -0.2587343454360962, |
| "logps/chosen": -399.33642578125, |
| "logps/rejected": -554.8599243164062, |
| "loss": 0.1257, |
| "rewards/accuracies": 0.953703761100769, |
| "rewards/chosen": -10.270711898803711, |
| "rewards/margins": 13.220477104187012, |
| "rewards/rejected": -23.491188049316406, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.0749260598093986, |
| "grad_norm": 24.870304107666016, |
| "learning_rate": 4.057444502007306e-06, |
| "logits/chosen": -0.5171704888343811, |
| "logits/rejected": -0.33416056632995605, |
| "logps/chosen": -398.2466735839844, |
| "logps/rejected": -536.3380737304688, |
| "loss": 0.1337, |
| "rewards/accuracies": 0.953703761100769, |
| "rewards/chosen": -9.948308944702148, |
| "rewards/margins": 13.192928314208984, |
| "rewards/rejected": -23.1412353515625, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.0808412750575092, |
| "grad_norm": 3.7352793216705322, |
| "learning_rate": 4.043964274402663e-06, |
| "logits/chosen": -0.4453051686286926, |
| "logits/rejected": -0.30837786197662354, |
| "logps/chosen": -386.0931091308594, |
| "logps/rejected": -503.6631774902344, |
| "loss": 0.0532, |
| "rewards/accuracies": 0.9768519401550293, |
| "rewards/chosen": -9.698603630065918, |
| "rewards/margins": 12.827930450439453, |
| "rewards/rejected": -22.526535034179688, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.0867564903056195, |
| "grad_norm": 9.68443489074707, |
| "learning_rate": 4.030411083764498e-06, |
| "logits/chosen": -0.5020241737365723, |
| "logits/rejected": -0.28794384002685547, |
| "logps/chosen": -378.8649597167969, |
| "logps/rejected": -540.6756591796875, |
| "loss": 0.0671, |
| "rewards/accuracies": 0.9814814925193787, |
| "rewards/chosen": -9.302837371826172, |
| "rewards/margins": 13.408391952514648, |
| "rewards/rejected": -22.711231231689453, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.0926717055537298, |
| "grad_norm": 20.763072967529297, |
| "learning_rate": 4.0167855705751855e-06, |
| "logits/chosen": -0.5195566415786743, |
| "logits/rejected": -0.35310834646224976, |
| "logps/chosen": -388.2412109375, |
| "logps/rejected": -517.09375, |
| "loss": 0.1577, |
| "rewards/accuracies": 0.9351853132247925, |
| "rewards/chosen": -9.682899475097656, |
| "rewards/margins": 12.611946105957031, |
| "rewards/rejected": -22.294845581054688, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.0985869208018402, |
| "grad_norm": 10.18380069732666, |
| "learning_rate": 4.003088378734841e-06, |
| "logits/chosen": -0.5263486504554749, |
| "logits/rejected": -0.3212735056877136, |
| "logps/chosen": -395.2178649902344, |
| "logps/rejected": -554.233642578125, |
| "loss": 0.0954, |
| "rewards/accuracies": 0.9722222685813904, |
| "rewards/chosen": -11.4990234375, |
| "rewards/margins": 13.107860565185547, |
| "rewards/rejected": -24.606884002685547, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.1045021360499507, |
| "grad_norm": 17.160139083862305, |
| "learning_rate": 3.989320155530894e-06, |
| "logits/chosen": -0.4392577111721039, |
| "logits/rejected": -0.2696951925754547, |
| "logps/chosen": -410.08343505859375, |
| "logps/rejected": -540.97509765625, |
| "loss": 0.1387, |
| "rewards/accuracies": 0.9583333730697632, |
| "rewards/chosen": -11.576102256774902, |
| "rewards/margins": 12.610298156738281, |
| "rewards/rejected": -24.186399459838867, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.110417351298061, |
| "grad_norm": 16.328100204467773, |
| "learning_rate": 3.9754815516075e-06, |
| "logits/chosen": -0.49670523405075073, |
| "logits/rejected": -0.22816550731658936, |
| "logps/chosen": -402.760498046875, |
| "logps/rejected": -556.445556640625, |
| "loss": 0.1141, |
| "rewards/accuracies": 0.9490741491317749, |
| "rewards/chosen": -11.802339553833008, |
| "rewards/margins": 13.369563102722168, |
| "rewards/rejected": -25.171903610229492, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.1163325665461716, |
| "grad_norm": 3.9692351818084717, |
| "learning_rate": 3.9615732209347925e-06, |
| "logits/chosen": -0.4443477988243103, |
| "logits/rejected": -0.30315929651260376, |
| "logps/chosen": -390.0570068359375, |
| "logps/rejected": -514.3804931640625, |
| "loss": 0.1019, |
| "rewards/accuracies": 0.9629630446434021, |
| "rewards/chosen": -11.126566886901855, |
| "rewards/margins": 10.942947387695312, |
| "rewards/rejected": -22.069515228271484, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.122247781794282, |
| "grad_norm": 14.457867622375488, |
| "learning_rate": 3.947595820777978e-06, |
| "logits/chosen": -0.517672061920166, |
| "logits/rejected": -0.24748222529888153, |
| "logps/chosen": -386.17291259765625, |
| "logps/rejected": -539.0173950195312, |
| "loss": 0.1354, |
| "rewards/accuracies": 0.9444444179534912, |
| "rewards/chosen": -11.382842063903809, |
| "rewards/margins": 13.581473350524902, |
| "rewards/rejected": -24.96431541442871, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.1281629970423923, |
| "grad_norm": 8.523018836975098, |
| "learning_rate": 3.933550011666275e-06, |
| "logits/chosen": -0.401355117559433, |
| "logits/rejected": -0.2663224935531616, |
| "logps/chosen": -422.0758972167969, |
| "logps/rejected": -550.5574951171875, |
| "loss": 0.1321, |
| "rewards/accuracies": 0.9444444179534912, |
| "rewards/chosen": -12.331350326538086, |
| "rewards/margins": 13.582274436950684, |
| "rewards/rejected": -25.913619995117188, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.1340782122905029, |
| "grad_norm": 37.025264739990234, |
| "learning_rate": 3.919436457361701e-06, |
| "logits/chosen": -0.3494156002998352, |
| "logits/rejected": -0.2177804559469223, |
| "logps/chosen": -423.7979431152344, |
| "logps/rejected": -559.4660034179688, |
| "loss": 0.1034, |
| "rewards/accuracies": 0.9675926566123962, |
| "rewards/chosen": -12.480727195739746, |
| "rewards/margins": 12.493712425231934, |
| "rewards/rejected": -24.97443962097168, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.1399934275386132, |
| "grad_norm": 10.311182975769043, |
| "learning_rate": 3.905255824827703e-06, |
| "logits/chosen": -0.404990017414093, |
| "logits/rejected": -0.23333707451820374, |
| "logps/chosen": -400.90179443359375, |
| "logps/rejected": -549.761962890625, |
| "loss": 0.0563, |
| "rewards/accuracies": 0.9861111044883728, |
| "rewards/chosen": -12.794790267944336, |
| "rewards/margins": 12.089736938476562, |
| "rewards/rejected": -24.8845272064209, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.1459086427867236, |
| "grad_norm": 16.51976203918457, |
| "learning_rate": 3.891008784197642e-06, |
| "logits/chosen": -0.4602348208427429, |
| "logits/rejected": -0.32792428135871887, |
| "logps/chosen": -443.4609069824219, |
| "logps/rejected": -569.1358642578125, |
| "loss": 0.1204, |
| "rewards/accuracies": 0.9490741491317749, |
| "rewards/chosen": -13.16263484954834, |
| "rewards/margins": 12.034723281860352, |
| "rewards/rejected": -25.197355270385742, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.1518238580348341, |
| "grad_norm": 19.857742309570312, |
| "learning_rate": 3.87669600874312e-06, |
| "logits/chosen": -0.45605939626693726, |
| "logits/rejected": -0.2824591398239136, |
| "logps/chosen": -440.00982666015625, |
| "logps/rejected": -580.4444580078125, |
| "loss": 0.1661, |
| "rewards/accuracies": 0.9490741491317749, |
| "rewards/chosen": -13.881183624267578, |
| "rewards/margins": 12.989599227905273, |
| "rewards/rejected": -26.87078094482422, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.1577390732829445, |
| "grad_norm": 13.731415748596191, |
| "learning_rate": 3.8623181748421705e-06, |
| "logits/chosen": -0.5049574375152588, |
| "logits/rejected": -0.3214029371738434, |
| "logps/chosen": -420.5501403808594, |
| "logps/rejected": -567.5693969726562, |
| "loss": 0.0816, |
| "rewards/accuracies": 0.9768518805503845, |
| "rewards/chosen": -13.632789611816406, |
| "rewards/margins": 13.632120132446289, |
| "rewards/rejected": -27.264907836914062, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.1636542885310548, |
| "grad_norm": 8.935365676879883, |
| "learning_rate": 3.847875961947284e-06, |
| "logits/chosen": -0.4097817540168762, |
| "logits/rejected": -0.30726078152656555, |
| "logps/chosen": -445.2584228515625, |
| "logps/rejected": -565.4356689453125, |
| "loss": 0.0672, |
| "rewards/accuracies": 0.9629629850387573, |
| "rewards/chosen": -13.714117050170898, |
| "rewards/margins": 12.752127647399902, |
| "rewards/rejected": -26.466245651245117, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.1695695037791654, |
| "grad_norm": 7.481489181518555, |
| "learning_rate": 3.833370052553311e-06, |
| "logits/chosen": -0.44872909784317017, |
| "logits/rejected": -0.2258618324995041, |
| "logps/chosen": -420.02825927734375, |
| "logps/rejected": -583.5780029296875, |
| "loss": 0.106, |
| "rewards/accuracies": 0.9583333730697632, |
| "rewards/chosen": -12.75123119354248, |
| "rewards/margins": 15.216802597045898, |
| "rewards/rejected": -27.968032836914062, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.1754847190272757, |
| "grad_norm": 18.711483001708984, |
| "learning_rate": 3.818801132165203e-06, |
| "logits/chosen": -0.5571283102035522, |
| "logits/rejected": -0.3472014367580414, |
| "logps/chosen": -406.1875915527344, |
| "logps/rejected": -583.6386108398438, |
| "loss": 0.1648, |
| "rewards/accuracies": 0.953703761100769, |
| "rewards/chosen": -11.881487846374512, |
| "rewards/margins": 16.52305793762207, |
| "rewards/rejected": -28.404544830322266, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.1813999342753863, |
| "grad_norm": 16.07924461364746, |
| "learning_rate": 3.804169889265615e-06, |
| "logits/chosen": -0.5486902594566345, |
| "logits/rejected": -0.3185918927192688, |
| "logps/chosen": -393.58740234375, |
| "logps/rejected": -564.5778198242188, |
| "loss": 0.1159, |
| "rewards/accuracies": 0.953703761100769, |
| "rewards/chosen": -11.905487060546875, |
| "rewards/margins": 15.971022605895996, |
| "rewards/rejected": -27.876510620117188, |
| "step": 600 |
| } |
| ], |
| "logging_steps": 3, |
| "max_steps": 1524, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.14720897968746e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|