| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 1856, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 0.40908203125, |
| "epoch": 0.0053893829156561575, |
| "grad_norm": 7.759696960449219, |
| "learning_rate": 4.8387096774193546e-08, |
| "logits/chosen": -0.44672375660898817, |
| "logits/rejected": -0.39365523942633257, |
| "logps/chosen": -107.6, |
| "logps/rejected": -109.3, |
| "loss": 0.6923, |
| "mean_token_accuracy": 0.9019625961780549, |
| "num_tokens": 365562.0, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": 0.00010342597961425782, |
| "rewards/margins": 0.0004913330078125, |
| "rewards/rejected": -0.0003886222839355469, |
| "step": 10 |
| }, |
| { |
| "entropy": 0.41064453125, |
| "epoch": 0.010778765831312315, |
| "grad_norm": 7.587465286254883, |
| "learning_rate": 1.0215053763440861e-07, |
| "logits/chosen": -0.3728776172134608, |
| "logits/rejected": -0.332460740198616, |
| "logps/chosen": -100.825, |
| "logps/rejected": -102.975, |
| "loss": 0.693, |
| "mean_token_accuracy": 0.9035390466451645, |
| "num_tokens": 720708.0, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00041942596435546876, |
| "rewards/margins": -0.00022115707397460938, |
| "rewards/rejected": -0.00019679069519042968, |
| "step": 20 |
| }, |
| { |
| "entropy": 0.4173828125, |
| "epoch": 0.016168148746968473, |
| "grad_norm": 6.577247142791748, |
| "learning_rate": 1.5591397849462365e-07, |
| "logits/chosen": -0.472471438697433, |
| "logits/rejected": -0.39264794802016467, |
| "logps/chosen": -96.45, |
| "logps/rejected": -99.825, |
| "loss": 0.6929, |
| "mean_token_accuracy": 0.9041816651821136, |
| "num_tokens": 1075168.0, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00027996301651000977, |
| "rewards/margins": -0.00011727809906005859, |
| "rewards/rejected": -0.00016357898712158204, |
| "step": 30 |
| }, |
| { |
| "entropy": 0.39150390625, |
| "epoch": 0.02155753166262463, |
| "grad_norm": 6.858296871185303, |
| "learning_rate": 2.0967741935483871e-07, |
| "logits/chosen": -0.3733890259921028, |
| "logits/rejected": -0.3552958369517544, |
| "logps/chosen": -99.3375, |
| "logps/rejected": -103.175, |
| "loss": 0.6929, |
| "mean_token_accuracy": 0.9075490444898605, |
| "num_tokens": 1443719.0, |
| "rewards/accuracies": 0.35, |
| "rewards/chosen": -0.0002532958984375, |
| "rewards/margins": 0.000262451171875, |
| "rewards/rejected": -0.0005157232284545898, |
| "step": 40 |
| }, |
| { |
| "entropy": 0.39990234375, |
| "epoch": 0.026946914578280787, |
| "grad_norm": 6.983979225158691, |
| "learning_rate": 2.6344086021505376e-07, |
| "logits/chosen": -0.3704425655981937, |
| "logits/rejected": -0.3571443883083806, |
| "logps/chosen": -89.5625, |
| "logps/rejected": -92.6, |
| "loss": 0.6927, |
| "mean_token_accuracy": 0.9039196908473969, |
| "num_tokens": 1782804.0, |
| "rewards/accuracies": 0.428125, |
| "rewards/chosen": -0.0011333465576171876, |
| "rewards/margins": 0.0005399942398071289, |
| "rewards/rejected": -0.0016736984252929688, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.4013671875, |
| "epoch": 0.03233629749393695, |
| "grad_norm": 6.333939552307129, |
| "learning_rate": 3.172043010752688e-07, |
| "logits/chosen": -0.34019105871641864, |
| "logits/rejected": -0.3218748638672174, |
| "logps/chosen": -94.4875, |
| "logps/rejected": -93.575, |
| "loss": 0.6919, |
| "mean_token_accuracy": 0.9046859920024872, |
| "num_tokens": 2126112.0, |
| "rewards/accuracies": 0.490625, |
| "rewards/chosen": -0.0008403778076171875, |
| "rewards/margins": 0.0018994331359863282, |
| "rewards/rejected": -0.002740812301635742, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.41484375, |
| "epoch": 0.0377256804095931, |
| "grad_norm": 7.4297566413879395, |
| "learning_rate": 3.7096774193548384e-07, |
| "logits/chosen": -0.3875610308110304, |
| "logits/rejected": -0.35152331612465404, |
| "logps/chosen": -97.375, |
| "logps/rejected": -99.3625, |
| "loss": 0.6914, |
| "mean_token_accuracy": 0.9037097364664077, |
| "num_tokens": 2470754.0, |
| "rewards/accuracies": 0.540625, |
| "rewards/chosen": -0.0022185802459716796, |
| "rewards/margins": 0.002935457229614258, |
| "rewards/rejected": -0.005158233642578125, |
| "step": 70 |
| }, |
| { |
| "entropy": 0.41259765625, |
| "epoch": 0.04311506332524926, |
| "grad_norm": 7.677794456481934, |
| "learning_rate": 4.247311827956989e-07, |
| "logits/chosen": -0.35103980570551985, |
| "logits/rejected": -0.3017016560657181, |
| "logps/chosen": -97.5, |
| "logps/rejected": -99.3875, |
| "loss": 0.6909, |
| "mean_token_accuracy": 0.9047763884067536, |
| "num_tokens": 2836116.0, |
| "rewards/accuracies": 0.571875, |
| "rewards/chosen": -0.004664897918701172, |
| "rewards/margins": 0.004430198669433593, |
| "rewards/rejected": -0.009100341796875, |
| "step": 80 |
| }, |
| { |
| "entropy": 0.41826171875, |
| "epoch": 0.04850444624090541, |
| "grad_norm": 6.765298366546631, |
| "learning_rate": 4.78494623655914e-07, |
| "logits/chosen": -0.4268290587468382, |
| "logits/rejected": -0.37336222025804355, |
| "logps/chosen": -108.175, |
| "logps/rejected": -111.6, |
| "loss": 0.6898, |
| "mean_token_accuracy": 0.9004561603069305, |
| "num_tokens": 3207605.0, |
| "rewards/accuracies": 0.684375, |
| "rewards/chosen": -0.008296608785167336, |
| "rewards/margins": 0.006721109163481742, |
| "rewards/rejected": -0.01501771821640432, |
| "setc/cal_net_lr": 3.375e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.052821920812129976, |
| "setc/logratio_margin_delta": -0.0747640784829855, |
| "setc/logratio_margin_vanilla": 0.746875, |
| "setc/logratio_margin_weighted": 0.6721109215170145, |
| "step": 90 |
| }, |
| { |
| "entropy": 0.44228515625, |
| "epoch": 0.05389382915656157, |
| "grad_norm": 7.393381595611572, |
| "learning_rate": 5.322580645161289e-07, |
| "logits/chosen": -0.4361902457762258, |
| "logits/rejected": -0.3870189698671949, |
| "logps/chosen": -101.75, |
| "logps/rejected": -109.4, |
| "loss": 0.6872, |
| "mean_token_accuracy": 0.9016273647546769, |
| "num_tokens": 3548981.0, |
| "rewards/accuracies": 0.728125, |
| "rewards/chosen": -0.013736124988645316, |
| "rewards/margins": 0.01212767151882872, |
| "rewards/rejected": -0.02586379610002041, |
| "setc/cal_net_lr": 0.00011666666666666668, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.053199329786002635, |
| "setc/logratio_margin_delta": -0.07141252495348453, |
| "setc/logratio_margin_vanilla": 1.2841796875, |
| "setc/logratio_margin_weighted": 1.2127671625465155, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05389382915656157, |
| "eval_entropy": 0.4267578125, |
| "eval_logits/chosen": -0.450571413522897, |
| "eval_logits/rejected": -0.3940343719054589, |
| "eval_logps/chosen": -108.859375, |
| "eval_logps/rejected": -108.640625, |
| "eval_loss": 0.6871142387390137, |
| "eval_mean_token_accuracy": 0.9000339470803738, |
| "eval_num_tokens": 3548981.0, |
| "eval_rewards/accuracies": 0.671875, |
| "eval_rewards/chosen": -0.021255508123431355, |
| "eval_rewards/margins": 0.012425659064319916, |
| "eval_rewards/rejected": -0.03368116734782234, |
| "eval_runtime": 14.2001, |
| "eval_samples_per_second": 35.211, |
| "eval_setc/cal_net_lr": 0.00015833333333333327, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.04981097369454801, |
| "eval_setc/logratio_margin_delta": -0.06453109718859196, |
| "eval_setc/logratio_margin_vanilla": 1.3050537109375, |
| "eval_setc/logratio_margin_weighted": 1.240522613748908, |
| "eval_steps_per_second": 2.254, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.42685546875, |
| "epoch": 0.059283212072217734, |
| "grad_norm": 7.357357501983643, |
| "learning_rate": 5.860215053763441e-07, |
| "logits/chosen": -0.43970988910262926, |
| "logits/rejected": -0.40844065752963693, |
| "logps/chosen": -107.05, |
| "logps/rejected": -113.025, |
| "loss": 0.6854, |
| "mean_token_accuracy": 0.899716067314148, |
| "num_tokens": 3902446.0, |
| "rewards/accuracies": 0.725, |
| "rewards/chosen": -0.02834852202795446, |
| "rewards/margins": 0.015831261326093228, |
| "rewards/rejected": -0.04417978236451745, |
| "setc/cal_net_lr": 0.0002, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.047479902021586896, |
| "setc/logratio_margin_delta": -0.10251835882663726, |
| "setc/logratio_margin_vanilla": 1.68564453125, |
| "setc/logratio_margin_weighted": 1.5831261724233627, |
| "step": 110 |
| }, |
| { |
| "entropy": 0.42177734375, |
| "epoch": 0.0646725949878739, |
| "grad_norm": 7.097802639007568, |
| "learning_rate": 6.397849462365591e-07, |
| "logits/chosen": -0.41605972462656393, |
| "logits/rejected": -0.3486092219206212, |
| "logps/chosen": -97.4, |
| "logps/rejected": -101.15, |
| "loss": 0.6811, |
| "mean_token_accuracy": 0.8998741269111633, |
| "num_tokens": 4248207.0, |
| "rewards/accuracies": 0.715625, |
| "rewards/chosen": -0.0486921863630414, |
| "rewards/margins": 0.02490431647747755, |
| "rewards/rejected": -0.07359650395810605, |
| "setc/cal_net_lr": 0.0002833333333333333, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.040153346955776215, |
| "setc/logratio_margin_delta": -0.10605261325836182, |
| "setc/logratio_margin_vanilla": 2.596484375, |
| "setc/logratio_margin_weighted": 2.4904317617416383, |
| "step": 120 |
| }, |
| { |
| "entropy": 0.4609375, |
| "epoch": 0.07006197790353004, |
| "grad_norm": 8.569380760192871, |
| "learning_rate": 6.935483870967742e-07, |
| "logits/chosen": -0.5107418885495224, |
| "logits/rejected": -0.46866435782064403, |
| "logps/chosen": -125.8, |
| "logps/rejected": -125.425, |
| "loss": 0.6705, |
| "mean_token_accuracy": 0.8874441146850586, |
| "num_tokens": 4601428.0, |
| "rewards/accuracies": 0.721875, |
| "rewards/chosen": -0.09584741443395614, |
| "rewards/margins": 0.04766115248203277, |
| "rewards/rejected": -0.14350856877863408, |
| "setc/cal_net_lr": 0.0003666666666666668, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.035092563927173616, |
| "setc/logratio_margin_delta": -0.18232210874557495, |
| "setc/logratio_margin_vanilla": 4.9484375, |
| "setc/logratio_margin_weighted": 4.766115391254425, |
| "step": 130 |
| }, |
| { |
| "entropy": 0.46455078125, |
| "epoch": 0.0754513608191862, |
| "grad_norm": 7.780826091766357, |
| "learning_rate": 7.473118279569892e-07, |
| "logits/chosen": -0.4342919440785252, |
| "logits/rejected": -0.42807028323097684, |
| "logps/chosen": -100.9, |
| "logps/rejected": -110.775, |
| "loss": 0.6591, |
| "mean_token_accuracy": 0.8856117874383926, |
| "num_tokens": 4920681.0, |
| "rewards/accuracies": 0.725, |
| "rewards/chosen": -0.1356092609465122, |
| "rewards/margins": 0.07387373577803373, |
| "rewards/rejected": -0.2094829984009266, |
| "setc/cal_net_lr": 0.00045, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.022299187257885933, |
| "setc/logratio_margin_delta": -0.09700125455856323, |
| "setc/logratio_margin_vanilla": 7.484375, |
| "setc/logratio_margin_weighted": 7.387373745441437, |
| "step": 140 |
| }, |
| { |
| "entropy": 0.45078125, |
| "epoch": 0.08084074373484236, |
| "grad_norm": 8.610173225402832, |
| "learning_rate": 8.010752688172043e-07, |
| "logits/chosen": -0.41425887318933086, |
| "logits/rejected": -0.3917996553785628, |
| "logps/chosen": -125.75, |
| "logps/rejected": -139.075, |
| "loss": 0.6385, |
| "mean_token_accuracy": 0.88282710313797, |
| "num_tokens": 5269262.0, |
| "rewards/accuracies": 0.7125, |
| "rewards/chosen": -0.2560404367744923, |
| "rewards/margins": 0.1263975765556097, |
| "rewards/rejected": -0.3824380189180374, |
| "setc/cal_net_lr": 0.0005333333333333333, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.012131102010607719, |
| "setc/logratio_margin_delta": 0.07022677659988404, |
| "setc/logratio_margin_vanilla": 12.56953125, |
| "setc/logratio_margin_weighted": 12.639758026599884, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.46650390625, |
| "epoch": 0.08623012665049852, |
| "grad_norm": 10.757467269897461, |
| "learning_rate": 8.548387096774193e-07, |
| "logits/chosen": -0.48324375921289925, |
| "logits/rejected": -0.4760727307021881, |
| "logps/chosen": -147.85, |
| "logps/rejected": -173.725, |
| "loss": 0.6143, |
| "mean_token_accuracy": 0.8614455878734588, |
| "num_tokens": 5635034.0, |
| "rewards/accuracies": 0.725, |
| "rewards/chosen": -0.46175644397735593, |
| "rewards/margins": 0.20489476919174193, |
| "rewards/rejected": -0.6666512250900268, |
| "setc/cal_net_lr": 0.0006166666666666666, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.01982680819928646, |
| "setc/logratio_margin_delta": 0.5832273483276367, |
| "setc/logratio_margin_vanilla": 19.90625, |
| "setc/logratio_margin_weighted": 20.489477348327636, |
| "step": 160 |
| }, |
| { |
| "entropy": 0.4853515625, |
| "epoch": 0.09161950956615468, |
| "grad_norm": 12.929532051086426, |
| "learning_rate": 9.086021505376343e-07, |
| "logits/chosen": -0.5325447226004508, |
| "logits/rejected": -0.533857912904264, |
| "logps/chosen": -177.75, |
| "logps/rejected": -207.0, |
| "loss": 0.5938, |
| "mean_token_accuracy": 0.8355448335409165, |
| "num_tokens": 5996408.0, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7701589599251747, |
| "rewards/margins": 0.2884057696908712, |
| "rewards/rejected": -1.0585647374391556, |
| "setc/cal_net_lr": 0.0007000000000000002, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.04059908324852586, |
| "setc/logratio_margin_delta": 1.1937029838562012, |
| "setc/logratio_margin_vanilla": 27.646875, |
| "setc/logratio_margin_weighted": 28.8405779838562, |
| "step": 170 |
| }, |
| { |
| "entropy": 0.4525390625, |
| "epoch": 0.09700889248181083, |
| "grad_norm": 13.670575141906738, |
| "learning_rate": 9.623655913978494e-07, |
| "logits/chosen": -0.5174588059832541, |
| "logits/rejected": -0.4903172377790385, |
| "logps/chosen": -212.5, |
| "logps/rejected": -245.725, |
| "loss": 0.5927, |
| "mean_token_accuracy": 0.818328133225441, |
| "num_tokens": 6344423.0, |
| "rewards/accuracies": 0.69375, |
| "rewards/chosen": -1.1680178940296173, |
| "rewards/margins": 0.3832601685076952, |
| "rewards/rejected": -1.5512780517339706, |
| "setc/cal_net_lr": 0.0007833333333333333, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06093887090682983, |
| "setc/logratio_margin_delta": 2.3182060956954955, |
| "setc/logratio_margin_vanilla": 36.0078125, |
| "setc/logratio_margin_weighted": 38.326018595695494, |
| "step": 180 |
| }, |
| { |
| "entropy": 0.4759765625, |
| "epoch": 0.10239827539746699, |
| "grad_norm": 13.61806583404541, |
| "learning_rate": 9.999920375234096e-07, |
| "logits/chosen": -0.5286533092264682, |
| "logits/rejected": -0.5037254571131352, |
| "logps/chosen": -254.55, |
| "logps/rejected": -306.6, |
| "loss": 0.5476, |
| "mean_token_accuracy": 0.8002152472734452, |
| "num_tokens": 6709991.0, |
| "rewards/accuracies": 0.715625, |
| "rewards/chosen": -1.5595814138650894, |
| "rewards/margins": 0.5580403164029122, |
| "rewards/rejected": -2.1176217913627626, |
| "setc/cal_net_lr": 0.0008666666666666668, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07659839615225791, |
| "setc/logratio_margin_delta": 3.3602832794189452, |
| "setc/logratio_margin_vanilla": 52.44375, |
| "setc/logratio_margin_weighted": 55.804033279418945, |
| "step": 190 |
| }, |
| { |
| "entropy": 0.43544921875, |
| "epoch": 0.10778765831312315, |
| "grad_norm": 14.541138648986816, |
| "learning_rate": 9.998504894389128e-07, |
| "logits/chosen": -0.527879368703732, |
| "logits/rejected": -0.5018783955560024, |
| "logps/chosen": -266.7, |
| "logps/rejected": -320.55, |
| "loss": 0.5664, |
| "mean_token_accuracy": 0.800949826836586, |
| "num_tokens": 7083816.0, |
| "rewards/accuracies": 0.70625, |
| "rewards/chosen": -1.7819188117980957, |
| "rewards/margins": 0.5936955399811268, |
| "rewards/rejected": -2.3756143033504484, |
| "setc/cal_net_lr": 0.0009499999999999998, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0854849111288786, |
| "setc/logratio_margin_delta": 3.891430473327637, |
| "setc/logratio_margin_vanilla": 55.478125, |
| "setc/logratio_margin_weighted": 59.36955547332764, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10778765831312315, |
| "eval_entropy": 0.44390869140625, |
| "eval_logits/chosen": -0.579585194957734, |
| "eval_logits/rejected": -0.5690945474827908, |
| "eval_logps/chosen": -275.25, |
| "eval_logps/rejected": -336.9375, |
| "eval_loss": 0.5417425036430359, |
| "eval_mean_token_accuracy": 0.7958512287586927, |
| "eval_num_tokens": 7083816.0, |
| "eval_rewards/accuracies": 0.755859375, |
| "eval_rewards/chosen": -1.8367619067430496, |
| "eval_rewards/margins": 0.6809160728007555, |
| "eval_rewards/rejected": -2.5176779851317406, |
| "eval_runtime": 14.1879, |
| "eval_samples_per_second": 35.241, |
| "eval_setc/cal_net_lr": 0.0009916666666666676, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.09134258469566703, |
| "eval_setc/logratio_margin_delta": 4.674029171466827, |
| "eval_setc/logratio_margin_vanilla": 64.115234375, |
| "eval_setc/logratio_margin_weighted": 68.78926354646683, |
| "eval_steps_per_second": 2.255, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.4421875, |
| "epoch": 0.11317704122877931, |
| "grad_norm": 15.038735389709473, |
| "learning_rate": 9.995320550872586e-07, |
| "logits/chosen": -0.5982554625710975, |
| "logits/rejected": -0.5934455933800111, |
| "logps/chosen": -288.05, |
| "logps/rejected": -341.35, |
| "loss": 0.5767, |
| "mean_token_accuracy": 0.7952886909246445, |
| "num_tokens": 7430056.0, |
| "rewards/accuracies": 0.740625, |
| "rewards/chosen": -1.9779862701892852, |
| "rewards/margins": 0.5607165463268757, |
| "rewards/rejected": -2.538702827692032, |
| "setc/cal_net_lr": 0.0009994884161461147, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09023903161287308, |
| "setc/logratio_margin_delta": 3.6450933456420898, |
| "setc/logratio_margin_vanilla": 52.4265625, |
| "setc/logratio_margin_weighted": 56.07165584564209, |
| "step": 210 |
| }, |
| { |
| "entropy": 0.42705078125, |
| "epoch": 0.11856642414443547, |
| "grad_norm": 16.99634552001953, |
| "learning_rate": 9.990368471554702e-07, |
| "logits/chosen": -0.5453219890997539, |
| "logits/rejected": -0.5525475843850532, |
| "logps/chosen": -261.55, |
| "logps/rejected": -330.25, |
| "loss": 0.5191, |
| "mean_token_accuracy": 0.8015528202056885, |
| "num_tokens": 7791142.0, |
| "rewards/accuracies": 0.76875, |
| "rewards/chosen": -1.7827031493186951, |
| "rewards/margins": 0.7491906136274338, |
| "rewards/rejected": -2.531893861293793, |
| "setc/cal_net_lr": 0.0009992394884850806, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09283662959933281, |
| "setc/logratio_margin_delta": 5.744063377380371, |
| "setc/logratio_margin_vanilla": 69.175, |
| "setc/logratio_margin_weighted": 74.91906337738037, |
| "step": 220 |
| }, |
| { |
| "entropy": 0.426953125, |
| "epoch": 0.12395580706009161, |
| "grad_norm": 13.820030212402344, |
| "learning_rate": 9.98365040886914e-07, |
| "logits/chosen": -0.5075058959811635, |
| "logits/rejected": -0.5316922545794671, |
| "logps/chosen": -297.45, |
| "logps/rejected": -381.2, |
| "loss": 0.5132, |
| "mean_token_accuracy": 0.8004782497882843, |
| "num_tokens": 8160190.0, |
| "rewards/accuracies": 0.75625, |
| "rewards/chosen": -2.1069876670837404, |
| "rewards/margins": 0.8769942551851273, |
| "rewards/rejected": -2.9839819371700287, |
| "setc/cal_net_lr": 0.0009978552797828366, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.10098463930189609, |
| "setc/logratio_margin_delta": 6.56817512512207, |
| "setc/logratio_margin_vanilla": 81.13125, |
| "setc/logratio_margin_weighted": 87.69942512512208, |
| "step": 230 |
| }, |
| { |
| "entropy": 0.41240234375, |
| "epoch": 0.1293451899757478, |
| "grad_norm": 20.368221282958984, |
| "learning_rate": 9.975168740192852e-07, |
| "logits/chosen": -0.5192135050793214, |
| "logits/rejected": -0.5336463823173456, |
| "logps/chosen": -274.9, |
| "logps/rejected": -356.6, |
| "loss": 0.5139, |
| "mean_token_accuracy": 0.7949420899152756, |
| "num_tokens": 8525986.0, |
| "rewards/accuracies": 0.753125, |
| "rewards/chosen": -1.9865135312080384, |
| "rewards/margins": 0.8216024398803711, |
| "rewards/rejected": -2.8081159591674805, |
| "setc/cal_net_lr": 0.00099575444916116, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.1036558359861374, |
| "setc/logratio_margin_delta": 7.082119369506836, |
| "setc/logratio_margin_vanilla": 75.078125, |
| "setc/logratio_margin_weighted": 82.16024436950684, |
| "step": 240 |
| }, |
| { |
| "entropy": 0.4126953125, |
| "epoch": 0.13473457289140395, |
| "grad_norm": 32.17527770996094, |
| "learning_rate": 9.964926467004765e-07, |
| "logits/chosen": -0.5428571750679503, |
| "logits/rejected": -0.5702547242679802, |
| "logps/chosen": -319.9, |
| "logps/rejected": -424.2, |
| "loss": 0.4687, |
| "mean_token_accuracy": 0.782842355966568, |
| "num_tokens": 8896270.0, |
| "rewards/accuracies": 0.775, |
| "rewards/chosen": -2.4224733889102934, |
| "rewards/margins": 1.0895347505807877, |
| "rewards/rejected": -3.5120081663131715, |
| "setc/cal_net_lr": 0.000992940020593752, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.10739997066557408, |
| "setc/logratio_margin_delta": 8.453478050231933, |
| "setc/logratio_margin_vanilla": 100.5, |
| "setc/logratio_margin_weighted": 108.95347805023194, |
| "step": 250 |
| }, |
| { |
| "entropy": 0.40068359375, |
| "epoch": 0.14012395580706008, |
| "grad_norm": 26.627729415893555, |
| "learning_rate": 9.952927213823627e-07, |
| "logits/chosen": -0.4605946438072367, |
| "logits/rejected": -0.490033485518619, |
| "logps/chosen": -351.4, |
| "logps/rejected": -466.9, |
| "loss": 0.4693, |
| "mean_token_accuracy": 0.7776162534952163, |
| "num_tokens": 9245420.0, |
| "rewards/accuracies": 0.76875, |
| "rewards/chosen": -2.833879363536835, |
| "rewards/margins": 1.184336504340172, |
| "rewards/rejected": -4.018215835094452, |
| "setc/cal_net_lr": 0.000989416045220137, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.10721651390194893, |
| "setc/logratio_margin_delta": 7.883653450012207, |
| "setc/logratio_margin_vanilla": 110.55, |
| "setc/logratio_margin_weighted": 118.4336534500122, |
| "step": 260 |
| }, |
| { |
| "entropy": 0.412109375, |
| "epoch": 0.14551333872271624, |
| "grad_norm": 35.465599060058594, |
| "learning_rate": 9.939175226925377e-07, |
| "logits/chosen": -0.5947771680223737, |
| "logits/rejected": -0.609633545923327, |
| "logps/chosen": -345.4, |
| "logps/rejected": -476.9, |
| "loss": 0.4478, |
| "mean_token_accuracy": 0.7697073727846145, |
| "num_tokens": 9603870.0, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.724973976612091, |
| "rewards/margins": 1.3795409053564072, |
| "rewards/rejected": -4.10451488494873, |
| "setc/cal_net_lr": 0.000985187595514384, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.10481685362756252, |
| "setc/logratio_margin_delta": 10.8415922164917, |
| "setc/logratio_margin_vanilla": 127.1125, |
| "setc/logratio_margin_weighted": 137.9540922164917, |
| "step": 270 |
| }, |
| { |
| "entropy": 0.402734375, |
| "epoch": 0.1509027216383724, |
| "grad_norm": 20.82551383972168, |
| "learning_rate": 9.92367537284046e-07, |
| "logits/chosen": -0.5548631230339891, |
| "logits/rejected": -0.5871126674101008, |
| "logps/chosen": -330.9, |
| "logps/rejected": -440.2, |
| "loss": 0.4953, |
| "mean_token_accuracy": 0.7791453748941422, |
| "num_tokens": 9933926.0, |
| "rewards/accuracies": 0.775, |
| "rewards/chosen": -2.6326347470283507, |
| "rewards/margins": 1.1489318370819093, |
| "rewards/rejected": -3.7815666437149047, |
| "setc/cal_net_lr": 0.0009802607579836912, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.10762366987764835, |
| "setc/logratio_margin_delta": 10.655691719055175, |
| "setc/logratio_margin_vanilla": 104.2375, |
| "setc/logratio_margin_weighted": 114.89319171905518, |
| "step": 280 |
| }, |
| { |
| "entropy": 0.3955078125, |
| "epoch": 0.15629210455402856, |
| "grad_norm": 18.485855102539062, |
| "learning_rate": 9.906433136631696e-07, |
| "logits/chosen": -0.543341739516008, |
| "logits/rejected": -0.5576040736941209, |
| "logps/chosen": -321.1, |
| "logps/rejected": -424.9, |
| "loss": 0.4656, |
| "mean_token_accuracy": 0.7913450181484223, |
| "num_tokens": 10300364.0, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.416798382997513, |
| "rewards/margins": 1.1477631881833077, |
| "rewards/rejected": -3.5645615816116334, |
| "setc/cal_net_lr": 0.0009746426244073601, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.1039271742105484, |
| "setc/logratio_margin_delta": 9.02632179260254, |
| "setc/logratio_margin_vanilla": 105.75, |
| "setc/logratio_margin_weighted": 114.77632179260254, |
| "step": 290 |
| }, |
| { |
| "entropy": 0.37470703125, |
| "epoch": 0.16168148746968472, |
| "grad_norm": 19.445205688476562, |
| "learning_rate": 9.887454619953203e-07, |
| "logits/chosen": -0.5234257126029256, |
| "logits/rejected": -0.5395859317035523, |
| "logps/chosen": -333.7, |
| "logps/rejected": -451.1, |
| "loss": 0.4567, |
| "mean_token_accuracy": 0.7912471622228623, |
| "num_tokens": 10652272.0, |
| "rewards/accuracies": 0.81875, |
| "rewards/chosen": -2.56951225399971, |
| "rewards/margins": 1.2171583086252213, |
| "rewards/rejected": -3.7866706013679505, |
| "setc/cal_net_lr": 0.0009683412816287581, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09890042394399642, |
| "setc/logratio_margin_delta": 9.815832328796386, |
| "setc/logratio_margin_vanilla": 111.9, |
| "setc/logratio_margin_weighted": 121.71583232879638, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16168148746968472, |
| "eval_entropy": 0.3961181640625, |
| "eval_logits/chosen": -0.5485536702445084, |
| "eval_logits/rejected": -0.5521437157571957, |
| "eval_logps/chosen": -344.875, |
| "eval_logps/rejected": -457.9375, |
| "eval_loss": 0.45288482308387756, |
| "eval_mean_token_accuracy": 0.7810891252011061, |
| "eval_num_tokens": 10652272.0, |
| "eval_rewards/accuracies": 0.810546875, |
| "eval_rewards/chosen": -2.6322504356503487, |
| "eval_rewards/margins": 1.2452996505890042, |
| "eval_rewards/rejected": -3.877550110220909, |
| "eval_runtime": 14.1906, |
| "eval_samples_per_second": 35.235, |
| "eval_setc/cal_net_lr": 0.000964790405843831, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.09871792490594089, |
| "eval_setc/logratio_margin_delta": 9.83822700381279, |
| "eval_setc/logratio_margin_vanilla": 115.9619140625, |
| "eval_setc/logratio_margin_weighted": 125.80014106631279, |
| "eval_steps_per_second": 2.255, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.37734375, |
| "epoch": 0.16707087038534088, |
| "grad_norm": 25.576738357543945, |
| "learning_rate": 9.866746538891172e-07, |
| "logits/chosen": -0.5276412640451731, |
| "logits/rejected": -0.5328882291269553, |
| "logps/chosen": -361.3, |
| "logps/rejected": -468.8, |
| "loss": 0.5015, |
| "mean_token_accuracy": 0.7804528266191483, |
| "num_tokens": 11025201.0, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.8542094469070434, |
| "rewards/margins": 1.1937554821372032, |
| "rewards/rejected": -4.047964870929718, |
| "setc/cal_net_lr": 0.0009613657999149686, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09489493407309055, |
| "setc/logratio_margin_delta": 11.188052368164062, |
| "setc/logratio_margin_vanilla": 108.1875, |
| "setc/logratio_margin_weighted": 119.37555236816407, |
| "step": 310 |
| }, |
| { |
| "entropy": 0.39609375, |
| "epoch": 0.17246025330099704, |
| "grad_norm": 25.457834243774414, |
| "learning_rate": 9.844316221587202e-07, |
| "logits/chosen": -0.547783353678369, |
| "logits/rejected": -0.5447233904757801, |
| "logps/chosen": -354.2, |
| "logps/rejected": -475.9, |
| "loss": 0.4576, |
| "mean_token_accuracy": 0.7729141265153885, |
| "num_tokens": 11378241.0, |
| "rewards/accuracies": 0.775, |
| "rewards/chosen": -2.7483198761940004, |
| "rewards/margins": 1.3043047487735748, |
| "rewards/rejected": -4.052624702453613, |
| "setc/cal_net_lr": 0.0009537262199008873, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09067346304655075, |
| "setc/logratio_margin_delta": 9.902355837821961, |
| "setc/logratio_margin_vanilla": 120.528125, |
| "setc/logratio_margin_weighted": 130.43048083782196, |
| "step": 320 |
| }, |
| { |
| "entropy": 0.3943359375, |
| "epoch": 0.1778496362166532, |
| "grad_norm": 26.59535789489746, |
| "learning_rate": 9.82017160564499e-07, |
| "logits/chosen": -0.5416634787030847, |
| "logits/rejected": -0.5507818676121325, |
| "logps/chosen": -370.9, |
| "logps/rejected": -523.6, |
| "loss": 0.4163, |
| "mean_token_accuracy": 0.7765046328306198, |
| "num_tokens": 11731948.0, |
| "rewards/accuracies": 0.809375, |
| "rewards/chosen": -2.9148443579673766, |
| "rewards/margins": 1.6325711846351623, |
| "rewards/rejected": -4.547415578365326, |
| "setc/cal_net_lr": 0.0009454335381365463, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09494004286825657, |
| "setc/logratio_margin_delta": 11.8196231842041, |
| "setc/logratio_margin_vanilla": 151.4375, |
| "setc/logratio_margin_weighted": 163.2571231842041, |
| "step": 330 |
| }, |
| { |
| "entropy": 0.35849609375, |
| "epoch": 0.18323901913230936, |
| "grad_norm": 32.58420944213867, |
| "learning_rate": 9.79432123532143e-07, |
| "logits/chosen": -0.49375742239443066, |
| "logits/rejected": -0.5324902808324306, |
| "logps/chosen": -475.7, |
| "logps/rejected": -628.1, |
| "loss": 0.4615, |
| "mean_token_accuracy": 0.7430515229701996, |
| "num_tokens": 12055728.0, |
| "rewards/accuracies": 0.75625, |
| "rewards/chosen": -4.24499009847641, |
| "rewards/margins": 1.6139036893844605, |
| "rewards/rejected": -5.858893728256225, |
| "setc/cal_net_lr": 0.0009364996912584834, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09690484963357449, |
| "setc/logratio_margin_delta": 11.415373611450196, |
| "setc/logratio_margin_vanilla": 149.975, |
| "setc/logratio_margin_weighted": 161.3903736114502, |
| "step": 340 |
| }, |
| { |
| "entropy": 0.3806640625, |
| "epoch": 0.18862840204796552, |
| "grad_norm": 27.812801361083984, |
| "learning_rate": 9.76677425850295e-07, |
| "logits/chosen": -0.6376107562818031, |
| "logits/rejected": -0.6418548650284829, |
| "logps/chosen": -465.6, |
| "logps/rejected": -605.1, |
| "loss": 0.4574, |
| "mean_token_accuracy": 0.7482803016901016, |
| "num_tokens": 12415175.0, |
| "rewards/accuracies": 0.778125, |
| "rewards/chosen": -3.9335139870643614, |
| "rewards/margins": 1.5329814106225967, |
| "rewards/rejected": -5.466495335102081, |
| "setc/cal_net_lr": 0.000926937538807931, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08994356840848923, |
| "setc/logratio_margin_delta": 11.082522773742676, |
| "setc/logratio_margin_vanilla": 142.215625, |
| "setc/logratio_margin_weighted": 153.29814777374267, |
| "step": 350 |
| }, |
| { |
| "entropy": 0.4140625, |
| "epoch": 0.19401778496362165, |
| "grad_norm": 21.86480712890625, |
| "learning_rate": 9.737540423468298e-07, |
| "logits/chosen": -0.6784440598334419, |
| "logits/rejected": -0.7089506769710789, |
| "logps/chosen": -366.3, |
| "logps/rejected": -501.6, |
| "loss": 0.4309, |
| "mean_token_accuracy": 0.7626632928848267, |
| "num_tokens": 12762716.0, |
| "rewards/accuracies": 0.834375, |
| "rewards/chosen": -2.877052891254425, |
| "rewards/margins": 1.4415714114904403, |
| "rewards/rejected": -4.318624341487885, |
| "setc/cal_net_lr": 0.0009167608447205565, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08553993590176105, |
| "setc/logratio_margin_delta": 10.219639015197753, |
| "setc/logratio_margin_vanilla": 133.9375, |
| "setc/logratio_margin_weighted": 144.15713901519774, |
| "step": 360 |
| }, |
| { |
| "entropy": 0.43505859375, |
| "epoch": 0.1994071678792778, |
| "grad_norm": 38.346282958984375, |
| "learning_rate": 9.706630075438816e-07, |
| "logits/chosen": -0.641167718623423, |
| "logits/rejected": -0.6645962228342419, |
| "logps/chosen": -407.6, |
| "logps/rejected": -551.5, |
| "loss": 0.4323, |
| "mean_token_accuracy": 0.7579832553863526, |
| "num_tokens": 13126907.0, |
| "rewards/accuracies": 0.815625, |
| "rewards/chosen": -3.3240499973297117, |
| "rewards/margins": 1.5297267407178878, |
| "rewards/rejected": -4.8537767171859745, |
| "setc/cal_net_lr": 0.0009059842575144065, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08733033128082753, |
| "setc/logratio_margin_delta": 11.610177040100098, |
| "setc/logratio_margin_vanilla": 141.3625, |
| "setc/logratio_margin_weighted": 152.9726770401001, |
| "step": 370 |
| }, |
| { |
| "entropy": 0.44599609375, |
| "epoch": 0.20479655079493397, |
| "grad_norm": 23.460803985595703, |
| "learning_rate": 9.67405415291751e-07, |
| "logits/chosen": -0.7566490767380819, |
| "logits/rejected": -0.7921768754823747, |
| "logps/chosen": -448.3, |
| "logps/rejected": -605.6, |
| "loss": 0.4256, |
| "mean_token_accuracy": 0.7468535989522934, |
| "num_tokens": 13476130.0, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.7169021725654603, |
| "rewards/margins": 1.668231150507927, |
| "rewards/rejected": -5.385133290290833, |
| "setc/cal_net_lr": 0.0008946232892045628, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08953866809606552, |
| "setc/logratio_margin_delta": 11.298118209838867, |
| "setc/logratio_margin_vanilla": 155.525, |
| "setc/logratio_margin_weighted": 166.82311820983887, |
| "step": 380 |
| }, |
| { |
| "entropy": 0.4015625, |
| "epoch": 0.21018593371059013, |
| "grad_norm": 37.17218780517578, |
| "learning_rate": 9.639824183818136e-07, |
| "logits/chosen": -0.6983920089322826, |
| "logits/rejected": -0.7127737130064461, |
| "logps/chosen": -405.7, |
| "logps/rejected": -581.9, |
| "loss": 0.3994, |
| "mean_token_accuracy": 0.7641981273889542, |
| "num_tokens": 13824865.0, |
| "rewards/accuracies": 0.8375, |
| "rewards/chosen": -3.3119530200958254, |
| "rewards/margins": 1.9102674454450608, |
| "rewards/rejected": -5.222220623493195, |
| "setc/cal_net_lr": 0.0008826942929748675, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08975566849112511, |
| "setc/logratio_margin_delta": 13.101746749877929, |
| "setc/logratio_margin_vanilla": 177.925, |
| "setc/logratio_margin_weighted": 191.02674674987793, |
| "step": 390 |
| }, |
| { |
| "entropy": 0.39140625, |
| "epoch": 0.2155753166262463, |
| "grad_norm": 31.108200073242188, |
| "learning_rate": 9.603952281385731e-07, |
| "logits/chosen": -0.6745283746311073, |
| "logits/rejected": -0.7180434918542554, |
| "logps/chosen": -434.1, |
| "logps/rejected": -567.2, |
| "loss": 0.4993, |
| "mean_token_accuracy": 0.7623392403125763, |
| "num_tokens": 14186289.0, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.6153177618980408, |
| "rewards/margins": 1.431649774312973, |
| "rewards/rejected": -5.046967601776123, |
| "setc/cal_net_lr": 0.0008702144396388513, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0924822997301817, |
| "setc/logratio_margin_delta": 9.014981460571288, |
| "setc/logratio_margin_vanilla": 134.15, |
| "setc/logratio_margin_weighted": 143.16498146057128, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2155753166262463, |
| "eval_entropy": 0.39117431640625, |
| "eval_logits/chosen": -0.7596684509609115, |
| "eval_logits/rejected": -0.7672941152249027, |
| "eval_logps/chosen": -379.0625, |
| "eval_logps/rejected": -529.625, |
| "eval_loss": 0.403584361076355, |
| "eval_mean_token_accuracy": 0.7722007241100073, |
| "eval_num_tokens": 14186289.0, |
| "eval_rewards/accuracies": 0.833984375, |
| "eval_rewards/chosen": -2.980880632996559, |
| "eval_rewards/margins": 1.6366331987082958, |
| "eval_rewards/rejected": -4.617513798177242, |
| "eval_runtime": 14.2806, |
| "eval_samples_per_second": 35.013, |
| "eval_setc/cal_net_lr": 0.0008634697702573876, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.08779020188376307, |
| "eval_setc/logratio_margin_delta": 11.151197791099548, |
| "eval_setc/logratio_margin_vanilla": 153.91796875, |
| "eval_setc/logratio_margin_weighted": 165.06916654109955, |
| "eval_steps_per_second": 2.241, |
| "step": 400 |
| }, |
| { |
| "entropy": 0.3703125, |
| "epoch": 0.22096469954190245, |
| "grad_norm": 24.23314094543457, |
| "learning_rate": 9.566451139909988e-07, |
| "logits/chosen": -0.6472189487288608, |
| "logits/rejected": -0.6706296399402472, |
| "logps/chosen": -380.5, |
| "logps/rejected": -509.3, |
| "loss": 0.4374, |
| "mean_token_accuracy": 0.7814467757940292, |
| "num_tokens": 14563626.0, |
| "rewards/accuracies": 0.809375, |
| "rewards/chosen": -3.0536776065826414, |
| "rewards/margins": 1.3719112485647202, |
| "rewards/rejected": -4.425588870048523, |
| "setc/cal_net_lr": 0.0008572016929237579, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08474965952336788, |
| "setc/logratio_margin_delta": 9.878629875183105, |
| "setc/logratio_margin_vanilla": 127.3125, |
| "setc/logratio_margin_weighted": 137.1911298751831, |
| "step": 410 |
| }, |
| { |
| "entropy": 0.37265625, |
| "epoch": 0.22635408245755861, |
| "grad_norm": 28.370731353759766, |
| "learning_rate": 9.527334030233028e-07, |
| "logits/chosen": -0.6874033644949044, |
| "logits/rejected": -0.6702958053090058, |
| "logps/chosen": -365.85, |
| "logps/rejected": -494.9, |
| "loss": 0.486, |
| "mean_token_accuracy": 0.7773175150156021, |
| "num_tokens": 14919543.0, |
| "rewards/accuracies": 0.775, |
| "rewards/chosen": -2.9779480576515196, |
| "rewards/margins": 1.3624024033546447, |
| "rewards/rejected": -4.340350472927094, |
| "setc/cal_net_lr": 0.0008436747836132224, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08348823450505734, |
| "setc/logratio_margin_delta": 11.090241241455079, |
| "setc/logratio_margin_vanilla": 125.15, |
| "setc/logratio_margin_weighted": 136.24024124145507, |
| "step": 420 |
| }, |
| { |
| "entropy": 0.36806640625, |
| "epoch": 0.23174346537321477, |
| "grad_norm": 20.790348052978516, |
| "learning_rate": 9.486614795053136e-07, |
| "logits/chosen": -0.687221071504475, |
| "logits/rejected": -0.6840851017343192, |
| "logps/chosen": -355.1, |
| "logps/rejected": -485.7, |
| "loss": 0.4321, |
| "mean_token_accuracy": 0.785921522974968, |
| "num_tokens": 15277577.0, |
| "rewards/accuracies": 0.825, |
| "rewards/chosen": -2.7120434999465943, |
| "rewards/margins": 1.4238930448889733, |
| "rewards/rejected": -4.135936522483826, |
| "setc/cal_net_lr": 0.0008296531825858478, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08098721131682396, |
| "setc/logratio_margin_delta": 11.239310264587402, |
| "setc/logratio_margin_vanilla": 131.15, |
| "setc/logratio_margin_weighted": 142.3893102645874, |
| "step": 430 |
| }, |
| { |
| "entropy": 0.36962890625, |
| "epoch": 0.23713284828887093, |
| "grad_norm": 29.099376678466797, |
| "learning_rate": 9.444307844026128e-07, |
| "logits/chosen": -0.7001576273000311, |
| "logits/rejected": -0.7564168572512255, |
| "logps/chosen": -394.0, |
| "logps/rejected": -555.2, |
| "loss": 0.4012, |
| "mean_token_accuracy": 0.777391391992569, |
| "num_tokens": 15614929.0, |
| "rewards/accuracies": 0.834375, |
| "rewards/chosen": -3.16938259601593, |
| "rewards/margins": 1.7262235552072525, |
| "rewards/rejected": -4.8956061720848085, |
| "setc/cal_net_lr": 0.0008151570727884652, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08620954863727093, |
| "setc/logratio_margin_delta": 14.22236442565918, |
| "setc/logratio_margin_vanilla": 158.4, |
| "setc/logratio_margin_weighted": 172.62236442565919, |
| "step": 440 |
| }, |
| { |
| "entropy": 0.35576171875, |
| "epoch": 0.2425222312045271, |
| "grad_norm": 36.5993537902832, |
| "learning_rate": 9.400428148666088e-07, |
| "logits/chosen": -0.7156464373663424, |
| "logits/rejected": -0.7045427588458122, |
| "logps/chosen": -444.4, |
| "logps/rejected": -642.3, |
| "loss": 0.4123, |
| "mean_token_accuracy": 0.7572789788246155, |
| "num_tokens": 15977200.0, |
| "rewards/accuracies": 0.821875, |
| "rewards/chosen": -3.769306206703186, |
| "rewards/margins": 2.0929836332798004, |
| "rewards/rejected": -5.86228985786438, |
| "setc/cal_net_lr": 0.0008002073201844367, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08339687958359718, |
| "setc/logratio_margin_delta": 14.948371696472169, |
| "setc/logratio_margin_vanilla": 194.35, |
| "setc/logratio_margin_weighted": 209.29837169647217, |
| "step": 450 |
| }, |
| { |
| "entropy": 0.34697265625, |
| "epoch": 0.24791161412018323, |
| "grad_norm": 21.59111785888672, |
| "learning_rate": 9.354991237047272e-07, |
| "logits/chosen": -0.6320623481408233, |
| "logits/rejected": -0.6675801158708582, |
| "logps/chosen": -436.2, |
| "logps/rejected": -619.0, |
| "loss": 0.3781, |
| "mean_token_accuracy": 0.7620709240436554, |
| "num_tokens": 16318118.0, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.739296352863312, |
| "rewards/margins": 1.971435186266899, |
| "rewards/rejected": -5.710731565952301, |
| "setc/cal_net_lr": 0.0007848254437188073, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08574292548000813, |
| "setc/logratio_margin_delta": 13.068524169921876, |
| "setc/logratio_margin_vanilla": 184.075, |
| "setc/logratio_margin_weighted": 197.14352416992188, |
| "step": 460 |
| }, |
| { |
| "entropy": 0.35234375, |
| "epoch": 0.2533009970358394, |
| "grad_norm": 39.83766555786133, |
| "learning_rate": 9.308013188309052e-07, |
| "logits/chosen": -0.5782339558042321, |
| "logits/rejected": -0.5778674119896376, |
| "logps/chosen": -449.1, |
| "logps/rejected": -629.1, |
| "loss": 0.4338, |
| "mean_token_accuracy": 0.7699876993894577, |
| "num_tokens": 16695798.0, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.8279688358306885, |
| "rewards/margins": 1.9190792471170426, |
| "rewards/rejected": -5.747048032283783, |
| "setc/cal_net_lr": 0.0007690335843435461, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08457843437790871, |
| "setc/logratio_margin_delta": 15.239182376861573, |
| "setc/logratio_margin_vanilla": 176.66875, |
| "setc/logratio_margin_weighted": 191.90793237686157, |
| "step": 470 |
| }, |
| { |
| "entropy": 0.37353515625, |
| "epoch": 0.2586903799514956, |
| "grad_norm": 61.235111236572266, |
| "learning_rate": 9.259510626965874e-07, |
| "logits/chosen": -0.6445287935159719, |
| "logits/rejected": -0.6687137779580832, |
| "logps/chosen": -441.0, |
| "logps/rejected": -700.9, |
| "loss": 0.3949, |
| "mean_token_accuracy": 0.7573456019163132, |
| "num_tokens": 17051928.0, |
| "rewards/accuracies": 0.846875, |
| "rewards/chosen": -3.6645211458206175, |
| "rewards/margins": 2.796916735172272, |
| "rewards/rejected": -6.46143786907196, |
| "setc/cal_net_lr": 0.0007528544731474588, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08809281550347806, |
| "setc/logratio_margin_delta": 17.716680145263673, |
| "setc/logratio_margin_vanilla": 261.975, |
| "setc/logratio_margin_weighted": 279.69168014526366, |
| "step": 480 |
| }, |
| { |
| "entropy": 0.3984375, |
| "epoch": 0.26407976286715173, |
| "grad_norm": 51.56365966796875, |
| "learning_rate": 9.209500717024184e-07, |
| "logits/chosen": -0.5772588425384099, |
| "logits/rejected": -0.5829906453335669, |
| "logps/chosen": -433.8, |
| "logps/rejected": -669.7, |
| "loss": 0.3728, |
| "mean_token_accuracy": 0.7556526750326157, |
| "num_tokens": 17399473.0, |
| "rewards/accuracies": 0.840625, |
| "rewards/chosen": -3.6556755781173704, |
| "rewards/margins": 2.525260826945305, |
| "rewards/rejected": -6.180936527252197, |
| "setc/cal_net_lr": 0.0007363113986366443, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08914664313197136, |
| "setc/logratio_margin_delta": 16.12609519958496, |
| "setc/logratio_margin_vanilla": 236.4, |
| "setc/logratio_margin_weighted": 252.52609519958497, |
| "step": 490 |
| }, |
| { |
| "entropy": 0.3970703125, |
| "epoch": 0.2694691457828079, |
| "grad_norm": 41.8188591003418, |
| "learning_rate": 9.158001155908463e-07, |
| "logits/chosen": -0.5640562576190764, |
| "logits/rejected": -0.5751849485254861, |
| "logps/chosen": -457.0, |
| "logps/rejected": -668.7, |
| "loss": 0.4275, |
| "mean_token_accuracy": 0.7455924898386002, |
| "num_tokens": 17747161.0, |
| "rewards/accuracies": 0.83125, |
| "rewards/chosen": -3.902832806110382, |
| "rewards/margins": 2.2202740639448164, |
| "rewards/rejected": -6.123106646537781, |
| "setc/cal_net_lr": 0.0007194281732126001, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0880829505622387, |
| "setc/logratio_margin_delta": 13.539912796020507, |
| "setc/logratio_margin_vanilla": 208.4875, |
| "setc/logratio_margin_weighted": 222.0274127960205, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2694691457828079, |
| "eval_entropy": 0.40399169921875, |
| "eval_logits/chosen": -0.592532312686467, |
| "eval_logits/rejected": -0.605988185624285, |
| "eval_logps/chosen": -462.9375, |
| "eval_logps/rejected": -684.0, |
| "eval_loss": 0.35599419474601746, |
| "eval_mean_token_accuracy": 0.7497607115656137, |
| "eval_num_tokens": 17747161.0, |
| "eval_rewards/accuracies": 0.849609375, |
| "eval_rewards/chosen": -3.872654564678669, |
| "eval_rewards/margins": 2.3638561107218266, |
| "eval_rewards/rejected": -6.236510649323463, |
| "eval_runtime": 14.2091, |
| "eval_samples_per_second": 35.189, |
| "eval_setc/cal_net_lr": 0.000710449074239924, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.08703692350536585, |
| "eval_setc/logratio_margin_delta": 14.291290760040283, |
| "eval_setc/logratio_margin_vanilla": 222.796875, |
| "eval_setc/logratio_margin_weighted": 237.08816576004028, |
| "eval_steps_per_second": 2.252, |
| "step": 500 |
| }, |
| { |
| "entropy": 0.3931640625, |
| "epoch": 0.274858528698464, |
| "grad_norm": 24.2525577545166, |
| "learning_rate": 9.105030168198502e-07, |
| "logits/chosen": -0.6082334317471639, |
| "logits/rejected": -0.6164790579325305, |
| "logps/chosen": -376.2, |
| "logps/rejected": -538.2, |
| "loss": 0.4458, |
| "mean_token_accuracy": 0.7635719299316406, |
| "num_tokens": 18074898.0, |
| "rewards/accuracies": 0.809375, |
| "rewards/chosen": -3.032584583759308, |
| "rewards/margins": 1.7272971540689468, |
| "rewards/rejected": -4.7598817348480225, |
| "setc/cal_net_lr": 0.0007022290988962156, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08350460566580295, |
| "setc/logratio_margin_delta": 12.80472412109375, |
| "setc/logratio_margin_vanilla": 159.925, |
| "setc/logratio_margin_weighted": 172.72972412109374, |
| "step": 510 |
| }, |
| { |
| "entropy": 0.418359375, |
| "epoch": 0.28024791161412016, |
| "grad_norm": 49.45343017578125, |
| "learning_rate": 9.050606499180102e-07, |
| "logits/chosen": -0.6519357377370888, |
| "logits/rejected": -0.6423741925659583, |
| "logps/chosen": -402.5, |
| "logps/rejected": -578.4, |
| "loss": 0.4126, |
| "mean_token_accuracy": 0.7656619876623154, |
| "num_tokens": 18445858.0, |
| "rewards/accuracies": 0.834375, |
| "rewards/chosen": -3.202549707889557, |
| "rewards/margins": 1.860961028933525, |
| "rewards/rejected": -5.063510704040527, |
| "setc/cal_net_lr": 0.0006847389323470055, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0847404919564724, |
| "setc/logratio_margin_delta": 11.646109008789063, |
| "setc/logratio_margin_vanilla": 174.45, |
| "setc/logratio_margin_weighted": 186.09610900878906, |
| "step": 520 |
| }, |
| { |
| "entropy": 0.43330078125, |
| "epoch": 0.2856372945297763, |
| "grad_norm": 29.773883819580078, |
| "learning_rate": 8.994749408211536e-07, |
| "logits/chosen": -0.7015092396795138, |
| "logits/rejected": -0.7123617485729619, |
| "logps/chosen": -520.0, |
| "logps/rejected": -806.7, |
| "loss": 0.3962, |
| "mean_token_accuracy": 0.729384770989418, |
| "num_tokens": 18791922.0, |
| "rewards/accuracies": 0.815625, |
| "rewards/chosen": -4.475372099876404, |
| "rewards/margins": 3.0225441813468934, |
| "rewards/rejected": -7.497916245460511, |
| "setc/cal_net_lr": 0.0006669828492279234, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08948785215616226, |
| "setc/logratio_margin_delta": 17.254426956176758, |
| "setc/logratio_margin_vanilla": 285.0, |
| "setc/logratio_margin_weighted": 302.25442695617676, |
| "step": 530 |
| }, |
| { |
| "entropy": 0.4228515625, |
| "epoch": 0.2910266774454325, |
| "grad_norm": 21.04458236694336, |
| "learning_rate": 8.937478661908069e-07, |
| "logits/chosen": -0.7061397320826232, |
| "logits/rejected": -0.7431562334397835, |
| "logps/chosen": -449.0, |
| "logps/rejected": -646.0, |
| "loss": 0.3719, |
| "mean_token_accuracy": 0.7576641947031021, |
| "num_tokens": 19156930.0, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.6808414697647094, |
| "rewards/margins": 2.149125117063522, |
| "rewards/rejected": -5.829966616630554, |
| "setc/cal_net_lr": 0.0006489864079670575, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08795219510793686, |
| "setc/logratio_margin_delta": 14.512511444091796, |
| "setc/logratio_margin_vanilla": 200.4, |
| "setc/logratio_margin_weighted": 214.9125114440918, |
| "step": 540 |
| }, |
| { |
| "entropy": 0.424609375, |
| "epoch": 0.29641606036108864, |
| "grad_norm": 67.26245880126953, |
| "learning_rate": 8.878814527146985e-07, |
| "logits/chosen": -0.6747327955549893, |
| "logits/rejected": -0.729661536184305, |
| "logps/chosen": -396.5, |
| "logps/rejected": -612.5, |
| "loss": 0.3854, |
| "mean_token_accuracy": 0.7647815823554993, |
| "num_tokens": 19505372.0, |
| "rewards/accuracies": 0.8375, |
| "rewards/chosen": -3.2515084028244017, |
| "rewards/margins": 2.2733805954456328, |
| "rewards/rejected": -5.52488911151886, |
| "setc/cal_net_lr": 0.0006307755129683639, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.09044467471539974, |
| "setc/logratio_margin_delta": 15.188063812255859, |
| "setc/logratio_margin_vanilla": 212.15, |
| "setc/logratio_margin_weighted": 227.33806381225585, |
| "step": 550 |
| }, |
| { |
| "entropy": 0.410546875, |
| "epoch": 0.3018054432767448, |
| "grad_norm": 56.778804779052734, |
| "learning_rate": 8.818777763895585e-07, |
| "logits/chosen": -0.6860816122826272, |
| "logits/rejected": -0.6895918590666351, |
| "logps/chosen": -430.6, |
| "logps/rejected": -624.7, |
| "loss": 0.3921, |
| "mean_token_accuracy": 0.7563153892755509, |
| "num_tokens": 19863864.0, |
| "rewards/accuracies": 0.83125, |
| "rewards/chosen": -3.628837537765503, |
| "rewards/margins": 2.037660950422287, |
| "rewards/rejected": -5.666498494148255, |
| "setc/cal_net_lr": 0.0006123763773243994, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08885525353252888, |
| "setc/logratio_margin_delta": 13.316099929809571, |
| "setc/logratio_margin_vanilla": 190.45, |
| "setc/logratio_margin_weighted": 203.76609992980957, |
| "step": 560 |
| }, |
| { |
| "entropy": 0.3953125, |
| "epoch": 0.30719482619240096, |
| "grad_norm": 42.43838119506836, |
| "learning_rate": 8.75738961786467e-07, |
| "logits/chosen": -0.688892356215907, |
| "logits/rejected": -0.7157972843996309, |
| "logps/chosen": -461.9, |
| "logps/rejected": -660.4, |
| "loss": 0.4452, |
| "mean_token_accuracy": 0.7513888716697693, |
| "num_tokens": 20215920.0, |
| "rewards/accuracies": 0.803125, |
| "rewards/chosen": -3.9202752113342285, |
| "rewards/margins": 2.134818637371063, |
| "rewards/rejected": -6.055093717575073, |
| "setc/cal_net_lr": 0.0005938154850847184, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08897339478135109, |
| "setc/logratio_margin_delta": 14.231868362426757, |
| "setc/logratio_margin_vanilla": 199.25, |
| "setc/logratio_margin_weighted": 213.48186836242675, |
| "step": 570 |
| }, |
| { |
| "entropy": 0.3853515625, |
| "epoch": 0.3125842091080571, |
| "grad_norm": 39.56584167480469, |
| "learning_rate": 8.694671812990155e-07, |
| "logits/chosen": -0.643150039587863, |
| "logits/rejected": -0.6802687248343776, |
| "logps/chosen": -361.9, |
| "logps/rejected": -505.1, |
| "loss": 0.4191, |
| "mean_token_accuracy": 0.7855405032634735, |
| "num_tokens": 20574156.0, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.867064654827118, |
| "rewards/margins": 1.4799500912427903, |
| "rewards/rejected": -4.347014737129212, |
| "setc/cal_net_lr": 0.0005751195531342541, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08569788709282875, |
| "setc/logratio_margin_delta": 10.945015716552735, |
| "setc/logratio_margin_vanilla": 137.05, |
| "setc/logratio_margin_weighted": 147.99501571655273, |
| "step": 580 |
| }, |
| { |
| "entropy": 0.41494140625, |
| "epoch": 0.3179735920237133, |
| "grad_norm": 18.509885787963867, |
| "learning_rate": 8.630646543745433e-07, |
| "logits/chosen": -0.7564691341004269, |
| "logits/rejected": -0.7964878528594659, |
| "logps/chosen": -466.7, |
| "logps/rejected": -751.8, |
| "loss": 0.3255, |
| "mean_token_accuracy": 0.7475934147834777, |
| "num_tokens": 20942542.0, |
| "rewards/accuracies": 0.85, |
| "rewards/chosen": -3.9546597361564637, |
| "rewards/margins": 3.00535169839859, |
| "rewards/rejected": -6.960011458396911, |
| "setc/cal_net_lr": 0.000556315492736548, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08982903957366943, |
| "setc/logratio_margin_delta": 20.885171508789064, |
| "setc/logratio_margin_vanilla": 279.65, |
| "setc/logratio_margin_weighted": 300.53517150878906, |
| "step": 590 |
| }, |
| { |
| "entropy": 0.405859375, |
| "epoch": 0.32336297493936944, |
| "grad_norm": 26.894384384155273, |
| "learning_rate": 8.565336467287235e-07, |
| "logits/chosen": -0.7470103611470064, |
| "logits/rejected": -0.7722877603949199, |
| "logps/chosen": -531.8, |
| "logps/rejected": -820.6, |
| "loss": 0.3808, |
| "mean_token_accuracy": 0.7415017306804657, |
| "num_tokens": 21291866.0, |
| "rewards/accuracies": 0.834375, |
| "rewards/chosen": -4.668173170089721, |
| "rewards/margins": 3.097328555583954, |
| "rewards/rejected": -7.765501689910889, |
| "setc/cal_net_lr": 0.0005374303707971904, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08936081901192665, |
| "setc/logratio_margin_delta": 19.732860565185547, |
| "setc/logratio_margin_vanilla": 290.0, |
| "setc/logratio_margin_weighted": 309.73286056518555, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.32336297493936944, |
| "eval_entropy": 0.4246826171875, |
| "eval_logits/chosen": -0.7950303357768591, |
| "eval_logits/rejected": -0.809009124352729, |
| "eval_logps/chosen": -570.9375, |
| "eval_logps/rejected": -884.625, |
| "eval_loss": 0.3066518008708954, |
| "eval_mean_token_accuracy": 0.726261779665947, |
| "eval_num_tokens": 21291866.0, |
| "eval_rewards/accuracies": 0.875, |
| "eval_rewards/chosen": -5.0408351719379425, |
| "eval_rewards/margins": 3.342687487602234, |
| "eval_rewards/rejected": -8.383522659540176, |
| "eval_runtime": 14.2973, |
| "eval_samples_per_second": 34.972, |
| "eval_setc/cal_net_lr": 0.0005274940322432844, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.09106356324627995, |
| "eval_setc/logratio_margin_delta": 19.50989866256714, |
| "eval_setc/logratio_margin_vanilla": 315.8125, |
| "eval_setc/logratio_margin_weighted": 335.32239866256714, |
| "eval_steps_per_second": 2.238, |
| "step": 600 |
| }, |
| { |
| "entropy": 0.43974609375, |
| "epoch": 0.3287523578550256, |
| "grad_norm": 41.537105560302734, |
| "learning_rate": 8.498764695437746e-07, |
| "logits/chosen": -0.7988740761782873, |
| "logits/rejected": -0.7901971178299118, |
| "logps/chosen": -550.8, |
| "logps/rejected": -910.0, |
| "loss": 0.3901, |
| "mean_token_accuracy": 0.7285310417413712, |
| "num_tokens": 21645741.0, |
| "rewards/accuracies": 0.8375, |
| "rewards/chosen": -4.841351461410523, |
| "rewards/margins": 3.7519346356391905, |
| "rewards/rejected": -8.593285942077637, |
| "setc/cal_net_lr": 0.0005184913709032242, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08994946219027042, |
| "setc/logratio_margin_delta": 18.493466186523438, |
| "setc/logratio_margin_vanilla": 356.7, |
| "setc/logratio_margin_weighted": 375.19346618652344, |
| "step": 610 |
| }, |
| { |
| "entropy": 0.43916015625, |
| "epoch": 0.33414174077068176, |
| "grad_norm": 42.426490783691406, |
| "learning_rate": 8.430954786505839e-07, |
| "logits/chosen": -0.8440707111321254, |
| "logits/rejected": -0.8533261529276125, |
| "logps/chosen": -431.9, |
| "logps/rejected": -656.1, |
| "loss": 0.3473, |
| "mean_token_accuracy": 0.7551120847463608, |
| "num_tokens": 21998359.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -3.5407957673072814, |
| "rewards/margins": 2.3734045028686523, |
| "rewards/rejected": -5.914200258255005, |
| "setc/cal_net_lr": 0.0004995257541945978, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08794644176959991, |
| "setc/logratio_margin_delta": 15.790457153320313, |
| "setc/logratio_margin_vanilla": 221.55, |
| "setc/logratio_margin_weighted": 237.3404571533203, |
| "step": 620 |
| }, |
| { |
| "entropy": 0.4126953125, |
| "epoch": 0.3395311236863379, |
| "grad_norm": 24.879779815673828, |
| "learning_rate": 8.361930736950299e-07, |
| "logits/chosen": -0.8090851961587198, |
| "logits/rejected": -0.8543913061734949, |
| "logps/chosen": -432.2, |
| "logps/rejected": -689.9, |
| "loss": 0.3312, |
| "mean_token_accuracy": 0.7620727181434631, |
| "num_tokens": 22367339.0, |
| "rewards/accuracies": 0.884375, |
| "rewards/chosen": -3.6054784178733827, |
| "rewards/margins": 2.696714848279953, |
| "rewards/rejected": -6.302193331718445, |
| "setc/cal_net_lr": 0.00048056082012398516, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0865101970732212, |
| "setc/logratio_margin_delta": 17.24649543762207, |
| "setc/logratio_margin_vanilla": 252.425, |
| "setc/logratio_margin_weighted": 269.67149543762207, |
| "step": 630 |
| }, |
| { |
| "entropy": 0.402734375, |
| "epoch": 0.3449205066019941, |
| "grad_norm": 29.819805145263672, |
| "learning_rate": 8.29171697288799e-07, |
| "logits/chosen": -0.7799408370558261, |
| "logits/rejected": -0.7945290786054311, |
| "logps/chosen": -520.6, |
| "logps/rejected": -788.4, |
| "loss": 0.4063, |
| "mean_token_accuracy": 0.7504911392927169, |
| "num_tokens": 22746633.0, |
| "rewards/accuracies": 0.825, |
| "rewards/chosen": -4.477042305469513, |
| "rewards/margins": 2.874571180343628, |
| "rewards/rejected": -7.351613521575928, |
| "setc/cal_net_lr": 0.00046162386716145867, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08807485699653625, |
| "setc/logratio_margin_delta": 20.70712242126465, |
| "setc/logratio_margin_vanilla": 266.75, |
| "setc/logratio_margin_weighted": 287.45712242126467, |
| "step": 640 |
| }, |
| { |
| "entropy": 0.411328125, |
| "epoch": 0.35030988951765024, |
| "grad_norm": 45.361915588378906, |
| "learning_rate": 8.220338341449986e-07, |
| "logits/chosen": -0.7536949779449323, |
| "logits/rejected": -0.797799150332984, |
| "logps/chosen": -483.3, |
| "logps/rejected": -779.8, |
| "loss": 0.3723, |
| "mean_token_accuracy": 0.7493287652730942, |
| "num_tokens": 23123993.0, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -4.141227328777314, |
| "rewards/margins": 3.1743425011634825, |
| "rewards/rejected": -7.315569949150086, |
| "setc/cal_net_lr": 0.00044274215350057644, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08726362958550453, |
| "setc/logratio_margin_delta": 19.73426208496094, |
| "setc/logratio_margin_vanilla": 297.7, |
| "setc/logratio_margin_weighted": 317.43426208496095, |
| "step": 650 |
| }, |
| { |
| "entropy": 0.4083984375, |
| "epoch": 0.3556992724333064, |
| "grad_norm": 37.76664352416992, |
| "learning_rate": 8.147820101988704e-07, |
| "logits/chosen": -0.8120619076794536, |
| "logits/rejected": -0.8494678070244117, |
| "logps/chosen": -361.7, |
| "logps/rejected": -565.3, |
| "loss": 0.3474, |
| "mean_token_accuracy": 0.766129943728447, |
| "num_tokens": 23454227.0, |
| "rewards/accuracies": 0.8375, |
| "rewards/chosen": -2.9051371812820435, |
| "rewards/margins": 2.1819508969783783, |
| "rewards/rejected": -5.087088012695313, |
| "setc/cal_net_lr": 0.00042394285782244395, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08217526227235794, |
| "setc/logratio_margin_delta": 14.345091247558594, |
| "setc/logratio_margin_vanilla": 203.85, |
| "setc/logratio_margin_weighted": 218.1950912475586, |
| "step": 660 |
| }, |
| { |
| "entropy": 0.421875, |
| "epoch": 0.36108865534896256, |
| "grad_norm": 39.73728942871094, |
| "learning_rate": 8.074187917139175e-07, |
| "logits/chosen": -0.7319666812006074, |
| "logits/rejected": -0.7562072698077329, |
| "logps/chosen": -492.1, |
| "logps/rejected": -771.8, |
| "loss": 0.37, |
| "mean_token_accuracy": 0.7497668653726578, |
| "num_tokens": 23818594.0, |
| "rewards/accuracies": 0.85, |
| "rewards/chosen": -4.2121153473854065, |
| "rewards/margins": 2.9903420001268386, |
| "rewards/rejected": -7.202457308769226, |
| "setc/cal_net_lr": 0.00040525304017422745, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08255718871951104, |
| "setc/logratio_margin_delta": 18.634216690063475, |
| "setc/logratio_margin_vanilla": 280.4, |
| "setc/logratio_margin_weighted": 299.0342166900635, |
| "step": 670 |
| }, |
| { |
| "entropy": 0.42548828125, |
| "epoch": 0.3664780382646187, |
| "grad_norm": 40.21190643310547, |
| "learning_rate": 7.999467843737582e-07, |
| "logits/chosen": -0.8038282930095126, |
| "logits/rejected": -0.8421265761188057, |
| "logps/chosen": -460.9, |
| "logps/rejected": -701.0, |
| "loss": 0.3708, |
| "mean_token_accuracy": 0.7515695452690124, |
| "num_tokens": 24157407.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.902362608909607, |
| "rewards/margins": 2.5381152600049974, |
| "rewards/rejected": -6.440477788448334, |
| "setc/cal_net_lr": 0.00038669960301843077, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0827824518084526, |
| "setc/logratio_margin_delta": 16.236531066894532, |
| "setc/logratio_margin_vanilla": 237.575, |
| "setc/logratio_margin_weighted": 253.81153106689453, |
| "step": 680 |
| }, |
| { |
| "entropy": 0.39599609375, |
| "epoch": 0.3718674211802749, |
| "grad_norm": 20.98696517944336, |
| "learning_rate": 7.92368632360032e-07, |
| "logits/chosen": -0.8042416360177865, |
| "logits/rejected": -0.8270914788970604, |
| "logps/chosen": -469.8, |
| "logps/rejected": -730.8, |
| "loss": 0.344, |
| "mean_token_accuracy": 0.7479289263486862, |
| "num_tokens": 24493771.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -4.073775279521942, |
| "rewards/margins": 2.744447809457779, |
| "rewards/rejected": -6.8182231187820435, |
| "setc/cal_net_lr": 0.000368309252509004, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.08153500594198704, |
| "setc/logratio_margin_delta": 16.7447868347168, |
| "setc/logratio_margin_vanilla": 257.7, |
| "setc/logratio_margin_weighted": 274.4447868347168, |
| "step": 690 |
| }, |
| { |
| "entropy": 0.3873046875, |
| "epoch": 0.37725680409593104, |
| "grad_norm": 58.19563674926758, |
| "learning_rate": 7.846870174166803e-07, |
| "logits/chosen": -0.751562047809603, |
| "logits/rejected": -0.7788989722070948, |
| "logps/chosen": -553.7, |
| "logps/rejected": -904.5, |
| "loss": 0.3499, |
| "mean_token_accuracy": 0.7495994985103607, |
| "num_tokens": 24861785.0, |
| "rewards/accuracies": 0.8375, |
| "rewards/chosen": -4.865620005130768, |
| "rewards/margins": 3.707144260406494, |
| "rewards/rejected": -8.57276439666748, |
| "setc/cal_net_lr": 0.00035010846005002045, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0810060478746891, |
| "setc/logratio_margin_delta": 21.064439392089845, |
| "setc/logratio_margin_vanilla": 349.65, |
| "setc/logratio_margin_weighted": 370.71443939208984, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.37725680409593104, |
| "eval_entropy": 0.4024658203125, |
| "eval_logits/chosen": -0.8513818849761876, |
| "eval_logits/rejected": -0.8638025440627952, |
| "eval_logps/chosen": -570.4375, |
| "eval_logps/rejected": -930.875, |
| "eval_loss": 0.3007389008998871, |
| "eval_mean_token_accuracy": 0.7318701520562172, |
| "eval_num_tokens": 24861785.0, |
| "eval_rewards/accuracies": 0.86328125, |
| "eval_rewards/chosen": -5.0088541358709335, |
| "eval_rewards/margins": 3.838240258395672, |
| "eval_rewards/rejected": -8.847094357013702, |
| "eval_runtime": 14.2035, |
| "eval_samples_per_second": 35.202, |
| "eval_setc/cal_net_lr": 0.00034062822999975144, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.08001079317182302, |
| "eval_setc/logratio_margin_delta": 21.063920974731445, |
| "eval_setc/logratio_margin_vanilla": 363.875, |
| "eval_setc/logratio_margin_weighted": 384.93892097473145, |
| "eval_steps_per_second": 2.253, |
| "step": 700 |
| }, |
| { |
| "entropy": 0.4060546875, |
| "epoch": 0.38264618701158715, |
| "grad_norm": 86.25070190429688, |
| "learning_rate": 7.769046579009356e-07, |
| "logits/chosen": -0.8918346681389394, |
| "logits/rejected": -0.9074910639956075, |
| "logps/chosen": -639.5, |
| "logps/rejected": -989.4, |
| "loss": 0.4045, |
| "mean_token_accuracy": 0.7125069379806519, |
| "num_tokens": 25201876.0, |
| "rewards/accuracies": 0.821875, |
| "rewards/chosen": -5.8089584589004515, |
| "rewards/margins": 3.719921666383743, |
| "rewards/rejected": -9.52888035774231, |
| "setc/cal_net_lr": 0.0003321234241922574, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07928324565291404, |
| "setc/logratio_margin_delta": 21.29217185974121, |
| "setc/logratio_margin_vanilla": 350.7, |
| "setc/logratio_margin_weighted": 371.99217185974123, |
| "step": 710 |
| }, |
| { |
| "entropy": 0.4359375, |
| "epoch": 0.3880355699272433, |
| "grad_norm": 45.142372131347656, |
| "learning_rate": 7.690243078213531e-07, |
| "logits/chosen": -0.9740272973330708, |
| "logits/rejected": -1.0063528667573212, |
| "logps/chosen": -689.2, |
| "logps/rejected": -1139.4, |
| "loss": 0.3136, |
| "mean_token_accuracy": 0.6987293243408204, |
| "num_tokens": 25567728.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -6.253266739845276, |
| "rewards/margins": 4.749369788169861, |
| "rewards/rejected": -11.002636575698853, |
| "setc/cal_net_lr": 0.0003143800329225269, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0756493654102087, |
| "setc/logratio_margin_delta": 25.636982727050782, |
| "setc/logratio_margin_vanilla": 449.3, |
| "setc/logratio_margin_weighted": 474.9369827270508, |
| "step": 720 |
| }, |
| { |
| "entropy": 0.45234375, |
| "epoch": 0.39342495284289947, |
| "grad_norm": 13.179286003112793, |
| "learning_rate": 7.610487558632278e-07, |
| "logits/chosen": -1.0633192171048695, |
| "logits/rejected": -1.1078102349589132, |
| "logps/chosen": -656.4, |
| "logps/rejected": -1050.0, |
| "loss": 0.4255, |
| "mean_token_accuracy": 0.693815928697586, |
| "num_tokens": 25901754.0, |
| "rewards/accuracies": 0.85625, |
| "rewards/chosen": -5.947020316123963, |
| "rewards/margins": 4.109345865249634, |
| "rewards/rejected": -10.056366229057312, |
| "setc/cal_net_lr": 0.00029690382640003857, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07734486311674119, |
| "setc/logratio_margin_delta": 20.284591674804688, |
| "setc/logratio_margin_vanilla": 390.65, |
| "setc/logratio_margin_weighted": 410.93459167480466, |
| "step": 730 |
| }, |
| { |
| "entropy": 0.44501953125, |
| "epoch": 0.3988143357585556, |
| "grad_norm": 73.25760650634766, |
| "learning_rate": 7.529808244017382e-07, |
| "logits/chosen": -1.0208730154549697, |
| "logits/rejected": -1.055008389785436, |
| "logps/chosen": -627.0, |
| "logps/rejected": -961.0, |
| "loss": 0.4673, |
| "mean_token_accuracy": 0.70859514772892, |
| "num_tokens": 26254733.0, |
| "rewards/accuracies": 0.865625, |
| "rewards/chosen": -5.5865224480628966, |
| "rewards/margins": 3.537639796733856, |
| "rewards/rejected": -9.124162173271179, |
| "setc/cal_net_lr": 0.00027971996019343084, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07454265281558037, |
| "setc/logratio_margin_delta": 17.0139892578125, |
| "setc/logratio_margin_vanilla": 336.75, |
| "setc/logratio_margin_weighted": 353.7639892578125, |
| "step": 740 |
| }, |
| { |
| "entropy": 0.45361328125, |
| "epoch": 0.4042037186742118, |
| "grad_norm": 94.29061126708984, |
| "learning_rate": 7.448233685031693e-07, |
| "logits/chosen": -1.0630623814562947, |
| "logits/rejected": -1.1073087245931403, |
| "logps/chosen": -550.3, |
| "logps/rejected": -882.2, |
| "loss": 0.3111, |
| "mean_token_accuracy": 0.7219261229038239, |
| "num_tokens": 26587461.0, |
| "rewards/accuracies": 0.89375, |
| "rewards/chosen": -4.7769605159759525, |
| "rewards/margins": 3.535008490085602, |
| "rewards/rejected": -8.311969017982483, |
| "setc/cal_net_lr": 0.00026285316907138917, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07642085328698159, |
| "setc/logratio_margin_delta": 19.450860595703126, |
| "setc/logratio_margin_vanilla": 334.05, |
| "setc/logratio_margin_weighted": 353.5008605957031, |
| "step": 750 |
| }, |
| { |
| "entropy": 0.40673828125, |
| "epoch": 0.40959310158986795, |
| "grad_norm": 35.175498962402344, |
| "learning_rate": 7.365792749145662e-07, |
| "logits/chosen": -1.0218951494827677, |
| "logits/rejected": -1.048595356950924, |
| "logps/chosen": -488.2, |
| "logps/rejected": -758.2, |
| "loss": 0.37, |
| "mean_token_accuracy": 0.73828344643116, |
| "num_tokens": 26953136.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -4.2265440940856935, |
| "rewards/margins": 2.8608301758766173, |
| "rewards/rejected": -7.08737428188324, |
| "setc/cal_net_lr": 0.00024632773139897124, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07358978651463985, |
| "setc/logratio_margin_delta": 16.033021926879883, |
| "setc/logratio_margin_vanilla": 270.05, |
| "setc/logratio_margin_weighted": 286.0830219268799, |
| "step": 760 |
| }, |
| { |
| "entropy": 0.40654296875, |
| "epoch": 0.4149824845055241, |
| "grad_norm": 37.247169494628906, |
| "learning_rate": 7.28251461042177e-07, |
| "logits/chosen": -1.0070175249471507, |
| "logits/rejected": -1.0310765859216962, |
| "logps/chosen": -496.5, |
| "logps/rejected": -771.8, |
| "loss": 0.37, |
| "mean_token_accuracy": 0.7500552415847779, |
| "num_tokens": 27312799.0, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -4.265070939064026, |
| "rewards/margins": 2.9076979637145994, |
| "rewards/rejected": -7.172768950462341, |
| "setc/cal_net_lr": 0.00023016743419088836, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07328317761421203, |
| "setc/logratio_margin_delta": 18.319805145263672, |
| "setc/logratio_margin_vanilla": 272.45, |
| "setc/logratio_margin_weighted": 290.76980514526366, |
| "step": 770 |
| }, |
| { |
| "entropy": 0.43095703125, |
| "epoch": 0.42037186742118027, |
| "grad_norm": 19.564472198486328, |
| "learning_rate": 7.198428739190457e-07, |
| "logits/chosen": -1.082859984019952, |
| "logits/rejected": -1.0929271185391367, |
| "logps/chosen": -557.8, |
| "logps/rejected": -832.2, |
| "loss": 0.3455, |
| "mean_token_accuracy": 0.7363463073968888, |
| "num_tokens": 27667656.0, |
| "rewards/accuracies": 0.86875, |
| "rewards/chosen": -4.847213423252105, |
| "rewards/margins": 2.919535148143768, |
| "rewards/rejected": -7.766748380661011, |
| "setc/cal_net_lr": 0.00021439553887204476, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07149026319384574, |
| "setc/logratio_margin_delta": 16.803517150878907, |
| "setc/logratio_margin_vanilla": 275.15, |
| "setc/logratio_margin_weighted": 291.9535171508789, |
| "step": 780 |
| }, |
| { |
| "entropy": 0.428515625, |
| "epoch": 0.4257612503368364, |
| "grad_norm": 24.025815963745117, |
| "learning_rate": 7.113564891621212e-07, |
| "logits/chosen": -0.9810840312218222, |
| "logits/rejected": -1.0128754190678195, |
| "logps/chosen": -626.1, |
| "logps/rejected": -965.2, |
| "loss": 0.3468, |
| "mean_token_accuracy": 0.729249706864357, |
| "num_tokens": 28037217.0, |
| "rewards/accuracies": 0.86875, |
| "rewards/chosen": -5.643828749656677, |
| "rewards/margins": 3.574371063709259, |
| "rewards/rejected": -9.218199872970581, |
| "setc/cal_net_lr": 0.00019903474779462064, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07268696911633014, |
| "setc/logratio_margin_delta": 19.687115478515626, |
| "setc/logratio_margin_vanilla": 337.75, |
| "setc/logratio_margin_weighted": 357.43711547851564, |
| "step": 790 |
| }, |
| { |
| "entropy": 0.4359375, |
| "epoch": 0.4311506332524926, |
| "grad_norm": 26.062593460083008, |
| "learning_rate": 7.027953099192509e-07, |
| "logits/chosen": -1.037471741150463, |
| "logits/rejected": -1.0548466740521376, |
| "logps/chosen": -552.5, |
| "logps/rejected": -859.8, |
| "loss": 0.2958, |
| "mean_token_accuracy": 0.7349712908267975, |
| "num_tokens": 28389736.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -4.908501935005188, |
| "rewards/margins": 3.250082492828369, |
| "rewards/rejected": -8.15858452320099, |
| "setc/cal_net_lr": 0.0001841071715598952, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0715821348130703, |
| "setc/logratio_margin_delta": 19.408258819580077, |
| "setc/logratio_margin_vanilla": 305.6, |
| "setc/logratio_margin_weighted": 325.0082588195801, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4311506332524926, |
| "eval_entropy": 0.437255859375, |
| "eval_logits/chosen": -1.0900919600946075, |
| "eval_logits/rejected": -1.1059430813034792, |
| "eval_logps/chosen": -572.875, |
| "eval_logps/rejected": -897.125, |
| "eval_loss": 0.29659560322761536, |
| "eval_mean_token_accuracy": 0.7379258405417204, |
| "eval_num_tokens": 28389736.0, |
| "eval_rewards/accuracies": 0.890625, |
| "eval_rewards/chosen": -5.015224754810333, |
| "eval_rewards/margins": 3.451749622821808, |
| "eval_rewards/rejected": -8.466974467039108, |
| "eval_runtime": 14.2077, |
| "eval_samples_per_second": 35.192, |
| "eval_setc/cal_net_lr": 0.00017643152107359864, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.07336517237126827, |
| "eval_setc/logratio_margin_delta": 19.209267139434814, |
| "eval_setc/logratio_margin_vanilla": 328.59375, |
| "eval_setc/logratio_margin_weighted": 347.8030171394348, |
| "eval_steps_per_second": 2.252, |
| "step": 800 |
| }, |
| { |
| "entropy": 0.43359375, |
| "epoch": 0.43654001616814875, |
| "grad_norm": 70.45535278320312, |
| "learning_rate": 6.941623658064312e-07, |
| "logits/chosen": -1.0733893563106265, |
| "logits/rejected": -1.0982346659335882, |
| "logps/chosen": -633.1, |
| "logps/rejected": -941.0, |
| "loss": 0.3849, |
| "mean_token_accuracy": 0.729145985841751, |
| "num_tokens": 28733805.0, |
| "rewards/accuracies": 0.834375, |
| "rewards/chosen": -5.708724451065064, |
| "rewards/margins": 3.248158019781113, |
| "rewards/rejected": -8.956882286071778, |
| "setc/cal_net_lr": 0.00016963429719184532, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07415341325104237, |
| "setc/logratio_margin_delta": 17.66580581665039, |
| "setc/logratio_margin_vanilla": 307.15, |
| "setc/logratio_margin_weighted": 324.81580581665037, |
| "step": 810 |
| }, |
| { |
| "entropy": 0.4302734375, |
| "epoch": 0.4419293990838049, |
| "grad_norm": 40.56505584716797, |
| "learning_rate": 6.854607118356928e-07, |
| "logits/chosen": -1.1385733399006264, |
| "logits/rejected": -1.183113374768871, |
| "logps/chosen": -566.1, |
| "logps/rejected": -896.6, |
| "loss": 0.3152, |
| "mean_token_accuracy": 0.7246308416128159, |
| "num_tokens": 29076104.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -5.08197500705719, |
| "rewards/margins": 3.4515433311462402, |
| "rewards/rejected": -8.53351833820343, |
| "setc/cal_net_lr": 0.0001556369572083343, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07320533730089665, |
| "setc/logratio_margin_delta": 18.154342651367188, |
| "setc/logratio_margin_vanilla": 327.0, |
| "setc/logratio_margin_weighted": 345.1543426513672, |
| "step": 820 |
| }, |
| { |
| "entropy": 0.43427734375, |
| "epoch": 0.44731878199946107, |
| "grad_norm": 56.18523025512695, |
| "learning_rate": 6.766934273339973e-07, |
| "logits/chosen": -1.1405602822344687, |
| "logits/rejected": -1.1867411468308247, |
| "logps/chosen": -557.2, |
| "logps/rejected": -863.8, |
| "loss": 0.2993, |
| "mean_token_accuracy": 0.7315610319375991, |
| "num_tokens": 29438936.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -4.800099658966064, |
| "rewards/margins": 3.2655778348445894, |
| "rewards/rejected": -8.065677428245545, |
| "setc/cal_net_lr": 0.00014213529963440723, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07172517664730549, |
| "setc/logratio_margin_delta": 18.5077823638916, |
| "setc/logratio_margin_vanilla": 308.05, |
| "setc/logratio_margin_weighted": 326.5577823638916, |
| "step": 830 |
| }, |
| { |
| "entropy": 0.4123046875, |
| "epoch": 0.45270816491511723, |
| "grad_norm": 27.114397048950195, |
| "learning_rate": 6.678636148535307e-07, |
| "logits/chosen": -1.1366168963090462, |
| "logits/rejected": -1.173395336965087, |
| "logps/chosen": -496.6, |
| "logps/rejected": -803.6, |
| "loss": 0.3137, |
| "mean_token_accuracy": 0.7537930309772491, |
| "num_tokens": 29818328.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -4.169769895076752, |
| "rewards/margins": 3.23821382522583, |
| "rewards/rejected": -7.407983732223511, |
| "setc/cal_net_lr": 0.0001291487590008584, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07007276304066182, |
| "setc/logratio_margin_delta": 19.77139205932617, |
| "setc/logratio_margin_vanilla": 304.05, |
| "setc/logratio_margin_weighted": 323.8213920593262, |
| "step": 840 |
| }, |
| { |
| "entropy": 0.4216796875, |
| "epoch": 0.4580975478307734, |
| "grad_norm": 41.05332946777344, |
| "learning_rate": 6.58974399073777e-07, |
| "logits/chosen": -1.1544986734058926, |
| "logits/rejected": -1.1544554202476698, |
| "logps/chosen": -538.0, |
| "logps/rejected": -871.0, |
| "loss": 0.2942, |
| "mean_token_accuracy": 0.736896401643753, |
| "num_tokens": 30188501.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -4.634168601036071, |
| "rewards/margins": 3.525930380821228, |
| "rewards/rejected": -8.160099053382874, |
| "setc/cal_net_lr": 0.00011669602836981566, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07121242731809616, |
| "setc/logratio_margin_delta": 21.643047332763672, |
| "setc/logratio_margin_vanilla": 330.95, |
| "setc/logratio_margin_weighted": 352.59304733276366, |
| "step": 850 |
| }, |
| { |
| "entropy": 0.4259765625, |
| "epoch": 0.46348693074642955, |
| "grad_norm": 64.55622100830078, |
| "learning_rate": 6.500289256957615e-07, |
| "logits/chosen": -1.1011358195249479, |
| "logits/rejected": -1.1499861979740114, |
| "logps/chosen": -573.8, |
| "logps/rejected": -874.0, |
| "loss": 0.4537, |
| "mean_token_accuracy": 0.7331288278102874, |
| "num_tokens": 30547224.0, |
| "rewards/accuracies": 0.803125, |
| "rewards/chosen": -5.030296123027801, |
| "rewards/margins": 3.2068679571151733, |
| "rewards/rejected": -8.237163949012757, |
| "setc/cal_net_lr": 0.00010479503242760742, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0720590002834797, |
| "setc/logratio_margin_delta": 18.836805725097655, |
| "setc/logratio_margin_vanilla": 301.85, |
| "setc/logratio_margin_weighted": 320.68680572509766, |
| "step": 860 |
| }, |
| { |
| "entropy": 0.43505859375, |
| "epoch": 0.4688763136620857, |
| "grad_norm": 39.62507247924805, |
| "learning_rate": 6.410303603288561e-07, |
| "logits/chosen": -1.088289906345851, |
| "logits/rejected": -1.0841832767082082, |
| "logps/chosen": -404.8, |
| "logps/rejected": -620.1, |
| "loss": 0.3673, |
| "mean_token_accuracy": 0.7633604764938354, |
| "num_tokens": 30902393.0, |
| "rewards/accuracies": 0.8625, |
| "rewards/chosen": -3.1859230756759644, |
| "rewards/margins": 2.2959958791732786, |
| "rewards/rejected": -5.481918931007385, |
| "setc/cal_net_lr": 9.346290168364385e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06972141042351723, |
| "setc/logratio_margin_delta": 14.549591827392579, |
| "setc/logratio_margin_vanilla": 215.05, |
| "setc/logratio_margin_weighted": 229.59959182739257, |
| "step": 870 |
| }, |
| { |
| "entropy": 0.41787109375, |
| "epoch": 0.47426569657774187, |
| "grad_norm": 33.14332962036133, |
| "learning_rate": 6.319818873705377e-07, |
| "logits/chosen": -1.089281081386084, |
| "logits/rejected": -1.114244560013979, |
| "logps/chosen": -462.6, |
| "logps/rejected": -784.6, |
| "loss": 0.3062, |
| "mean_token_accuracy": 0.7454081952571869, |
| "num_tokens": 31235617.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -3.9631998538970947, |
| "rewards/margins": 3.428285652399063, |
| "rewards/rejected": -7.391485595703125, |
| "setc/cal_net_lr": 8.271594781245119e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07049608752131462, |
| "setc/logratio_margin_delta": 20.25357093811035, |
| "setc/logratio_margin_vanilla": 322.575, |
| "setc/logratio_margin_weighted": 342.8285709381104, |
| "step": 880 |
| }, |
| { |
| "entropy": 0.4427734375, |
| "epoch": 0.47965507949339803, |
| "grad_norm": 25.35371208190918, |
| "learning_rate": 6.228867088794997e-07, |
| "logits/chosen": -1.117235813203115, |
| "logits/rejected": -1.1379867493894777, |
| "logps/chosen": -611.3, |
| "logps/rejected": -992.4, |
| "loss": 0.3308, |
| "mean_token_accuracy": 0.7231000691652298, |
| "num_tokens": 31611665.0, |
| "rewards/accuracies": 0.85625, |
| "rewards/chosen": -5.4193216323852536, |
| "rewards/margins": 4.017894721031189, |
| "rewards/rejected": -9.437216472625732, |
| "setc/cal_net_lr": 7.256964017435153e-05, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.0703140676021576, |
| "setc/logratio_margin_delta": 22.489488220214845, |
| "setc/logratio_margin_vanilla": 379.3, |
| "setc/logratio_margin_weighted": 401.7894882202148, |
| "step": 890 |
| }, |
| { |
| "entropy": 0.425390625, |
| "epoch": 0.4850444624090542, |
| "grad_norm": 21.312118530273438, |
| "learning_rate": 6.137480434425124e-07, |
| "logits/chosen": -1.063890301715667, |
| "logits/rejected": -1.1046153500158946, |
| "logps/chosen": -502.7, |
| "logps/rejected": -788.2, |
| "loss": 0.3396, |
| "mean_token_accuracy": 0.7333939164876938, |
| "num_tokens": 31947911.0, |
| "rewards/accuracies": 0.853125, |
| "rewards/chosen": -4.411977684497833, |
| "rewards/margins": 2.9957467019557953, |
| "rewards/rejected": -7.407724452018738, |
| "setc/cal_net_lr": 6.303858354858501e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06882553435862064, |
| "setc/logratio_margin_delta": 16.82468185424805, |
| "setc/logratio_margin_vanilla": 282.75, |
| "setc/logratio_margin_weighted": 299.57468185424807, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4850444624090542, |
| "eval_entropy": 0.458984375, |
| "eval_logits/chosen": -1.136106022790833, |
| "eval_logits/rejected": -1.1553218335773052, |
| "eval_logps/chosen": -567.9375, |
| "eval_logps/rejected": -919.125, |
| "eval_loss": 0.2798229157924652, |
| "eval_mean_token_accuracy": 0.7211284190416336, |
| "eval_num_tokens": 31947911.0, |
| "eval_rewards/accuracies": 0.888671875, |
| "eval_rewards/chosen": -4.92750097066164, |
| "eval_rewards/margins": 3.7332901805639267, |
| "eval_rewards/rejected": -8.660791024565697, |
| "eval_runtime": 14.2045, |
| "eval_samples_per_second": 35.2, |
| "eval_setc/cal_net_lr": 5.825929969209536e-05, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.07130194280762225, |
| "eval_setc/logratio_margin_delta": 20.13284683227539, |
| "eval_setc/logratio_margin_vanilla": 354.25, |
| "eval_setc/logratio_margin_weighted": 374.3828468322754, |
| "eval_steps_per_second": 2.253, |
| "step": 900 |
| }, |
| { |
| "entropy": 0.4380859375, |
| "epoch": 0.4904338453247103, |
| "grad_norm": 30.84346580505371, |
| "learning_rate": 6.045691250354349e-07, |
| "logits/chosen": -1.0498209270589363, |
| "logits/rejected": -1.07947071859647, |
| "logps/chosen": -607.5, |
| "logps/rejected": -951.4, |
| "loss": 0.3157, |
| "mean_token_accuracy": 0.7191443383693695, |
| "num_tokens": 32308377.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -5.468822598457336, |
| "rewards/margins": 3.641323584318161, |
| "rewards/rejected": -9.110146307945252, |
| "setc/cal_net_lr": 5.413649711092565e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07014792338013649, |
| "setc/logratio_margin_delta": 21.932370758056642, |
| "setc/logratio_margin_vanilla": 342.2, |
| "setc/logratio_margin_weighted": 364.13237075805665, |
| "step": 910 |
| }, |
| { |
| "entropy": 0.43798828125, |
| "epoch": 0.49582322824036645, |
| "grad_norm": 69.57173919677734, |
| "learning_rate": 5.953532018787807e-07, |
| "logits/chosen": -1.0614989872760174, |
| "logits/rejected": -1.1125016970713177, |
| "logps/chosen": -562.6, |
| "logps/rejected": -910.6, |
| "loss": 0.3801, |
| "mean_token_accuracy": 0.7194907575845718, |
| "num_tokens": 32667657.0, |
| "rewards/accuracies": 0.834375, |
| "rewards/chosen": -5.013018798828125, |
| "rewards/margins": 3.655552077293396, |
| "rewards/rejected": -8.668570947647094, |
| "setc/cal_net_lr": 4.587619468605094e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07026223614811897, |
| "setc/logratio_margin_delta": 19.905216217041016, |
| "setc/logratio_margin_vanilla": 345.65, |
| "setc/logratio_margin_weighted": 365.555216217041, |
| "step": 920 |
| }, |
| { |
| "entropy": 0.43251953125, |
| "epoch": 0.5012126111560227, |
| "grad_norm": 37.31387710571289, |
| "learning_rate": 5.861035352882434e-07, |
| "logits/chosen": -1.1028243420067865, |
| "logits/rejected": -1.1239636597884728, |
| "logps/chosen": -668.2, |
| "logps/rejected": -1051.8, |
| "loss": 0.3082, |
| "mean_token_accuracy": 0.7101206243038177, |
| "num_tokens": 33038205.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -6.0299333214759825, |
| "rewards/margins": 4.101047283411026, |
| "rewards/rejected": -10.130980563163757, |
| "setc/cal_net_lr": 3.826956630309029e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07096938453614712, |
| "setc/logratio_margin_delta": 25.654744720458986, |
| "setc/logratio_margin_vanilla": 384.45, |
| "setc/logratio_margin_weighted": 410.104744720459, |
| "step": 930 |
| }, |
| { |
| "entropy": 0.43154296875, |
| "epoch": 0.5066019940716788, |
| "grad_norm": 31.79937744140625, |
| "learning_rate": 5.768233985205871e-07, |
| "logits/chosen": -1.0400086846324763, |
| "logits/rejected": -1.0826617979469901, |
| "logps/chosen": -603.6, |
| "logps/rejected": -1010.8, |
| "loss": 0.3537, |
| "mean_token_accuracy": 0.7205777823925018, |
| "num_tokens": 33412806.0, |
| "rewards/accuracies": 0.8375, |
| "rewards/chosen": -5.440573036670685, |
| "rewards/margins": 4.329031145572662, |
| "rewards/rejected": -9.769603991508484, |
| "setc/cal_net_lr": 3.1327561080901155e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07101482562720776, |
| "setc/logratio_margin_delta": 24.953127288818358, |
| "setc/logratio_margin_vanilla": 407.95, |
| "setc/logratio_margin_weighted": 432.9031272888184, |
| "step": 940 |
| }, |
| { |
| "entropy": 0.44111328125, |
| "epoch": 0.511991376987335, |
| "grad_norm": 41.023216247558594, |
| "learning_rate": 5.675160756153119e-07, |
| "logits/chosen": -1.063226666319973, |
| "logits/rejected": -1.103878181164049, |
| "logps/chosen": -572.8, |
| "logps/rejected": -937.8, |
| "loss": 0.2939, |
| "mean_token_accuracy": 0.7170851469039917, |
| "num_tokens": 33771494.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -5.1082786321640015, |
| "rewards/margins": 3.9003884315490724, |
| "rewards/rejected": -9.0086669921875, |
| "setc/cal_net_lr": 2.5060171467709125e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0710456695407629, |
| "setc/logratio_margin_delta": 24.788859558105468, |
| "setc/logratio_margin_vanilla": 365.25, |
| "setc/logratio_margin_weighted": 390.03885955810546, |
| "step": 950 |
| }, |
| { |
| "entropy": 0.44267578125, |
| "epoch": 0.5173807599029911, |
| "grad_norm": 52.85020446777344, |
| "learning_rate": 5.581848602325041e-07, |
| "logits/chosen": -1.0075269843195531, |
| "logits/rejected": -1.0690200247116604, |
| "logps/chosen": -536.4, |
| "logps/rejected": -866.6, |
| "loss": 0.3122, |
| "mean_token_accuracy": 0.7294569611549377, |
| "num_tokens": 34132231.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -4.7246493101119995, |
| "rewards/margins": 3.508440887928009, |
| "rewards/rejected": -8.233090209960938, |
| "setc/cal_net_lr": 1.9476418857796457e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07001486346125603, |
| "setc/logratio_margin_delta": 22.394107818603516, |
| "setc/logratio_margin_vanilla": 328.45, |
| "setc/logratio_margin_weighted": 350.8441078186035, |
| "step": 960 |
| }, |
| { |
| "entropy": 0.4556640625, |
| "epoch": 0.5227701428186473, |
| "grad_norm": 22.979949951171875, |
| "learning_rate": 5.488330544872797e-07, |
| "logits/chosen": -1.0871756321135606, |
| "logits/rejected": -1.1191254896199978, |
| "logps/chosen": -508.9, |
| "logps/rejected": -850.4, |
| "loss": 0.3046, |
| "mean_token_accuracy": 0.7397548407316208, |
| "num_tokens": 34502355.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -4.326072335243225, |
| "rewards/margins": 3.6845016717910766, |
| "rewards/rejected": -8.010574007034302, |
| "setc/cal_net_lr": 1.4584340605943599e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07133265994489194, |
| "setc/logratio_margin_delta": 22.850178527832032, |
| "setc/logratio_margin_vanilla": 345.6, |
| "setc/logratio_margin_weighted": 368.450178527832, |
| "step": 970 |
| }, |
| { |
| "entropy": 0.43857421875, |
| "epoch": 0.5281595257343035, |
| "grad_norm": 25.516042709350586, |
| "learning_rate": 5.394639677812387e-07, |
| "logits/chosen": -1.088331639351806, |
| "logits/rejected": -1.134811225998461, |
| "logps/chosen": -455.4, |
| "logps/rejected": -754.1, |
| "loss": 0.3234, |
| "mean_token_accuracy": 0.7478211104869843, |
| "num_tokens": 34852059.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -3.8503175020217895, |
| "rewards/margins": 3.19860480427742, |
| "rewards/rejected": -7.048922348022461, |
| "setc/cal_net_lr": 1.0390978458315019e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07125458978116513, |
| "setc/logratio_margin_delta": 19.160488891601563, |
| "setc/logratio_margin_vanilla": 300.7, |
| "setc/logratio_margin_weighted": 319.86048889160156, |
| "step": 980 |
| }, |
| { |
| "entropy": 0.41787109375, |
| "epoch": 0.5335489086499596, |
| "grad_norm": 22.045780181884766, |
| "learning_rate": 5.300809156313389e-07, |
| "logits/chosen": -1.1293688528513106, |
| "logits/rejected": -1.1482995959404332, |
| "logps/chosen": -498.4, |
| "logps/rejected": -795.4, |
| "loss": 0.3843, |
| "mean_token_accuracy": 0.7528087586164475, |
| "num_tokens": 35212012.0, |
| "rewards/accuracies": 0.86875, |
| "rewards/chosen": -4.3080038785934445, |
| "rewards/margins": 3.206386703252792, |
| "rewards/rejected": -7.514390516281128, |
| "setc/cal_net_lr": 6.902368416441938e-06, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07167959362268447, |
| "setc/logratio_margin_delta": 22.038671493530273, |
| "setc/logratio_margin_vanilla": 298.6, |
| "setc/logratio_margin_weighted": 320.6386714935303, |
| "step": 990 |
| }, |
| { |
| "entropy": 0.4220703125, |
| "epoch": 0.5389382915656158, |
| "grad_norm": 29.74481773376465, |
| "learning_rate": 5.206872184966065e-07, |
| "logits/chosen": -1.2005332112522926, |
| "logits/rejected": -1.223600991098159, |
| "logps/chosen": -475.5, |
| "logps/rejected": -771.0, |
| "loss": 0.3032, |
| "mean_token_accuracy": 0.7501377999782562, |
| "num_tokens": 35559559.0, |
| "rewards/accuracies": 0.884375, |
| "rewards/chosen": -4.051128447055817, |
| "rewards/margins": 3.163189709186554, |
| "rewards/rejected": -7.214318156242371, |
| "setc/cal_net_lr": 4.123532048892359e-06, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07157905176281928, |
| "setc/logratio_margin_delta": 21.718975830078126, |
| "setc/logratio_margin_vanilla": 294.6, |
| "setc/logratio_margin_weighted": 316.3189758300781, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5389382915656158, |
| "eval_entropy": 0.42791748046875, |
| "eval_logits/chosen": -1.205733974360692, |
| "eval_logits/rejected": -1.2256254141801155, |
| "eval_logps/chosen": -520.25, |
| "eval_logps/rejected": -881.125, |
| "eval_loss": 0.26266777515411377, |
| "eval_mean_token_accuracy": 0.7448298633098602, |
| "eval_num_tokens": 35559559.0, |
| "eval_rewards/accuracies": 0.912109375, |
| "eval_rewards/chosen": -4.464685194194317, |
| "eval_rewards/margins": 3.8784405440092087, |
| "eval_rewards/rejected": -8.343125775456429, |
| "eval_runtime": 14.2575, |
| "eval_samples_per_second": 35.069, |
| "eval_setc/cal_net_lr": 2.9204217399144684e-06, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.07189792324788868, |
| "eval_setc/logratio_margin_delta": 25.44654369354248, |
| "eval_setc/logratio_margin_vanilla": 364.4375, |
| "eval_setc/logratio_margin_weighted": 389.8840436935425, |
| "eval_steps_per_second": 2.244, |
| "step": 1000 |
| }, |
| { |
| "entropy": 0.4033203125, |
| "epoch": 0.5443276744812718, |
| "grad_norm": 84.13672637939453, |
| "learning_rate": 5.11286200603097e-07, |
| "logits/chosen": -1.1744355391727033, |
| "logits/rejected": -1.1982522802884472, |
| "logps/chosen": -589.9, |
| "logps/rejected": -966.7, |
| "loss": 0.3591, |
| "mean_token_accuracy": 0.7260809451341629, |
| "num_tokens": 35914588.0, |
| "rewards/accuracies": 0.86875, |
| "rewards/chosen": -5.367753648757935, |
| "rewards/margins": 4.012299507856369, |
| "rewards/rejected": -9.380053186416626, |
| "setc/cal_net_lr": 2.058469263134096e-06, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07252811081707478, |
| "setc/logratio_margin_delta": 27.179954528808594, |
| "setc/logratio_margin_vanilla": 374.05, |
| "setc/logratio_margin_weighted": 401.2299545288086, |
| "step": 1010 |
| }, |
| { |
| "entropy": 0.41494140625, |
| "epoch": 0.549717057396928, |
| "grad_norm": 45.3025016784668, |
| "learning_rate": 5.018811887675243e-07, |
| "logits/chosen": -1.2095668156933808, |
| "logits/rejected": -1.2754331190012376, |
| "logps/chosen": -638.7, |
| "logps/rejected": -1027.8, |
| "loss": 0.2909, |
| "mean_token_accuracy": 0.7233078300952911, |
| "num_tokens": 36272220.0, |
| "rewards/accuracies": 0.9, |
| "rewards/chosen": -5.741470265388489, |
| "rewards/margins": 4.172668009996414, |
| "rewards/rejected": -9.914138317108154, |
| "setc/cal_net_lr": 7.101525479954687e-07, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07240922413766385, |
| "setc/logratio_margin_delta": 29.066813659667968, |
| "setc/logratio_margin_vanilla": 388.2, |
| "setc/logratio_margin_weighted": 417.26681365966795, |
| "step": 1020 |
| }, |
| { |
| "entropy": 0.41572265625, |
| "epoch": 0.5551064403125842, |
| "grad_norm": 54.05473327636719, |
| "learning_rate": 4.924755112199719e-07, |
| "logits/chosen": -1.1848740348447429, |
| "logits/rejected": -1.2239745932740136, |
| "logps/chosen": -530.6, |
| "logps/rejected": -873.4, |
| "loss": 0.3204, |
| "mean_token_accuracy": 0.737355038523674, |
| "num_tokens": 36608348.0, |
| "rewards/accuracies": 0.846875, |
| "rewards/chosen": -4.719290387630463, |
| "rewards/margins": 3.6306647956371307, |
| "rewards/rejected": -8.349955177307129, |
| "setc/cal_net_lr": 8.052269501096502e-08, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07150659598410129, |
| "setc/logratio_margin_delta": 23.716495513916016, |
| "setc/logratio_margin_vanilla": 339.35, |
| "setc/logratio_margin_weighted": 363.06649551391604, |
| "step": 1030 |
| }, |
| { |
| "entropy": 0.40263671875, |
| "epoch": 0.5604958232282403, |
| "grad_norm": 51.89723587036133, |
| "learning_rate": 4.830724964261044e-07, |
| "logits/chosen": -1.2390042148153568, |
| "logits/rejected": -1.2885369769102084, |
| "logps/chosen": -552.1, |
| "logps/rejected": -873.2, |
| "loss": 0.3291, |
| "mean_token_accuracy": 0.7273021250963211, |
| "num_tokens": 36931299.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -4.9631112813949585, |
| "rewards/margins": 3.396042114496231, |
| "rewards/rejected": -8.359153366088867, |
| "setc/cal_net_lr": 1.704860048107271e-07, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0706510066986084, |
| "setc/logratio_margin_delta": 24.204216766357423, |
| "setc/logratio_margin_vanilla": 315.4, |
| "setc/logratio_margin_weighted": 339.6042167663574, |
| "step": 1040 |
| }, |
| { |
| "entropy": 0.42197265625, |
| "epoch": 0.5658852061438965, |
| "grad_norm": 108.92530822753906, |
| "learning_rate": 4.736754719092948e-07, |
| "logits/chosen": -1.2478903284456264, |
| "logits/rejected": -1.3215152839167994, |
| "logps/chosen": -660.8, |
| "logps/rejected": -1058.0, |
| "loss": 0.317, |
| "mean_token_accuracy": 0.7129073649644851, |
| "num_tokens": 37295844.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -5.961016845703125, |
| "rewards/margins": 4.262525498867035, |
| "rewards/rejected": -10.223542261123658, |
| "setc/cal_net_lr": 9.799129825749437e-07, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0731208048760891, |
| "setc/logratio_margin_delta": 28.65255813598633, |
| "setc/logratio_margin_vanilla": 397.6, |
| "setc/logratio_margin_weighted": 426.2525581359863, |
| "step": 1050 |
| }, |
| { |
| "entropy": 0.40234375, |
| "epoch": 0.5712745890595526, |
| "grad_norm": 23.12958335876465, |
| "learning_rate": 4.6428776307308795e-07, |
| "logits/chosen": -1.2329498912657004, |
| "logits/rejected": -1.247452417260519, |
| "logps/chosen": -669.1, |
| "logps/rejected": -1018.6, |
| "loss": 0.3105, |
| "mean_token_accuracy": 0.7226340204477311, |
| "num_tokens": 37665756.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -6.078487741947174, |
| "rewards/margins": 3.7634204626083374, |
| "rewards/rejected": -9.841908311843872, |
| "setc/cal_net_lr": 2.5076385244310457e-06, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07050310261547565, |
| "setc/logratio_margin_delta": 26.592060089111328, |
| "setc/logratio_margin_vanilla": 349.75, |
| "setc/logratio_margin_weighted": 376.3420600891113, |
| "step": 1060 |
| }, |
| { |
| "entropy": 0.42890625, |
| "epoch": 0.5766639719752088, |
| "grad_norm": 54.92173385620117, |
| "learning_rate": 4.5491269202441044e-07, |
| "logits/chosen": -1.2268895919032963, |
| "logits/rejected": -1.2605704569389382, |
| "logps/chosen": -601.6, |
| "logps/rejected": -936.8, |
| "loss": 0.2924, |
| "mean_token_accuracy": 0.7248776346445084, |
| "num_tokens": 38031467.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -5.383153641223908, |
| "rewards/margins": 3.53509316444397, |
| "rewards/rejected": -8.918246793746949, |
| "setc/cal_net_lr": 4.751463594525389e-06, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07096486799418926, |
| "setc/logratio_margin_delta": 21.009319305419922, |
| "setc/logratio_margin_vanilla": 332.5, |
| "setc/logratio_margin_weighted": 353.5093193054199, |
| "step": 1070 |
| }, |
| { |
| "entropy": 0.43212890625, |
| "epoch": 0.582053354890865, |
| "grad_norm": 29.253252029418945, |
| "learning_rate": 4.455535763979489e-07, |
| "logits/chosen": -1.2967908691930823, |
| "logits/rejected": -1.334540072490827, |
| "logps/chosen": -673.7, |
| "logps/rejected": -1018.8, |
| "loss": 0.276, |
| "mean_token_accuracy": 0.7103154689073563, |
| "num_tokens": 38406113.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -6.113679957389832, |
| "rewards/margins": 3.689132344722748, |
| "rewards/rejected": -9.802812147140504, |
| "setc/cal_net_lr": 7.708158390355337e-06, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07218135371804238, |
| "setc/logratio_margin_delta": 23.763233947753907, |
| "setc/logratio_margin_vanilla": 345.15, |
| "setc/logratio_margin_weighted": 368.9132339477539, |
| "step": 1080 |
| }, |
| { |
| "entropy": 0.43740234375, |
| "epoch": 0.5874427378065211, |
| "grad_norm": 63.62852096557617, |
| "learning_rate": 4.3621372818211233e-07, |
| "logits/chosen": -1.2966434675325627, |
| "logits/rejected": -1.3388852385699441, |
| "logps/chosen": -717.7, |
| "logps/rejected": -1140.4, |
| "loss": 0.2884, |
| "mean_token_accuracy": 0.7034007340669632, |
| "num_tokens": 38739807.0, |
| "rewards/accuracies": 0.9, |
| "rewards/chosen": -6.62959771156311, |
| "rewards/margins": 4.472747385501862, |
| "rewards/rejected": -11.1023451089859, |
| "setc/cal_net_lr": 1.1373466991805524e-05, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.07208193615078926, |
| "setc/logratio_margin_delta": 29.124752044677734, |
| "setc/logratio_margin_vanilla": 418.15, |
| "setc/logratio_margin_weighted": 447.2747520446777, |
| "step": 1090 |
| }, |
| { |
| "entropy": 0.43876953125, |
| "epoch": 0.5928321207221773, |
| "grad_norm": 58.927433013916016, |
| "learning_rate": 4.2689645254698956e-07, |
| "logits/chosen": -1.3082769209270118, |
| "logits/rejected": -1.357060948196913, |
| "logps/chosen": -709.7, |
| "logps/rejected": -1137.4, |
| "loss": 0.2492, |
| "mean_token_accuracy": 0.713100990653038, |
| "num_tokens": 39121036.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -6.4656531572341915, |
| "rewards/margins": 4.571207290887832, |
| "rewards/rejected": -11.03686032295227, |
| "setc/cal_net_lr": 1.5742113487196524e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07217907235026359, |
| "setc/logratio_margin_delta": 28.670735931396486, |
| "setc/logratio_margin_vanilla": 428.45, |
| "setc/logratio_margin_weighted": 457.1207359313965, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5928321207221773, |
| "eval_entropy": 0.44512939453125, |
| "eval_logits/chosen": -1.3603613258581002, |
| "eval_logits/rejected": -1.3904849397392145, |
| "eval_logps/chosen": -694.0625, |
| "eval_logps/rejected": -1149.25, |
| "eval_loss": 0.2531175911426544, |
| "eval_mean_token_accuracy": 0.7049690876156092, |
| "eval_num_tokens": 39121036.0, |
| "eval_rewards/accuracies": 0.904296875, |
| "eval_rewards/chosen": -6.33006377518177, |
| "eval_rewards/margins": 4.873635433614254, |
| "eval_rewards/rejected": -11.20369903743267, |
| "eval_runtime": 14.1864, |
| "eval_samples_per_second": 35.245, |
| "eval_setc/cal_net_lr": 1.8286307534312433e-05, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.07362057128921151, |
| "eval_setc/logratio_margin_delta": 30.792871475219727, |
| "eval_setc/logratio_margin_vanilla": 457.6875, |
| "eval_setc/logratio_margin_weighted": 488.4803714752197, |
| "eval_steps_per_second": 2.256, |
| "step": 1100 |
| }, |
| { |
| "entropy": 0.43310546875, |
| "epoch": 0.5982215036378334, |
| "grad_norm": 40.20852279663086, |
| "learning_rate": 4.176050466747224e-07, |
| "logits/chosen": -1.3270166713790517, |
| "logits/rejected": -1.4020509422736323, |
| "logps/chosen": -716.6, |
| "logps/rejected": -1198.2, |
| "loss": 0.2997, |
| "mean_token_accuracy": 0.6954291999340058, |
| "num_tokens": 39473664.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -6.649608445167542, |
| "rewards/margins": 5.128565120697021, |
| "rewards/rejected": -11.778173661231994, |
| "setc/cal_net_lr": 2.0807809567527706e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07330171279609203, |
| "setc/logratio_margin_delta": 34.506527709960935, |
| "setc/logratio_margin_vanilla": 478.35, |
| "setc/logratio_margin_weighted": 512.8565277099609, |
| "step": 1110 |
| }, |
| { |
| "entropy": 0.4423828125, |
| "epoch": 0.6036108865534896, |
| "grad_norm": 70.10661315917969, |
| "learning_rate": 4.0834279859270284e-07, |
| "logits/chosen": -1.3479278371743117, |
| "logits/rejected": -1.402101682907808, |
| "logps/chosen": -684.2, |
| "logps/rejected": -1106.6, |
| "loss": 0.2735, |
| "mean_token_accuracy": 0.698912826180458, |
| "num_tokens": 39821548.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -6.291271615028381, |
| "rewards/margins": 4.475275766849518, |
| "rewards/rejected": -10.766547250747681, |
| "setc/cal_net_lr": 2.6563263577983417e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07238798253238202, |
| "setc/logratio_margin_delta": 27.777584838867188, |
| "setc/logratio_margin_vanilla": 419.75, |
| "setc/logratio_margin_weighted": 447.5275848388672, |
| "step": 1120 |
| }, |
| { |
| "entropy": 0.43671875, |
| "epoch": 0.6090002694691458, |
| "grad_norm": 20.157817840576172, |
| "learning_rate": 3.9911298601001273e-07, |
| "logits/chosen": -1.3612183495667927, |
| "logits/rejected": -1.3977843345025138, |
| "logps/chosen": -601.0, |
| "logps/rejected": -1085.4, |
| "loss": 0.2369, |
| "mean_token_accuracy": 0.7253735572099685, |
| "num_tokens": 40192876.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -5.32529776096344, |
| "rewards/margins": 5.10829610824585, |
| "rewards/rejected": -10.433593916893006, |
| "setc/cal_net_lr": 3.3000191013673035e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07107761949300766, |
| "setc/logratio_margin_delta": 29.07962646484375, |
| "setc/logratio_margin_vanilla": 481.75, |
| "setc/logratio_margin_weighted": 510.8296264648437, |
| "step": 1130 |
| }, |
| { |
| "entropy": 0.45537109375, |
| "epoch": 0.6143896523848019, |
| "grad_norm": 47.181636810302734, |
| "learning_rate": 3.8991887515751285e-07, |
| "logits/chosen": -1.3811772662010782, |
| "logits/rejected": -1.4073035922265509, |
| "logps/chosen": -634.4, |
| "logps/rejected": -992.8, |
| "loss": 0.3292, |
| "mean_token_accuracy": 0.7175350487232208, |
| "num_tokens": 40559383.0, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -5.5887420058250425, |
| "rewards/margins": 3.8901084780693056, |
| "rewards/rejected": -9.478850364685059, |
| "setc/cal_net_lr": 4.010932644449762e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07180496528744698, |
| "setc/logratio_margin_delta": 26.260860443115234, |
| "setc/logratio_margin_vanilla": 362.75, |
| "setc/logratio_margin_weighted": 389.01086044311523, |
| "step": 1140 |
| }, |
| { |
| "entropy": 0.43125, |
| "epoch": 0.6197790353004581, |
| "grad_norm": 69.74315643310547, |
| "learning_rate": 3.807637196319943e-07, |
| "logits/chosen": -1.4106397734688008, |
| "logits/rejected": -1.4677872876176674, |
| "logps/chosen": -626.8, |
| "logps/rejected": -1127.6, |
| "loss": 0.3109, |
| "mean_token_accuracy": 0.7143973648548126, |
| "num_tokens": 40918488.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -5.679463398456574, |
| "rewards/margins": 5.340268242359161, |
| "rewards/rejected": -11.019731426239014, |
| "setc/cal_net_lr": 4.7880436851978056e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07189065329730511, |
| "setc/logratio_margin_delta": 37.0768325805664, |
| "setc/logratio_margin_vanilla": 496.95, |
| "setc/logratio_margin_weighted": 534.0268325805664, |
| "step": 1150 |
| }, |
| { |
| "entropy": 0.45703125, |
| "epoch": 0.6251684182161142, |
| "grad_norm": 98.07344055175781, |
| "learning_rate": 3.716507592448015e-07, |
| "logits/chosen": -1.388660761223864, |
| "logits/rejected": -1.3975069124599455, |
| "logps/chosen": -674.2, |
| "logps/rejected": -1123.8, |
| "loss": 0.2989, |
| "mean_token_accuracy": 0.711270448565483, |
| "num_tokens": 41284352.0, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -6.101946842670441, |
| "rewards/margins": 4.818000304698944, |
| "rewards/rejected": -10.919947171211243, |
| "setc/cal_net_lr": 5.630233635884748e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07173714898526669, |
| "setc/logratio_margin_delta": 30.45003662109375, |
| "setc/logratio_margin_vanilla": 451.35, |
| "setc/logratio_margin_weighted": 481.80003662109374, |
| "step": 1160 |
| }, |
| { |
| "entropy": 0.43251953125, |
| "epoch": 0.6305578011317704, |
| "grad_norm": 35.384769439697266, |
| "learning_rate": 3.625832188753326e-07, |
| "logits/chosen": -1.3047580708198727, |
| "logits/rejected": -1.369543789182836, |
| "logps/chosen": -584.7, |
| "logps/rejected": -974.6, |
| "loss": 0.3559, |
| "mean_token_accuracy": 0.7312755823135376, |
| "num_tokens": 41646831.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -5.220397734642029, |
| "rewards/margins": 4.157508033514023, |
| "rewards/rejected": -9.377905797958373, |
| "setc/cal_net_lr": 6.536290233020582e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07165979258716107, |
| "setc/logratio_margin_delta": 27.70081787109375, |
| "setc/logratio_margin_vanilla": 388.05, |
| "setc/logratio_margin_weighted": 415.7508178710938, |
| "step": 1170 |
| }, |
| { |
| "entropy": 0.45009765625, |
| "epoch": 0.6359471840474266, |
| "grad_norm": 32.074398040771484, |
| "learning_rate": 3.5356430732982537e-07, |
| "logits/chosen": -1.3558946550666882, |
| "logits/rejected": -1.367336323455139, |
| "logps/chosen": -480.1, |
| "logps/rejected": -838.2, |
| "loss": 0.3099, |
| "mean_token_accuracy": 0.7344884812831879, |
| "num_tokens": 41977848.0, |
| "rewards/accuracies": 0.86875, |
| "rewards/chosen": -4.130843257904052, |
| "rewards/margins": 3.8041255950927733, |
| "rewards/rejected": -7.934968829154968, |
| "setc/cal_net_lr": 7.504909282306044e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07002530321478843, |
| "setc/logratio_margin_delta": 23.66255874633789, |
| "setc/logratio_margin_vanilla": 356.75, |
| "setc/logratio_margin_weighted": 380.4125587463379, |
| "step": 1180 |
| }, |
| { |
| "entropy": 0.4537109375, |
| "epoch": 0.6413365669630827, |
| "grad_norm": 93.54570007324219, |
| "learning_rate": 3.445972162058286e-07, |
| "logits/chosen": -1.348678425603877, |
| "logits/rejected": -1.3903070843099647, |
| "logps/chosen": -526.7, |
| "logps/rejected": -908.2, |
| "loss": 0.2711, |
| "mean_token_accuracy": 0.7370507389307022, |
| "num_tokens": 42336278.0, |
| "rewards/accuracies": 0.9, |
| "rewards/chosen": -4.534214103221894, |
| "rewards/margins": 4.061459875106811, |
| "rewards/rejected": -8.595674014091491, |
| "setc/cal_net_lr": 8.534696535913519e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0704065527766943, |
| "setc/logratio_margin_delta": 27.095995330810545, |
| "setc/logratio_margin_vanilla": 379.05, |
| "setc/logratio_margin_weighted": 406.14599533081054, |
| "step": 1190 |
| }, |
| { |
| "entropy": 0.46689453125, |
| "epoch": 0.6467259498787389, |
| "grad_norm": 44.626190185546875, |
| "learning_rate": 3.356851187627665e-07, |
| "logits/chosen": -1.3881511589012898, |
| "logits/rejected": -1.4340061933201436, |
| "logps/chosen": -612.0, |
| "logps/rejected": -1049.2, |
| "loss": 0.2864, |
| "mean_token_accuracy": 0.7160806059837341, |
| "num_tokens": 42714490.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -5.426479089260101, |
| "rewards/margins": 4.657109332084656, |
| "rewards/rejected": -10.083588361740112, |
| "setc/cal_net_lr": 9.624169699392697e-05, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0706709910184145, |
| "setc/logratio_margin_delta": 29.010940551757812, |
| "setc/logratio_margin_vanilla": 436.7, |
| "setc/logratio_margin_weighted": 465.7109405517578, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6467259498787389, |
| "eval_entropy": 0.48931884765625, |
| "eval_logits/chosen": -1.3905256226255422, |
| "eval_logits/rejected": -1.424202928967488, |
| "eval_logps/chosen": -595.625, |
| "eval_logps/rejected": -1020.375, |
| "eval_loss": 0.26192784309387207, |
| "eval_mean_token_accuracy": 0.7144393119961023, |
| "eval_num_tokens": 42714490.0, |
| "eval_rewards/accuracies": 0.90625, |
| "eval_rewards/chosen": -5.241253539919853, |
| "eval_rewards/margins": 4.524848110973835, |
| "eval_rewards/rejected": -9.766101628541946, |
| "eval_runtime": 14.2072, |
| "eval_samples_per_second": 35.193, |
| "eval_setc/cal_net_lr": 0.0001021713083937391, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.07142715808004141, |
| "eval_setc/logratio_margin_delta": 26.581721305847168, |
| "eval_setc/logratio_margin_vanilla": 426.4375, |
| "eval_setc/logratio_margin_weighted": 453.01922130584717, |
| "eval_steps_per_second": 2.252, |
| "step": 1200 |
| }, |
| { |
| "entropy": 0.48623046875, |
| "epoch": 0.652115332794395, |
| "grad_norm": 33.88088607788086, |
| "learning_rate": 3.2683116879898995e-07, |
| "logits/chosen": -1.3838917142029392, |
| "logits/rejected": -1.4172299467669451, |
| "logps/chosen": -591.7, |
| "logps/rejected": -991.4, |
| "loss": 0.2971, |
| "mean_token_accuracy": 0.7138142108917236, |
| "num_tokens": 43061800.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -5.238682866096497, |
| "rewards/margins": 4.275832235813141, |
| "rewards/rejected": -9.514515256881714, |
| "setc/cal_net_lr": 0.00010771760565312037, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07107691392302513, |
| "setc/logratio_margin_delta": 23.933246612548828, |
| "setc/logratio_margin_vanilla": 403.65, |
| "setc/logratio_margin_weighted": 427.5832466125488, |
| "step": 1210 |
| }, |
| { |
| "entropy": 0.47412109375, |
| "epoch": 0.6575047157100512, |
| "grad_norm": 24.915206909179688, |
| "learning_rate": 3.180384995357155e-07, |
| "logits/chosen": -1.3262967787135937, |
| "logits/rejected": -1.349477324233645, |
| "logps/chosen": -513.3, |
| "logps/rejected": -880.6, |
| "loss": 0.282, |
| "mean_token_accuracy": 0.7303446799516677, |
| "num_tokens": 43422934.0, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -4.445479357242585, |
| "rewards/margins": 3.892670524120331, |
| "rewards/rejected": -8.338150024414062, |
| "setc/cal_net_lr": 0.00011975817270565024, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06996985897421837, |
| "setc/logratio_margin_delta": 25.342060852050782, |
| "setc/logratio_margin_vanilla": 363.925, |
| "setc/logratio_margin_weighted": 389.2670608520508, |
| "step": 1220 |
| }, |
| { |
| "entropy": 0.47568359375, |
| "epoch": 0.6628940986257074, |
| "grad_norm": 41.575965881347656, |
| "learning_rate": 3.093102225082468e-07, |
| "logits/chosen": -1.4003033108084804, |
| "logits/rejected": -1.4641346348518416, |
| "logps/chosen": -629.7, |
| "logps/rejected": -1016.2, |
| "loss": 0.3094, |
| "mean_token_accuracy": 0.7089363127946854, |
| "num_tokens": 43787087.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -5.634258687496185, |
| "rewards/margins": 4.108717429637909, |
| "rewards/rejected": -9.742976069450378, |
| "setc/cal_net_lr": 0.00013234606674091868, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06970880702137947, |
| "setc/logratio_margin_delta": 24.44675521850586, |
| "setc/logratio_margin_vanilla": 386.425, |
| "setc/logratio_margin_weighted": 410.8717552185059, |
| "step": 1230 |
| }, |
| { |
| "entropy": 0.4681640625, |
| "epoch": 0.6682834815413635, |
| "grad_norm": 42.247772216796875, |
| "learning_rate": 3.006494264648687e-07, |
| "logits/chosen": -1.5038021364170624, |
| "logits/rejected": -1.5682670959321656, |
| "logps/chosen": -746.6, |
| "logps/rejected": -1242.8, |
| "loss": 0.3417, |
| "mean_token_accuracy": 0.6826083391904831, |
| "num_tokens": 44151449.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -6.940376424789429, |
| "rewards/margins": 5.315678989887237, |
| "rewards/rejected": -12.256055474281311, |
| "setc/cal_net_lr": 0.00014546316851594206, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07185462042689324, |
| "setc/logratio_margin_delta": 34.71790542602539, |
| "setc/logratio_margin_vanilla": 496.85, |
| "setc/logratio_margin_weighted": 531.5679054260254, |
| "step": 1240 |
| }, |
| { |
| "entropy": 0.45966796875, |
| "epoch": 0.6736728644570197, |
| "grad_norm": 16.57280731201172, |
| "learning_rate": 2.9205917627380717e-07, |
| "logits/chosen": -1.450344017268741, |
| "logits/rejected": -1.4847681129953636, |
| "logps/chosen": -670.9, |
| "logps/rejected": -1120.8, |
| "loss": 0.2943, |
| "mean_token_accuracy": 0.7004325985908508, |
| "num_tokens": 44493045.0, |
| "rewards/accuracies": 0.8625, |
| "rewards/chosen": -6.097113466262817, |
| "rewards/margins": 4.771983242034912, |
| "rewards/rejected": -10.869096612930297, |
| "setc/cal_net_lr": 0.00015909059703651744, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07053840048611164, |
| "setc/logratio_margin_delta": 30.54833221435547, |
| "setc/logratio_margin_vanilla": 446.65, |
| "setc/logratio_margin_weighted": 477.19833221435545, |
| "step": 1250 |
| }, |
| { |
| "entropy": 0.4638671875, |
| "epoch": 0.6790622473726758, |
| "grad_norm": 22.801801681518555, |
| "learning_rate": 2.8354251183863833e-07, |
| "logits/chosen": -1.4518458887210037, |
| "logits/rejected": -1.505511142314074, |
| "logps/chosen": -650.8, |
| "logps/rejected": -1089.8, |
| "loss": 0.2263, |
| "mean_token_accuracy": 0.709148183465004, |
| "num_tokens": 44862628.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -5.8279621481895445, |
| "rewards/margins": 4.705725991725922, |
| "rewards/rejected": -10.533688116073609, |
| "setc/cal_net_lr": 0.00017320873673486848, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06929908581078052, |
| "setc/logratio_margin_delta": 31.472618103027344, |
| "setc/logratio_margin_vanilla": 439.1, |
| "setc/logratio_margin_weighted": 470.57261810302737, |
| "step": 1260 |
| }, |
| { |
| "entropy": 0.45927734375, |
| "epoch": 0.684451630288332, |
| "grad_norm": 81.56884002685547, |
| "learning_rate": 2.7510244702253166e-07, |
| "logits/chosen": -1.5361301765199689, |
| "logits/rejected": -1.6149365987857098, |
| "logps/chosen": -617.4, |
| "logps/rejected": -1038.0, |
| "loss": 0.3026, |
| "mean_token_accuracy": 0.7010406374931335, |
| "num_tokens": 45189795.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -5.554684376716613, |
| "rewards/margins": 4.473910105228424, |
| "rewards/rejected": -10.028594422340394, |
| "setc/cal_net_lr": 0.0001877972657046496, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0700082466006279, |
| "setc/logratio_margin_delta": 30.791018676757812, |
| "setc/logratio_margin_vanilla": 416.6, |
| "setc/logratio_margin_weighted": 447.3910186767578, |
| "step": 1270 |
| }, |
| { |
| "entropy": 0.43818359375, |
| "epoch": 0.6898410132039882, |
| "grad_norm": 42.3488883972168, |
| "learning_rate": 2.667419685817105e-07, |
| "logits/chosen": -1.4125931971850059, |
| "logits/rejected": -1.4648399384142876, |
| "logps/chosen": -500.7, |
| "logps/rejected": -882.1, |
| "loss": 0.3085, |
| "mean_token_accuracy": 0.7451159060001373, |
| "num_tokens": 45554415.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -4.311119174957275, |
| "rewards/margins": 4.081181025505066, |
| "rewards/rejected": -8.392300033569336, |
| "setc/cal_net_lr": 0.00020283518495266647, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06918111927807331, |
| "setc/logratio_margin_delta": 28.018105697631835, |
| "setc/logratio_margin_vanilla": 380.1, |
| "setc/logratio_margin_weighted": 408.11810569763185, |
| "step": 1280 |
| }, |
| { |
| "entropy": 0.4404296875, |
| "epoch": 0.6952303961196443, |
| "grad_norm": 52.7696647644043, |
| "learning_rate": 2.5846403510850225e-07, |
| "logits/chosen": -1.5529430524670726, |
| "logits/rejected": -1.5987173557025316, |
| "logps/chosen": -522.1, |
| "logps/rejected": -968.8, |
| "loss": 0.2509, |
| "mean_token_accuracy": 0.7253134727478028, |
| "num_tokens": 45901757.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -4.538894402980804, |
| "rewards/margins": 4.75179933309555, |
| "rewards/rejected": -9.290693855285644, |
| "setc/cal_net_lr": 0.00021830084862520705, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0703603059053421, |
| "setc/logratio_margin_delta": 34.279936981201175, |
| "setc/logratio_margin_vanilla": 440.9, |
| "setc/logratio_margin_weighted": 475.1799369812012, |
| "step": 1290 |
| }, |
| { |
| "entropy": 0.44638671875, |
| "epoch": 0.7006197790353005, |
| "grad_norm": 31.051944732666016, |
| "learning_rate": 2.5027157598435777e-07, |
| "logits/chosen": -1.612937918631245, |
| "logits/rejected": -1.7009304808047285, |
| "logps/chosen": -665.4, |
| "logps/rejected": -1133.0, |
| "loss": 0.358, |
| "mean_token_accuracy": 0.7091849476099015, |
| "num_tokens": 46262002.0, |
| "rewards/accuracies": 0.8625, |
| "rewards/chosen": -5.984306502342224, |
| "rewards/margins": 5.062739598751068, |
| "rewards/rejected": -11.047045946121216, |
| "setc/cal_net_lr": 0.00023417199516547726, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07216440998017788, |
| "setc/logratio_margin_delta": 34.07397308349609, |
| "setc/logratio_margin_vanilla": 472.2, |
| "setc/logratio_margin_weighted": 506.2739730834961, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7006197790353005, |
| "eval_entropy": 0.45013427734375, |
| "eval_logits/chosen": -1.5569858902716183, |
| "eval_logits/rejected": -1.6061105394898563, |
| "eval_logps/chosen": -589.3125, |
| "eval_logps/rejected": -1073.625, |
| "eval_loss": 0.2594917416572571, |
| "eval_mean_token_accuracy": 0.721536174416542, |
| "eval_num_tokens": 46262002.0, |
| "eval_rewards/accuracies": 0.90234375, |
| "eval_rewards/chosen": -5.197996735572815, |
| "eval_rewards/margins": 5.20976897329092, |
| "eval_rewards/rejected": -10.407765805721283, |
| "eval_runtime": 14.2113, |
| "eval_samples_per_second": 35.183, |
| "eval_setc/cal_net_lr": 0.00024264359402975666, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.0713688328396529, |
| "eval_setc/logratio_margin_delta": 34.81822967529297, |
| "eval_setc/logratio_margin_vanilla": 487.0, |
| "eval_setc/logratio_margin_weighted": 521.818229675293, |
| "eval_steps_per_second": 2.252, |
| "step": 1300 |
| }, |
| { |
| "entropy": 0.41630859375, |
| "epoch": 0.7060091619509566, |
| "grad_norm": 77.33255004882812, |
| "learning_rate": 2.4216749034320574e-07, |
| "logits/chosen": -1.5246676903481826, |
| "logits/rejected": -1.6122385093574638, |
| "logps/chosen": -567.6, |
| "logps/rejected": -984.0, |
| "loss": 0.3325, |
| "mean_token_accuracy": 0.728985533118248, |
| "num_tokens": 46607558.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -5.136111176013946, |
| "rewards/margins": 4.455406987667084, |
| "rewards/rejected": -9.591518354415893, |
| "setc/cal_net_lr": 0.0002504257793572883, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07067357562482357, |
| "setc/logratio_margin_delta": 31.340717315673828, |
| "setc/logratio_margin_vanilla": 414.2, |
| "setc/logratio_margin_weighted": 445.5407173156738, |
| "step": 1310 |
| }, |
| { |
| "entropy": 0.440625, |
| "epoch": 0.7113985448666128, |
| "grad_norm": 23.72609519958496, |
| "learning_rate": 2.341546460455126e-07, |
| "logits/chosen": -1.540273190267993, |
| "logits/rejected": -1.6056650116281355, |
| "logps/chosen": -530.0, |
| "logps/rejected": -924.0, |
| "loss": 0.2819, |
| "mean_token_accuracy": 0.7307059586048126, |
| "num_tokens": 46949311.0, |
| "rewards/accuracies": 0.9, |
| "rewards/chosen": -4.646138024330139, |
| "rewards/margins": 4.262345945835113, |
| "rewards/rejected": -8.908483982086182, |
| "setc/cal_net_lr": 0.0002670388052088759, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06920704822987318, |
| "setc/logratio_margin_delta": 31.384617614746094, |
| "setc/logratio_margin_vanilla": 394.85, |
| "setc/logratio_margin_weighted": 426.23461761474607, |
| "step": 1320 |
| }, |
| { |
| "entropy": 0.47109375, |
| "epoch": 0.716787927782269, |
| "grad_norm": 20.47443389892578, |
| "learning_rate": 2.2623587866340888e-07, |
| "logits/chosen": -1.5787250099693941, |
| "logits/rejected": -1.6378218184439, |
| "logps/chosen": -555.9, |
| "logps/rejected": -959.8, |
| "loss": 0.3328, |
| "mean_token_accuracy": 0.7292061507701874, |
| "num_tokens": 47301716.0, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -4.8078773021698, |
| "rewards/margins": 4.328281128406525, |
| "rewards/rejected": -9.13615837097168, |
| "setc/cal_net_lr": 0.00028398715962951504, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06858081389218569, |
| "setc/logratio_margin_delta": 31.928114318847655, |
| "setc/logratio_margin_vanilla": 400.9, |
| "setc/logratio_margin_weighted": 432.82811431884767, |
| "step": 1330 |
| }, |
| { |
| "entropy": 0.44931640625, |
| "epoch": 0.7221773106979251, |
| "grad_norm": 30.538745880126953, |
| "learning_rate": 2.1841399047724047e-07, |
| "logits/chosen": -1.52773450533801, |
| "logits/rejected": -1.58617710115334, |
| "logps/chosen": -509.1, |
| "logps/rejected": -886.6, |
| "loss": 0.2378, |
| "mean_token_accuracy": 0.7392058879137039, |
| "num_tokens": 47673875.0, |
| "rewards/accuracies": 0.9125, |
| "rewards/chosen": -4.407375812530518, |
| "rewards/margins": 4.048559367656708, |
| "rewards/rejected": -8.455935263633728, |
| "setc/cal_net_lr": 0.0003012464468504569, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06842790953814984, |
| "setc/logratio_margin_delta": 29.705943298339843, |
| "setc/logratio_margin_vanilla": 375.15, |
| "setc/logratio_margin_weighted": 404.85594329833987, |
| "step": 1340 |
| }, |
| { |
| "entropy": 0.442578125, |
| "epoch": 0.7275666936135813, |
| "grad_norm": 72.51182556152344, |
| "learning_rate": 2.1069174948390435e-07, |
| "logits/chosen": -1.584456418416361, |
| "logits/rejected": -1.6523068810983297, |
| "logps/chosen": -554.8, |
| "logps/rejected": -955.6, |
| "loss": 0.3051, |
| "mean_token_accuracy": 0.7066467195749283, |
| "num_tokens": 47987918.0, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -4.997482490539551, |
| "rewards/margins": 4.298661887645721, |
| "rewards/rejected": -9.296144366264343, |
| "setc/cal_net_lr": 0.00031879182354063983, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07268548682332039, |
| "setc/logratio_margin_delta": 33.21618576049805, |
| "setc/logratio_margin_vanilla": 396.65, |
| "setc/logratio_margin_weighted": 429.86618576049807, |
| "step": 1350 |
| }, |
| { |
| "entropy": 0.46162109375, |
| "epoch": 0.7329560765292374, |
| "grad_norm": 53.63888931274414, |
| "learning_rate": 2.0307188841731193e-07, |
| "logits/chosen": -1.6495496355737422, |
| "logits/rejected": -1.6986198259558798, |
| "logps/chosen": -670.8, |
| "logps/rejected": -1126.8, |
| "loss": 0.3292, |
| "mean_token_accuracy": 0.7082957863807678, |
| "num_tokens": 48363901.0, |
| "rewards/accuracies": 0.884375, |
| "rewards/chosen": -6.070537447929382, |
| "rewards/margins": 4.876463973522187, |
| "rewards/rejected": -10.947001552581787, |
| "setc/cal_net_lr": 0.0003365980345666318, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0717622920870781, |
| "setc/logratio_margin_delta": 33.046409606933594, |
| "setc/logratio_margin_vanilla": 454.6, |
| "setc/logratio_margin_weighted": 487.6464096069336, |
| "step": 1360 |
| }, |
| { |
| "entropy": 0.45615234375, |
| "epoch": 0.7383454594448936, |
| "grad_norm": 68.25013732910156, |
| "learning_rate": 1.9555710378133584e-07, |
| "logits/chosen": -1.5475741416931696, |
| "logits/rejected": -1.5981262042934623, |
| "logps/chosen": -553.0, |
| "logps/rejected": -915.8, |
| "loss": 0.3524, |
| "mean_token_accuracy": 0.719152620434761, |
| "num_tokens": 48721520.0, |
| "rewards/accuracies": 0.89375, |
| "rewards/chosen": -4.9144437432289125, |
| "rewards/margins": 3.8710513949394225, |
| "rewards/rejected": -8.785495042800903, |
| "setc/cal_net_lr": 0.00035463944934532626, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07162475697696209, |
| "setc/logratio_margin_delta": 26.60514907836914, |
| "setc/logratio_margin_vanilla": 360.5, |
| "setc/logratio_margin_weighted": 387.10514907836915, |
| "step": 1370 |
| }, |
| { |
| "entropy": 0.47626953125, |
| "epoch": 0.7437348423605498, |
| "grad_norm": 27.951370239257812, |
| "learning_rate": 1.88150054895574e-07, |
| "logits/chosen": -1.5752492477710134, |
| "logits/rejected": -1.6267678817590063, |
| "logps/chosen": -530.9, |
| "logps/rejected": -860.2, |
| "loss": 0.331, |
| "mean_token_accuracy": 0.7230644017457962, |
| "num_tokens": 49073907.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -4.60977201461792, |
| "rewards/margins": 3.515028989315033, |
| "rewards/rejected": -8.12480103969574, |
| "setc/cal_net_lr": 0.00037289009873706997, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0722591146826744, |
| "setc/logratio_margin_delta": 24.0529052734375, |
| "setc/logratio_margin_vanilla": 327.45, |
| "setc/logratio_margin_weighted": 351.5029052734375, |
| "step": 1380 |
| }, |
| { |
| "entropy": 0.4615234375, |
| "epoch": 0.7491242252762059, |
| "grad_norm": 55.398651123046875, |
| "learning_rate": 1.808533629542751e-07, |
| "logits/chosen": -1.519858605305458, |
| "logits/rejected": -1.6093533244431188, |
| "logps/chosen": -509.7, |
| "logps/rejected": -855.6, |
| "loss": 0.3028, |
| "mean_token_accuracy": 0.7340264290571212, |
| "num_tokens": 49418680.0, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -4.401390933990479, |
| "rewards/margins": 3.69234881401062, |
| "rewards/rejected": -8.093739652633667, |
| "setc/cal_net_lr": 0.0003913237124261124, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.071388328820467, |
| "setc/logratio_margin_delta": 25.53488998413086, |
| "setc/logratio_margin_vanilla": 343.7, |
| "setc/logratio_margin_weighted": 369.23488998413086, |
| "step": 1390 |
| }, |
| { |
| "entropy": 0.47451171875, |
| "epoch": 0.7545136081918621, |
| "grad_norm": 20.171005249023438, |
| "learning_rate": 1.736696100987543e-07, |
| "logits/chosen": -1.6546586863128225, |
| "logits/rejected": -1.7063404244684879, |
| "logps/chosen": -534.9, |
| "logps/rejected": -856.2, |
| "loss": 0.3403, |
| "mean_token_accuracy": 0.7271544426679611, |
| "num_tokens": 49772876.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -4.6236349701881405, |
| "rewards/margins": 3.4730340123176573, |
| "rewards/rejected": -8.096669101715088, |
| "setc/cal_net_lr": 0.0004099137567345749, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07029190417379141, |
| "setc/logratio_margin_delta": 24.003412628173827, |
| "setc/logratio_margin_vanilla": 323.3, |
| "setc/logratio_margin_weighted": 347.3034126281738, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7545136081918621, |
| "eval_entropy": 0.4881591796875, |
| "eval_logits/chosen": -1.5865094371675443, |
| "eval_logits/rejected": -1.6286456849256612, |
| "eval_logps/chosen": -503.875, |
| "eval_logps/rejected": -873.75, |
| "eval_loss": 0.25063228607177734, |
| "eval_mean_token_accuracy": 0.7336005866527557, |
| "eval_num_tokens": 49772876.0, |
| "eval_rewards/accuracies": 0.9140625, |
| "eval_rewards/chosen": -4.2661414965987206, |
| "eval_rewards/margins": 3.9734192714095116, |
| "eval_rewards/rejected": -8.239560678601265, |
| "eval_runtime": 14.2136, |
| "eval_samples_per_second": 35.178, |
| "eval_setc/cal_net_lr": 0.00041972234003674093, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.06988485460169613, |
| "eval_setc/logratio_margin_delta": 25.711304664611816, |
| "eval_setc/logratio_margin_vanilla": 373.5625, |
| "eval_setc/logratio_margin_weighted": 399.2738046646118, |
| "eval_steps_per_second": 2.251, |
| "step": 1400 |
| }, |
| { |
| "entropy": 0.46767578125, |
| "epoch": 0.7599029911075182, |
| "grad_norm": 59.77590560913086, |
| "learning_rate": 1.6660133850362878e-07, |
| "logits/chosen": -1.5703001768437603, |
| "logits/rejected": -1.6146571741836044, |
| "logps/chosen": -567.2, |
| "logps/rejected": -932.8, |
| "loss": 0.3168, |
| "mean_token_accuracy": 0.7329632073640824, |
| "num_tokens": 50163681.0, |
| "rewards/accuracies": 0.884375, |
| "rewards/chosen": -4.9076042652130125, |
| "rewards/margins": 3.9311932802200316, |
| "rewards/rejected": -8.838797640800475, |
| "setc/cal_net_lr": 0.00042863347281550787, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07002903185784817, |
| "setc/logratio_margin_delta": 26.31934051513672, |
| "setc/logratio_margin_vanilla": 366.8, |
| "setc/logratio_margin_weighted": 393.11934051513674, |
| "step": 1410 |
| }, |
| { |
| "entropy": 0.46171875, |
| "epoch": 0.7652923740231743, |
| "grad_norm": 56.6906852722168, |
| "learning_rate": 1.5965104947719816e-07, |
| "logits/chosen": -1.5351415663599084, |
| "logits/rejected": -1.622101327043853, |
| "logps/chosen": -546.1, |
| "logps/rejected": -945.4, |
| "loss": 0.2704, |
| "mean_token_accuracy": 0.7306698024272918, |
| "num_tokens": 50536109.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -4.773424637317658, |
| "rewards/margins": 4.259200811386108, |
| "rewards/rejected": -9.032625222206116, |
| "setc/cal_net_lr": 0.0004474559151700609, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07033420763909817, |
| "setc/logratio_margin_delta": 29.820078659057618, |
| "setc/logratio_margin_vanilla": 396.1, |
| "setc/logratio_margin_weighted": 425.9200786590576, |
| "step": 1420 |
| }, |
| { |
| "entropy": 0.4712890625, |
| "epoch": 0.7706817569388305, |
| "grad_norm": 27.157922744750977, |
| "learning_rate": 1.5282120257628493e-07, |
| "logits/chosen": -1.5769850546803867, |
| "logits/rejected": -1.675950265945526, |
| "logps/chosen": -572.0, |
| "logps/rejected": -961.6, |
| "loss": 0.3383, |
| "mean_token_accuracy": 0.7177787780761719, |
| "num_tokens": 50898774.0, |
| "rewards/accuracies": 0.85, |
| "rewards/chosen": -5.021525096893311, |
| "rewards/margins": 4.185178649425507, |
| "rewards/rejected": -9.206703734397887, |
| "setc/cal_net_lr": 0.00046635399043332197, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.07177232243120671, |
| "setc/logratio_margin_delta": 28.917872619628906, |
| "setc/logratio_margin_vanilla": 389.6, |
| "setc/logratio_margin_weighted": 418.5178726196289, |
| "step": 1430 |
| }, |
| { |
| "entropy": 0.46376953125, |
| "epoch": 0.7760711398544866, |
| "grad_norm": 40.695457458496094, |
| "learning_rate": 1.46114214735851e-07, |
| "logits/chosen": -1.6385950412691188, |
| "logits/rejected": -1.686338215651635, |
| "logps/chosen": -551.6, |
| "logps/rejected": -926.2, |
| "loss": 0.3597, |
| "mean_token_accuracy": 0.7252315312623978, |
| "num_tokens": 51264449.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -4.829545390605927, |
| "rewards/margins": 3.9724243760108946, |
| "rewards/rejected": -8.801969718933105, |
| "setc/cal_net_lr": 0.00048530049637299856, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06923934705555439, |
| "setc/logratio_margin_delta": 24.39244613647461, |
| "setc/logratio_margin_vanilla": 372.85, |
| "setc/logratio_margin_weighted": 397.2424461364746, |
| "step": 1440 |
| }, |
| { |
| "entropy": 0.4931640625, |
| "epoch": 0.7814605227701428, |
| "grad_norm": 35.001766204833984, |
| "learning_rate": 1.3953245941369606e-07, |
| "logits/chosen": -1.5720843194136846, |
| "logits/rejected": -1.6047685664358426, |
| "logps/chosen": -596.7, |
| "logps/rejected": -962.6, |
| "loss": 0.4049, |
| "mean_token_accuracy": 0.7161958605051041, |
| "num_tokens": 51627142.0, |
| "rewards/accuracies": 0.86875, |
| "rewards/chosen": -5.245314371585846, |
| "rewards/margins": 3.897673261165619, |
| "rewards/rejected": -9.142987942695617, |
| "setc/cal_net_lr": 0.0005042681610448041, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06911584474146366, |
| "setc/logratio_margin_delta": 24.167342376708984, |
| "setc/logratio_margin_vanilla": 365.6, |
| "setc/logratio_margin_weighted": 389.767342376709, |
| "step": 1450 |
| }, |
| { |
| "entropy": 0.46611328125, |
| "epoch": 0.7868499056857989, |
| "grad_norm": 25.934104919433594, |
| "learning_rate": 1.3307826575054303e-07, |
| "logits/chosen": -1.5709140280737057, |
| "logits/rejected": -1.6279985828979888, |
| "logps/chosen": -520.7, |
| "logps/rejected": -859.0, |
| "loss": 0.2908, |
| "mean_token_accuracy": 0.7397780060768128, |
| "num_tokens": 51996636.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -4.433149778842926, |
| "rewards/margins": 3.6351953089237212, |
| "rewards/rejected": -8.068345022201537, |
| "setc/cal_net_lr": 0.0005232296820481889, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06630576048046351, |
| "setc/logratio_margin_delta": 23.869540405273437, |
| "setc/logratio_margin_vanilla": 339.65, |
| "setc/logratio_margin_weighted": 363.51954040527346, |
| "step": 1460 |
| }, |
| { |
| "entropy": 0.4646484375, |
| "epoch": 0.7922392886014551, |
| "grad_norm": 109.72233581542969, |
| "learning_rate": 1.267539177458053e-07, |
| "logits/chosen": -1.5302456922524228, |
| "logits/rejected": -1.5671663859152356, |
| "logps/chosen": -500.8, |
| "logps/rejected": -826.2, |
| "loss": 0.3222, |
| "mean_token_accuracy": 0.7304188579320907, |
| "num_tokens": 52363307.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -4.318540203571319, |
| "rewards/margins": 3.475985234975815, |
| "rewards/rejected": -7.794525456428528, |
| "setc/cal_net_lr": 0.0005421577658259117, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06553097013384104, |
| "setc/logratio_margin_delta": 22.448529052734376, |
| "setc/logratio_margin_vanilla": 325.15, |
| "setc/logratio_margin_weighted": 347.59852905273436, |
| "step": 1470 |
| }, |
| { |
| "entropy": 0.46298828125, |
| "epoch": 0.7976286715171113, |
| "grad_norm": 28.780488967895508, |
| "learning_rate": 1.2056165344932827e-07, |
| "logits/chosen": -1.4931537332170401, |
| "logits/rejected": -1.569671151563729, |
| "logps/chosen": -528.1, |
| "logps/rejected": -940.6, |
| "loss": 0.2665, |
| "mean_token_accuracy": 0.7405376106500625, |
| "num_tokens": 52725552.0, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -4.563779902458191, |
| "rewards/margins": 4.378486478328705, |
| "rewards/rejected": -8.942266464233398, |
| "setc/cal_net_lr": 0.0005610251669508799, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06495412047952413, |
| "setc/logratio_margin_delta": 26.19866180419922, |
| "setc/logratio_margin_vanilla": 411.65, |
| "setc/logratio_margin_weighted": 437.8486618041992, |
| "step": 1480 |
| }, |
| { |
| "entropy": 0.44052734375, |
| "epoch": 0.8030180544327674, |
| "grad_norm": 62.47037124633789, |
| "learning_rate": 1.1450366416939295e-07, |
| "logits/chosen": -1.611906114241489, |
| "logits/rejected": -1.6829238200447194, |
| "logps/chosen": -531.7, |
| "logps/rejected": -886.6, |
| "loss": 0.286, |
| "mean_token_accuracy": 0.7277172565460205, |
| "num_tokens": 53058665.0, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -4.697575926780701, |
| "rewards/margins": 3.8217047095298766, |
| "rewards/rejected": -8.519280743598937, |
| "setc/cal_net_lr": 0.000579804727343712, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06567572541534901, |
| "setc/logratio_margin_delta": 26.470491790771483, |
| "setc/logratio_margin_vanilla": 355.7, |
| "setc/logratio_margin_weighted": 382.1704917907715, |
| "step": 1490 |
| }, |
| { |
| "entropy": 0.46953125, |
| "epoch": 0.8084074373484236, |
| "grad_norm": 42.8690185546875, |
| "learning_rate": 1.0858209369725851e-07, |
| "logits/chosen": -1.5465363961992153, |
| "logits/rejected": -1.6092895249530645, |
| "logps/chosen": -617.6, |
| "logps/rejected": -1037.6, |
| "loss": 0.2758, |
| "mean_token_accuracy": 0.7125173568725586, |
| "num_tokens": 53427519.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -5.522391545772552, |
| "rewards/margins": 4.461647641658783, |
| "rewards/rejected": -9.984039378166198, |
| "setc/cal_net_lr": 0.0005984694153645696, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06612777952104806, |
| "setc/logratio_margin_delta": 29.16478729248047, |
| "setc/logratio_margin_vanilla": 417.0, |
| "setc/logratio_margin_weighted": 446.16478729248047, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8084074373484236, |
| "eval_entropy": 0.4835205078125, |
| "eval_logits/chosen": -1.574804083909467, |
| "eval_logits/rejected": -1.6192383259931051, |
| "eval_logps/chosen": -594.5625, |
| "eval_logps/rejected": -1023.125, |
| "eval_loss": 0.24590468406677246, |
| "eval_mean_token_accuracy": 0.7153119649738073, |
| "eval_num_tokens": 53427519.0, |
| "eval_rewards/accuracies": 0.908203125, |
| "eval_rewards/chosen": -5.237039923667908, |
| "eval_rewards/margins": 4.583509214222431, |
| "eval_rewards/rejected": -9.820549130439758, |
| "eval_runtime": 14.2433, |
| "eval_samples_per_second": 35.104, |
| "eval_setc/cal_net_lr": 0.0006082198069690511, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.06576518306974322, |
| "eval_setc/logratio_margin_delta": 28.384427070617676, |
| "eval_setc/logratio_margin_vanilla": 431.625, |
| "eval_setc/logratio_margin_weighted": 460.0094270706177, |
| "eval_steps_per_second": 2.247, |
| "step": 1500 |
| }, |
| { |
| "entropy": 0.4947265625, |
| "epoch": 0.8137968202640797, |
| "grad_norm": 25.9521427154541, |
| "learning_rate": 1.0279903754852164e-07, |
| "logits/chosen": -1.5614659093580356, |
| "logits/rejected": -1.6606548524269127, |
| "logps/chosen": -632.0, |
| "logps/rejected": -1072.0, |
| "loss": 0.2877, |
| "mean_token_accuracy": 0.7104504853487015, |
| "num_tokens": 53792050.0, |
| "rewards/accuracies": 0.9125, |
| "rewards/chosen": -5.629306566715241, |
| "rewards/margins": 4.753253519535065, |
| "rewards/rejected": -10.382560396194458, |
| "setc/cal_net_lr": 0.0006169923647229892, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0657088104635477, |
| "setc/logratio_margin_delta": 28.475372314453125, |
| "setc/logratio_margin_vanilla": 446.85, |
| "setc/logratio_margin_weighted": 475.32537231445315, |
| "step": 1510 |
| }, |
| { |
| "entropy": 0.484375, |
| "epoch": 0.8191862031797359, |
| "grad_norm": 33.998130798339844, |
| "learning_rate": 9.715654222155811e-08, |
| "logits/chosen": -1.5534200931238813, |
| "logits/rejected": -1.5930939373874997, |
| "logps/chosen": -587.5, |
| "logps/rejected": -1001.8, |
| "loss": 0.3289, |
| "mean_token_accuracy": 0.72098990380764, |
| "num_tokens": 54136636.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -5.229832494258881, |
| "rewards/margins": 4.401166546344757, |
| "rewards/rejected": -9.63099913597107, |
| "setc/cal_net_lr": 0.0006353469131497097, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06840050462633371, |
| "setc/logratio_margin_delta": 29.116664123535156, |
| "setc/logratio_margin_vanilla": 411.0, |
| "setc/logratio_margin_weighted": 440.11666412353514, |
| "step": 1520 |
| }, |
| { |
| "entropy": 0.4716796875, |
| "epoch": 0.824575586095392, |
| "grad_norm": 50.3288688659668, |
| "learning_rate": 9.16566044733122e-08, |
| "logits/chosen": -1.5786561767257699, |
| "logits/rejected": -1.642224351067974, |
| "logps/chosen": -626.7, |
| "logps/rejected": -1025.2, |
| "loss": 0.2522, |
| "mean_token_accuracy": 0.7102588206529618, |
| "num_tokens": 54516451.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -5.588171422481537, |
| "rewards/margins": 4.30382000207901, |
| "rewards/rejected": -9.891991662979127, |
| "setc/cal_net_lr": 0.0006535066407748258, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0688111089169979, |
| "setc/logratio_margin_delta": 28.632022094726562, |
| "setc/logratio_margin_vanilla": 401.75, |
| "setc/logratio_margin_weighted": 430.38202209472655, |
| "step": 1530 |
| }, |
| { |
| "entropy": 0.4650390625, |
| "epoch": 0.8299649690110482, |
| "grad_norm": 61.28715515136719, |
| "learning_rate": 8.630117061268733e-08, |
| "logits/chosen": -1.6015775772223662, |
| "logits/rejected": -1.6506740047739197, |
| "logps/chosen": -673.1, |
| "logps/rejected": -1067.2, |
| "loss": 0.3355, |
| "mean_token_accuracy": 0.7012999951839447, |
| "num_tokens": 54867398.0, |
| "rewards/accuracies": 0.8625, |
| "rewards/chosen": -6.10805311203003, |
| "rewards/margins": 4.206076884269715, |
| "rewards/rejected": -10.314130067825317, |
| "setc/cal_net_lr": 0.0006714454081570259, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06854588873684406, |
| "setc/logratio_margin_delta": 26.507701110839843, |
| "setc/logratio_margin_vanilla": 394.1, |
| "setc/logratio_margin_weighted": 420.60770111083986, |
| "step": 1540 |
| }, |
| { |
| "entropy": 0.46708984375, |
| "epoch": 0.8353543519267044, |
| "grad_norm": 66.6949234008789, |
| "learning_rate": 8.109213581178897e-08, |
| "logits/chosen": -1.519720828447748, |
| "logits/rejected": -1.5808756968990825, |
| "logps/chosen": -617.7, |
| "logps/rejected": -967.2, |
| "loss": 0.3575, |
| "mean_token_accuracy": 0.7194855481386184, |
| "num_tokens": 55239887.0, |
| "rewards/accuracies": 0.9, |
| "rewards/chosen": -5.524090123176575, |
| "rewards/margins": 3.7780436635017396, |
| "rewards/rejected": -9.302133798599243, |
| "setc/cal_net_lr": 0.0006891373939091811, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0681806318461895, |
| "setc/logratio_margin_delta": 26.4543701171875, |
| "setc/logratio_margin_vanilla": 351.35, |
| "setc/logratio_margin_weighted": 377.8043701171875, |
| "step": 1550 |
| }, |
| { |
| "entropy": 0.4662109375, |
| "epoch": 0.8407437348423605, |
| "grad_norm": 27.215456008911133, |
| "learning_rate": 7.603134343526502e-08, |
| "logits/chosen": -1.5236826772256906, |
| "logits/rejected": -1.570188853218213, |
| "logps/chosen": -587.4, |
| "logps/rejected": -975.4, |
| "loss": 0.233, |
| "mean_token_accuracy": 0.7267722725868225, |
| "num_tokens": 55609384.0, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -5.191406297683716, |
| "rewards/margins": 4.120009970664978, |
| "rewards/rejected": -9.311416292190552, |
| "setc/cal_net_lr": 0.0007065571318661164, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06620657239109277, |
| "setc/logratio_margin_delta": 27.050999450683594, |
| "setc/logratio_margin_vanilla": 384.95, |
| "setc/logratio_margin_weighted": 412.0009994506836, |
| "step": 1560 |
| }, |
| { |
| "entropy": 0.46328125, |
| "epoch": 0.8461331177580167, |
| "grad_norm": 47.70095443725586, |
| "learning_rate": 7.112058438797858e-08, |
| "logits/chosen": -1.5296358009335131, |
| "logits/rejected": -1.5978488752687707, |
| "logps/chosen": -614.6, |
| "logps/rejected": -1019.2, |
| "loss": 0.2631, |
| "mean_token_accuracy": 0.7203928977251053, |
| "num_tokens": 55970954.0, |
| "rewards/accuracies": 0.903125, |
| "rewards/chosen": -5.481227231025696, |
| "rewards/margins": 4.321574485301971, |
| "rewards/rejected": -9.80280179977417, |
| "setc/cal_net_lr": 0.0007236795477410702, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06795285735279322, |
| "setc/logratio_margin_delta": 27.957456970214842, |
| "setc/logratio_margin_vanilla": 404.2, |
| "setc/logratio_margin_weighted": 432.15745697021487, |
| "step": 1570 |
| }, |
| { |
| "entropy": 0.46845703125, |
| "epoch": 0.8515225006736729, |
| "grad_norm": 36.03289794921875, |
| "learning_rate": 6.636159648124557e-08, |
| "logits/chosen": -1.5813003009716322, |
| "logits/rejected": -1.6186931994037377, |
| "logps/chosen": -607.3, |
| "logps/rejected": -967.4, |
| "loss": 0.3036, |
| "mean_token_accuracy": 0.7082692325115204, |
| "num_tokens": 56337079.0, |
| "rewards/accuracies": 0.85, |
| "rewards/chosen": -5.434537315368653, |
| "rewards/margins": 3.795243227481842, |
| "rewards/rejected": -9.229780340194703, |
| "setc/cal_net_lr": 0.0007404799952180797, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06792235635221004, |
| "setc/logratio_margin_delta": 25.124325561523438, |
| "setc/logratio_margin_vanilla": 354.4, |
| "setc/logratio_margin_weighted": 379.5243255615234, |
| "step": 1580 |
| }, |
| { |
| "entropy": 0.4748046875, |
| "epoch": 0.856911883589329, |
| "grad_norm": 24.535364151000977, |
| "learning_rate": 6.175606381786069e-08, |
| "logits/chosen": -1.5430637306828987, |
| "logits/rejected": -1.6079818626695932, |
| "logps/chosen": -650.9, |
| "logps/rejected": -1084.8, |
| "loss": 0.2913, |
| "mean_token_accuracy": 0.7224538773298264, |
| "num_tokens": 56731554.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -5.72764368057251, |
| "rewards/margins": 4.639845299720764, |
| "rewards/rejected": -10.367488980293274, |
| "setc/cal_net_lr": 0.0007569342914283324, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.06582386922091246, |
| "setc/logratio_margin_delta": 29.334539794921874, |
| "setc/logratio_margin_vanilla": 434.65, |
| "setc/logratio_margin_weighted": 463.9845397949219, |
| "step": 1590 |
| }, |
| { |
| "entropy": 0.48642578125, |
| "epoch": 0.8623012665049852, |
| "grad_norm": 79.2051773071289, |
| "learning_rate": 5.7305616196130334e-08, |
| "logits/chosen": -1.6237664047229114, |
| "logits/rejected": -1.6430690136847275, |
| "logps/chosen": -653.3, |
| "logps/rejected": -1039.8, |
| "loss": 0.2893, |
| "mean_token_accuracy": 0.7086077839136123, |
| "num_tokens": 57110285.0, |
| "rewards/accuracies": 0.85625, |
| "rewards/chosen": -5.711839783191681, |
| "rewards/margins": 4.166626226902008, |
| "rewards/rejected": -9.87846598625183, |
| "setc/cal_net_lr": 0.0007730187517594288, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.06541631631553173, |
| "setc/logratio_margin_delta": 25.612635803222656, |
| "setc/logratio_margin_vanilla": 391.05, |
| "setc/logratio_margin_weighted": 416.66263580322266, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8623012665049852, |
| "eval_entropy": 0.4923095703125, |
| "eval_logits/chosen": -1.603833879515406, |
| "eval_logits/rejected": -1.6448864097538396, |
| "eval_logps/chosen": -600.5, |
| "eval_logps/rejected": -1020.375, |
| "eval_loss": 0.24327774345874786, |
| "eval_mean_token_accuracy": 0.7129223179072142, |
| "eval_num_tokens": 57110285.0, |
| "eval_rewards/accuracies": 0.916015625, |
| "eval_rewards/chosen": -5.2806950733065605, |
| "eval_rewards/margins": 4.483514308929443, |
| "eval_rewards/rejected": -9.764209300279617, |
| "eval_runtime": 14.2982, |
| "eval_samples_per_second": 34.969, |
| "eval_setc/cal_net_lr": 0.0007813240527369953, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.06434770347550511, |
| "eval_setc/logratio_margin_delta": 26.474138259887695, |
| "eval_setc/logratio_margin_vanilla": 423.375, |
| "eval_setc/logratio_margin_weighted": 449.8491382598877, |
| "eval_steps_per_second": 2.238, |
| "step": 1600 |
| }, |
| { |
| "entropy": 0.48271484375, |
| "epoch": 0.8676906494206413, |
| "grad_norm": 70.79142761230469, |
| "learning_rate": 5.3011828533122024e-08, |
| "logits/chosen": -1.6149875326285763, |
| "logits/rejected": -1.6959871557083672, |
| "logps/chosen": -639.3, |
| "logps/rejected": -1038.2, |
| "loss": 0.2599, |
| "mean_token_accuracy": 0.713002547621727, |
| "num_tokens": 57472462.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -5.632159161567688, |
| "rewards/margins": 4.25579309463501, |
| "rewards/rejected": -9.887952256202698, |
| "setc/cal_net_lr": 0.0007887102239474405, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06394433472305536, |
| "setc/logratio_margin_delta": 26.4293212890625, |
| "setc/logratio_margin_vanilla": 399.15, |
| "setc/logratio_margin_weighted": 425.5793212890625, |
| "step": 1610 |
| }, |
| { |
| "entropy": 0.475, |
| "epoch": 0.8730800323362975, |
| "grad_norm": 39.525123596191406, |
| "learning_rate": 4.887622030733507e-08, |
| "logits/chosen": -1.5679981820658682, |
| "logits/rejected": -1.6499297438183103, |
| "logps/chosen": -620.7, |
| "logps/rejected": -1020.0, |
| "loss": 0.2992, |
| "mean_token_accuracy": 0.7138593286275864, |
| "num_tokens": 57826454.0, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -5.5272397756576535, |
| "rewards/margins": 4.316784369945526, |
| "rewards/rejected": -9.84402413368225, |
| "setc/cal_net_lr": 0.0008039861214027002, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.060222255811095235, |
| "setc/logratio_margin_delta": 27.97843780517578, |
| "setc/logratio_margin_vanilla": 403.7, |
| "setc/logratio_margin_weighted": 431.6784378051758, |
| "step": 1620 |
| }, |
| { |
| "entropy": 0.4849609375, |
| "epoch": 0.8784694152519537, |
| "grad_norm": 50.614253997802734, |
| "learning_rate": 4.4900255020990764e-08, |
| "logits/chosen": -1.554491747697552, |
| "logits/rejected": -1.6372942528114076, |
| "logps/chosen": -619.1, |
| "logps/rejected": -1022.0, |
| "loss": 0.3179, |
| "mean_token_accuracy": 0.7143799781799316, |
| "num_tokens": 58225092.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -5.4253825664520265, |
| "rewards/margins": 4.251791071891785, |
| "rewards/rejected": -9.677173686027526, |
| "setc/cal_net_lr": 0.0008188244557213483, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05570842530578375, |
| "setc/logratio_margin_delta": 23.57911911010742, |
| "setc/logratio_margin_vanilla": 401.6, |
| "setc/logratio_margin_weighted": 425.1791191101074, |
| "step": 1630 |
| }, |
| { |
| "entropy": 0.47099609375, |
| "epoch": 0.8838587981676098, |
| "grad_norm": 33.20830535888672, |
| "learning_rate": 4.108533968212963e-08, |
| "logits/chosen": -1.5388461767468533, |
| "logits/rejected": -1.583654628206283, |
| "logps/chosen": -595.7, |
| "logps/rejected": -1035.6, |
| "loss": 0.2626, |
| "mean_token_accuracy": 0.713904058933258, |
| "num_tokens": 58584790.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -5.293009197711944, |
| "rewards/margins": 4.6426942348480225, |
| "rewards/rejected": -9.93570351600647, |
| "setc/cal_net_lr": 0.000833203868335841, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.05715073775500059, |
| "setc/logratio_margin_delta": 29.46942901611328, |
| "setc/logratio_margin_vanilla": 434.8, |
| "setc/logratio_margin_weighted": 464.2694290161133, |
| "step": 1640 |
| }, |
| { |
| "entropy": 0.4619140625, |
| "epoch": 0.889248181083266, |
| "grad_norm": 49.0516242980957, |
| "learning_rate": 3.743282430670286e-08, |
| "logits/chosen": -1.5955134523755936, |
| "logits/rejected": -1.631971502049342, |
| "logps/chosen": -559.5, |
| "logps/rejected": -978.0, |
| "loss": 0.278, |
| "mean_token_accuracy": 0.7260301500558853, |
| "num_tokens": 58947907.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -4.951177608966828, |
| "rewards/margins": 4.426893401145935, |
| "rewards/rejected": -9.378070974349976, |
| "setc/cal_net_lr": 0.0008471036612588582, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.057837378792464735, |
| "setc/logratio_margin_delta": 26.0393424987793, |
| "setc/logratio_margin_vanilla": 416.65, |
| "setc/logratio_margin_weighted": 442.6893424987793, |
| "step": 1650 |
| }, |
| { |
| "entropy": 0.476953125, |
| "epoch": 0.8946375639989221, |
| "grad_norm": 61.91923141479492, |
| "learning_rate": 3.3944001440829986e-08, |
| "logits/chosen": -1.5750558117502387, |
| "logits/rejected": -1.657317549975772, |
| "logps/chosen": -623.0, |
| "logps/rejected": -1002.8, |
| "loss": 0.3331, |
| "mean_token_accuracy": 0.7019858151674271, |
| "num_tokens": 59303281.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -5.602843141555786, |
| "rewards/margins": 4.078434026241302, |
| "rewards/rejected": -9.681277179718018, |
| "setc/cal_net_lr": 0.0008605038268763623, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.059012738056480885, |
| "setc/logratio_margin_delta": 26.393409729003906, |
| "setc/logratio_margin_vanilla": 381.45, |
| "setc/logratio_margin_weighted": 407.8434097290039, |
| "step": 1660 |
| }, |
| { |
| "entropy": 0.4888671875, |
| "epoch": 0.9000269469145783, |
| "grad_norm": 40.752716064453125, |
| "learning_rate": 3.0620105703395075e-08, |
| "logits/chosen": -1.6270705285389657, |
| "logits/rejected": -1.6553936166815, |
| "logps/chosen": -588.7, |
| "logps/rejected": -1051.8, |
| "loss": 0.2932, |
| "mean_token_accuracy": 0.7158574312925339, |
| "num_tokens": 59687228.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -5.1385871052742, |
| "rewards/margins": 4.8742870688438416, |
| "rewards/rejected": -10.012874150276184, |
| "setc/cal_net_lr": 0.0008733850767469216, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05472625363618135, |
| "setc/logratio_margin_delta": 24.0787109375, |
| "setc/logratio_margin_vanilla": 463.35, |
| "setc/logratio_margin_weighted": 487.4287109375, |
| "step": 1670 |
| }, |
| { |
| "entropy": 0.47392578125, |
| "epoch": 0.9054163298302345, |
| "grad_norm": 57.972511291503906, |
| "learning_rate": 2.7462313349142063e-08, |
| "logits/chosen": -1.5235790048606868, |
| "logits/rejected": -1.5691862969507655, |
| "logps/chosen": -614.3, |
| "logps/rejected": -1054.2, |
| "loss": 0.2667, |
| "mean_token_accuracy": 0.7157090991735459, |
| "num_tokens": 60046942.0, |
| "rewards/accuracies": 0.878125, |
| "rewards/chosen": -5.51959844827652, |
| "rewards/margins": 4.6187913656234745, |
| "rewards/rejected": -10.138389611244202, |
| "setc/cal_net_lr": 0.0008857288693658431, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05935143679380417, |
| "setc/logratio_margin_delta": 24.97913818359375, |
| "setc/logratio_margin_vanilla": 436.9, |
| "setc/logratio_margin_weighted": 461.87913818359374, |
| "step": 1680 |
| }, |
| { |
| "entropy": 0.478125, |
| "epoch": 0.9108057127458906, |
| "grad_norm": 56.78584289550781, |
| "learning_rate": 2.4471741852423233e-08, |
| "logits/chosen": -1.6328370435487536, |
| "logits/rejected": -1.6685099427783272, |
| "logps/chosen": -598.0, |
| "logps/rejected": -951.4, |
| "loss": 0.3008, |
| "mean_token_accuracy": 0.7114303112030029, |
| "num_tokens": 60420768.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -5.270635271072388, |
| "rewards/margins": 3.7526545763015746, |
| "rewards/rejected": -9.023289966583253, |
| "setc/cal_net_lr": 0.0008975174368541483, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06038776412606239, |
| "setc/logratio_margin_delta": 21.315467834472656, |
| "setc/logratio_margin_vanilla": 353.95, |
| "setc/logratio_margin_weighted": 375.26546783447264, |
| "step": 1690 |
| }, |
| { |
| "entropy": 0.4927734375, |
| "epoch": 0.9161950956615468, |
| "grad_norm": 57.52949142456055, |
| "learning_rate": 2.1649449511749796e-08, |
| "logits/chosen": -1.5933153416721983, |
| "logits/rejected": -1.6209732453760946, |
| "logps/chosen": -600.1, |
| "logps/rejected": -1020.8, |
| "loss": 0.2691, |
| "mean_token_accuracy": 0.7082908779382706, |
| "num_tokens": 60787745.0, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -5.318318796157837, |
| "rewards/margins": 4.3899754106998445, |
| "rewards/rejected": -9.708294129371643, |
| "setc/cal_net_lr": 0.0009087338105339832, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.06079798210412264, |
| "setc/logratio_margin_delta": 25.297548675537108, |
| "setc/logratio_margin_vanilla": 413.7, |
| "setc/logratio_margin_weighted": 438.99754867553713, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9161950956615468, |
| "eval_entropy": 0.49981689453125, |
| "eval_logits/chosen": -1.64129217363471, |
| "eval_logits/rejected": -1.6824805209804925, |
| "eval_logps/chosen": -589.3125, |
| "eval_logps/rejected": -1005.5, |
| "eval_loss": 0.24332645535469055, |
| "eval_mean_token_accuracy": 0.7128801997750998, |
| "eval_num_tokens": 60787745.0, |
| "eval_rewards/accuracies": 0.916015625, |
| "eval_rewards/chosen": -5.122073702514172, |
| "eval_rewards/margins": 4.437938891351223, |
| "eval_rewards/rejected": -9.560012698173523, |
| "eval_runtime": 14.1962, |
| "eval_samples_per_second": 35.221, |
| "eval_setc/cal_net_lr": 0.0009144126721245054, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.05909399001393467, |
| "eval_setc/logratio_margin_delta": 25.14619731903076, |
| "eval_setc/logratio_margin_vanilla": 420.25, |
| "eval_setc/logratio_margin_weighted": 445.39619731903076, |
| "eval_steps_per_second": 2.254, |
| "step": 1700 |
| }, |
| { |
| "entropy": 0.47744140625, |
| "epoch": 0.9215844785772029, |
| "grad_norm": 32.23965072631836, |
| "learning_rate": 1.8996435075282425e-08, |
| "logits/chosen": -1.589121973571888, |
| "logits/rejected": -1.6433100488728811, |
| "logps/chosen": -585.4, |
| "logps/rejected": -1005.6, |
| "loss": 0.2847, |
| "mean_token_accuracy": 0.7224965184926987, |
| "num_tokens": 61140539.0, |
| "rewards/accuracies": 0.896875, |
| "rewards/chosen": -5.167207801342011, |
| "rewards/margins": 4.406006598472596, |
| "rewards/rejected": -9.573214316368103, |
| "setc/cal_net_lr": 0.0009193618453536389, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05848235320299864, |
| "setc/logratio_margin_delta": 24.300669860839843, |
| "setc/logratio_margin_vanilla": 416.3, |
| "setc/logratio_margin_weighted": 440.60066986083984, |
| "step": 1710 |
| }, |
| { |
| "entropy": 0.49404296875, |
| "epoch": 0.9269738614928591, |
| "grad_norm": 104.77435302734375, |
| "learning_rate": 1.6513637387396138e-08, |
| "logits/chosen": -1.5879688472104307, |
| "logits/rejected": -1.6660038253212073, |
| "logps/chosen": -603.2, |
| "logps/rejected": -1010.6, |
| "loss": 0.3055, |
| "mean_token_accuracy": 0.7246152043342591, |
| "num_tokens": 61515724.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -5.219302845001221, |
| "rewards/margins": 4.304101645946503, |
| "rewards/rejected": -9.5234046459198, |
| "setc/cal_net_lr": 0.0009293862431270326, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.060384664312005044, |
| "setc/logratio_margin_delta": 24.260169982910156, |
| "setc/logratio_margin_vanilla": 406.15, |
| "setc/logratio_margin_weighted": 430.41016998291013, |
| "step": 1720 |
| }, |
| { |
| "entropy": 0.47578125, |
| "epoch": 0.9323632444085153, |
| "grad_norm": 62.20964813232422, |
| "learning_rate": 1.4201935056443681e-08, |
| "logits/chosen": -1.6094559658891932, |
| "logits/rejected": -1.6524878440333168, |
| "logps/chosen": -615.5, |
| "logps/rejected": -1023.2, |
| "loss": 0.2376, |
| "mean_token_accuracy": 0.714499345421791, |
| "num_tokens": 61880478.0, |
| "rewards/accuracies": 0.9125, |
| "rewards/chosen": -5.445954358577728, |
| "rewards/margins": 4.328825449943542, |
| "rewards/rejected": -9.774779748916625, |
| "setc/cal_net_lr": 0.000938792574554196, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05732750911265612, |
| "setc/logratio_margin_delta": 26.48255157470703, |
| "setc/logratio_margin_vanilla": 406.4, |
| "setc/logratio_margin_weighted": 432.882551574707, |
| "step": 1730 |
| }, |
| { |
| "entropy": 0.47080078125, |
| "epoch": 0.9377526273241714, |
| "grad_norm": 26.240835189819336, |
| "learning_rate": 1.2062146143834939e-08, |
| "logits/chosen": -1.6424857519172662, |
| "logits/rejected": -1.6840320911940327, |
| "logps/chosen": -589.7, |
| "logps/rejected": -973.6, |
| "loss": 0.3045, |
| "mean_token_accuracy": 0.710444837808609, |
| "num_tokens": 62237198.0, |
| "rewards/accuracies": 0.865625, |
| "rewards/chosen": -5.231074094772339, |
| "rewards/margins": 4.04384742975235, |
| "rewards/rejected": -9.274921464920045, |
| "setc/cal_net_lr": 0.0009475672999910659, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05614407397806644, |
| "setc/logratio_margin_delta": 23.08474655151367, |
| "setc/logratio_margin_vanilla": 381.3, |
| "setc/logratio_margin_weighted": 404.3847465515137, |
| "step": 1740 |
| }, |
| { |
| "entropy": 0.47841796875, |
| "epoch": 0.9431420102398276, |
| "grad_norm": 25.031240463256836, |
| "learning_rate": 1.009502787454264e-08, |
| "logits/chosen": -1.628094637586519, |
| "logits/rejected": -1.6946888854450202, |
| "logps/chosen": -561.6, |
| "logps/rejected": -962.4, |
| "loss": 0.2709, |
| "mean_token_accuracy": 0.7199163377285004, |
| "num_tokens": 62613336.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -4.926135575771331, |
| "rewards/margins": 4.213748967647552, |
| "rewards/rejected": -9.13988437652588, |
| "setc/cal_net_lr": 0.0009556977889387003, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05476319156587124, |
| "setc/logratio_margin_delta": 26.124908447265625, |
| "setc/logratio_margin_vanilla": 395.25, |
| "setc/logratio_margin_weighted": 421.3749084472656, |
| "step": 1750 |
| }, |
| { |
| "entropy": 0.47578125, |
| "epoch": 0.9485313931554837, |
| "grad_norm": 33.9154052734375, |
| "learning_rate": 8.301276369136912e-09, |
| "logits/chosen": -1.5798610346038533, |
| "logits/rejected": -1.6392516028325452, |
| "logps/chosen": -602.1, |
| "logps/rejected": -1041.4, |
| "loss": 0.2397, |
| "mean_token_accuracy": 0.7180195420980453, |
| "num_tokens": 63005622.0, |
| "rewards/accuracies": 0.9, |
| "rewards/chosen": -5.272761929035187, |
| "rewards/margins": 4.6579270362854, |
| "rewards/rejected": -9.930689001083374, |
| "setc/cal_net_lr": 0.0009631723382238432, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.056856663711369035, |
| "setc/logratio_margin_delta": 27.44272232055664, |
| "setc/logratio_margin_vanilla": 438.35, |
| "setc/logratio_margin_weighted": 465.79272232055666, |
| "step": 1760 |
| }, |
| { |
| "entropy": 0.48935546875, |
| "epoch": 0.9539207760711399, |
| "grad_norm": 71.26800537109375, |
| "learning_rate": 6.68152639744346e-09, |
| "logits/chosen": -1.650358383302045, |
| "logits/rejected": -1.7143002367810154, |
| "logps/chosen": -613.1, |
| "logps/rejected": -987.6, |
| "loss": 0.2931, |
| "mean_token_accuracy": 0.7065669685602188, |
| "num_tokens": 63373491.0, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -5.3662453413009645, |
| "rewards/margins": 3.9946064949035645, |
| "rewards/rejected": -9.360851907730103, |
| "setc/cal_net_lr": 0.0009699801888446867, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05508008189499378, |
| "setc/logratio_margin_delta": 24.06066436767578, |
| "setc/logratio_margin_vanilla": 375.4, |
| "setc/logratio_margin_weighted": 399.4606643676758, |
| "step": 1770 |
| }, |
| { |
| "entropy": 0.46416015625, |
| "epoch": 0.9593101589867961, |
| "grad_norm": 25.516454696655273, |
| "learning_rate": 5.236351153911855e-09, |
| "logits/chosen": -1.7053886135754528, |
| "logits/rejected": -1.753958930302091, |
| "logps/chosen": -552.3, |
| "logps/rejected": -933.8, |
| "loss": 0.2468, |
| "mean_token_accuracy": 0.7095615237951278, |
| "num_tokens": 63708180.0, |
| "rewards/accuracies": 0.884375, |
| "rewards/chosen": -4.928412961959839, |
| "rewards/margins": 4.00942040681839, |
| "rewards/rejected": -8.937833166122436, |
| "setc/cal_net_lr": 0.0009761115414575713, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05466606095433235, |
| "setc/logratio_margin_delta": 26.192051696777344, |
| "setc/logratio_margin_vanilla": 374.75, |
| "setc/logratio_margin_weighted": 400.94205169677736, |
| "step": 1780 |
| }, |
| { |
| "entropy": 0.46875, |
| "epoch": 0.9646995419024522, |
| "grad_norm": 46.8420295715332, |
| "learning_rate": 3.966262054774794e-09, |
| "logits/chosen": -1.686366542637256, |
| "logits/rejected": -1.7361481268819614, |
| "logps/chosen": -585.3, |
| "logps/rejected": -962.2, |
| "loss": 0.2608, |
| "mean_token_accuracy": 0.7172308325767517, |
| "num_tokens": 64063107.0, |
| "rewards/accuracies": 0.8875, |
| "rewards/chosen": -5.106739115715027, |
| "rewards/margins": 4.049427050352096, |
| "rewards/rejected": -9.156166195869446, |
| "setc/cal_net_lr": 0.0009815575704823403, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.055073692835867404, |
| "setc/logratio_margin_delta": 27.29270706176758, |
| "setc/logratio_margin_vanilla": 377.65, |
| "setc/logratio_margin_weighted": 404.9427070617676, |
| "step": 1790 |
| }, |
| { |
| "entropy": 0.47333984375, |
| "epoch": 0.9700889248181084, |
| "grad_norm": 41.61103820800781, |
| "learning_rate": 2.8717085570689194e-09, |
| "logits/chosen": -1.6285714864267278, |
| "logits/rejected": -1.7007568628321508, |
| "logps/chosen": -543.7, |
| "logps/rejected": -972.0, |
| "loss": 0.2488, |
| "mean_token_accuracy": 0.7220604807138443, |
| "num_tokens": 64415287.0, |
| "rewards/accuracies": 0.915625, |
| "rewards/chosen": -4.766039276123047, |
| "rewards/margins": 4.599205017089844, |
| "rewards/rejected": -9.365244293212891, |
| "setc/cal_net_lr": 0.0009863104368060393, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.0533838925883174, |
| "setc/logratio_margin_delta": 31.27052230834961, |
| "setc/logratio_margin_vanilla": 428.65, |
| "setc/logratio_margin_weighted": 459.92052230834963, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9700889248181084, |
| "eval_entropy": 0.496337890625, |
| "eval_logits/chosen": -1.6497335267505044, |
| "eval_logits/rejected": -1.691259456206689, |
| "eval_logps/chosen": -595.0, |
| "eval_logps/rejected": -1017.0, |
| "eval_loss": 0.242402583360672, |
| "eval_mean_token_accuracy": 0.7130081094801426, |
| "eval_num_tokens": 64415287.0, |
| "eval_rewards/accuracies": 0.916015625, |
| "eval_rewards/chosen": -5.198269315063953, |
| "eval_rewards/margins": 4.524538166821003, |
| "eval_rewards/rejected": -9.722807422280312, |
| "eval_runtime": 14.196, |
| "eval_samples_per_second": 35.221, |
| "eval_setc/cal_net_lr": 0.0009885550958571609, |
| "eval_setc/credit_mean": 1.0, |
| "eval_setc/credit_std": 0.05235194368287921, |
| "eval_setc/logratio_margin_delta": 28.659388542175293, |
| "eval_setc/logratio_margin_vanilla": 425.4375, |
| "eval_setc/logratio_margin_weighted": 454.0968885421753, |
| "eval_steps_per_second": 2.254, |
| "step": 1800 |
| }, |
| { |
| "entropy": 0.49453125, |
| "epoch": 0.9754783077337645, |
| "grad_norm": 58.74081039428711, |
| "learning_rate": 1.953077999581498e-09, |
| "logits/chosen": -1.5973200334126898, |
| "logits/rejected": -1.6452927645084596, |
| "logps/chosen": -579.2, |
| "logps/rejected": -1085.2, |
| "loss": 0.2545, |
| "mean_token_accuracy": 0.7099454998970032, |
| "num_tokens": 64753784.0, |
| "rewards/accuracies": 0.915625, |
| "rewards/chosen": -5.223498034477234, |
| "rewards/margins": 5.342882573604584, |
| "rewards/rejected": -10.566380524635315, |
| "setc/cal_net_lr": 0.0009903632990666783, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.05157312601804733, |
| "setc/logratio_margin_delta": 32.93826675415039, |
| "setc/logratio_margin_vanilla": 501.35, |
| "setc/logratio_margin_weighted": 534.2882667541504, |
| "step": 1810 |
| }, |
| { |
| "entropy": 0.4802734375, |
| "epoch": 0.9808676906494206, |
| "grad_norm": 35.12337112426758, |
| "learning_rate": 1.210695465779732e-09, |
| "logits/chosen": -1.6434376954136094, |
| "logits/rejected": -1.6868111194134685, |
| "logps/chosen": -599.4, |
| "logps/rejected": -951.4, |
| "loss": 0.31, |
| "mean_token_accuracy": 0.7225580751895905, |
| "num_tokens": 65114010.0, |
| "rewards/accuracies": 0.88125, |
| "rewards/chosen": -5.201381134986877, |
| "rewards/margins": 3.772454595565796, |
| "rewards/rejected": -8.973835754394532, |
| "setc/cal_net_lr": 0.0009937103235008124, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.04924789797514677, |
| "setc/logratio_margin_delta": 25.745468139648438, |
| "setc/logratio_margin_vanilla": 351.5, |
| "setc/logratio_margin_weighted": 377.24546813964844, |
| "step": 1820 |
| }, |
| { |
| "entropy": 0.48134765625, |
| "epoch": 0.9862570735650767, |
| "grad_norm": 41.82625198364258, |
| "learning_rate": 6.44823668770833e-10, |
| "logits/chosen": -1.6893693610067781, |
| "logits/rejected": -1.7599484026922272, |
| "logps/chosen": -573.5, |
| "logps/rejected": -927.0, |
| "loss": 0.2913, |
| "mean_token_accuracy": 0.7189024925231934, |
| "num_tokens": 65453240.0, |
| "rewards/accuracies": 0.871875, |
| "rewards/chosen": -4.990872526168824, |
| "rewards/margins": 3.8168386936187746, |
| "rewards/rejected": -8.807711195945739, |
| "setc/cal_net_lr": 0.000996346692340764, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.04604421444237232, |
| "setc/logratio_margin_delta": 27.133870697021486, |
| "setc/logratio_margin_vanilla": 354.55, |
| "setc/logratio_margin_weighted": 381.68387069702146, |
| "step": 1830 |
| }, |
| { |
| "entropy": 0.4810546875, |
| "epoch": 0.9916464564807329, |
| "grad_norm": 64.11349487304688, |
| "learning_rate": 2.556628583335052e-10, |
| "logits/chosen": -1.6629026317813094, |
| "logits/rejected": -1.702124887555161, |
| "logps/chosen": -631.7, |
| "logps/rejected": -1014.0, |
| "loss": 0.2856, |
| "mean_token_accuracy": 0.7035313218832016, |
| "num_tokens": 65813540.0, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -5.657264351844788, |
| "rewards/margins": 4.0819092988967896, |
| "rewards/rejected": -9.739173555374146, |
| "setc/cal_net_lr": 0.0009982686107494103, |
| "setc/credit_mean": 1.0, |
| "setc/credit_std": 0.04834742061793804, |
| "setc/logratio_margin_delta": 25.690938568115236, |
| "setc/logratio_margin_vanilla": 382.5, |
| "setc/logratio_margin_weighted": 408.19093856811526, |
| "step": 1840 |
| }, |
| { |
| "entropy": 0.474609375, |
| "epoch": 0.9970358393963891, |
| "grad_norm": 32.58039855957031, |
| "learning_rate": 4.3350750053627874e-11, |
| "logits/chosen": -1.6136593090507105, |
| "logits/rejected": -1.6928662572741515, |
| "logps/chosen": -604.8, |
| "logps/rejected": -1012.2, |
| "loss": 0.294, |
| "mean_token_accuracy": 0.712560287117958, |
| "num_tokens": 66170915.0, |
| "rewards/accuracies": 0.846875, |
| "rewards/chosen": -5.385611450672149, |
| "rewards/margins": 4.348533475399018, |
| "rewards/rejected": -9.734145045280457, |
| "setc/cal_net_lr": 0.0009994733122825335, |
| "setc/credit_mean": 0.9999999970197677, |
| "setc/credit_std": 0.04455969799309969, |
| "setc/logratio_margin_delta": 27.10336227416992, |
| "setc/logratio_margin_vanilla": 407.75, |
| "setc/logratio_margin_weighted": 434.8533622741699, |
| "step": 1850 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1856, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.3990617254086574e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|