| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004, | |
| "grad_norm": 7034011.299815918, | |
| "learning_rate": 2e-09, | |
| "logits/chosen": -2.3609464168548584, | |
| "logits/rejected": -2.4021644592285156, | |
| "logps/chosen": -72.32479858398438, | |
| "logps/rejected": -106.78115844726562, | |
| "loss": 138817.4219, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 5234909.800991719, | |
| "learning_rate": 2e-08, | |
| "logits/chosen": -2.3249125480651855, | |
| "logits/rejected": -2.3054325580596924, | |
| "logps/chosen": -70.72610473632812, | |
| "logps/rejected": -68.99564361572266, | |
| "loss": 125594.3333, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": 6.445489361794898e-06, | |
| "rewards/margins": -2.8922620913363062e-05, | |
| "rewards/rejected": 3.536810982041061e-05, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 6739339.394495674, | |
| "learning_rate": 4e-08, | |
| "logits/chosen": -2.3423686027526855, | |
| "logits/rejected": -2.3319311141967773, | |
| "logps/chosen": -72.6821060180664, | |
| "logps/rejected": -76.68476867675781, | |
| "loss": 128657.6, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 7.994120824150741e-05, | |
| "rewards/margins": 0.00015745378914289176, | |
| "rewards/rejected": -7.751256634946913e-05, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 6270796.093737289, | |
| "learning_rate": 6e-08, | |
| "logits/chosen": -2.3667407035827637, | |
| "logits/rejected": -2.366872549057007, | |
| "logps/chosen": -86.75081634521484, | |
| "logps/rejected": -96.1201171875, | |
| "loss": 129234.2, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -9.695839253254235e-05, | |
| "rewards/margins": -2.3627610062249005e-05, | |
| "rewards/rejected": -7.333078247029334e-05, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 6487964.338687279, | |
| "learning_rate": 8e-08, | |
| "logits/chosen": -2.330949068069458, | |
| "logits/rejected": -2.304487466812134, | |
| "logps/chosen": -70.66746520996094, | |
| "logps/rejected": -76.26786804199219, | |
| "loss": 132677.1375, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.00011891069880221039, | |
| "rewards/margins": 2.9055625418550335e-05, | |
| "rewards/rejected": -0.00014796630421187729, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 5808382.473785103, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -2.3761391639709473, | |
| "logits/rejected": -2.4001965522766113, | |
| "logps/chosen": -64.84712219238281, | |
| "logps/rejected": -85.47789001464844, | |
| "loss": 131065.9, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.00034615895128808916, | |
| "rewards/margins": 0.00022984863608144224, | |
| "rewards/rejected": -0.0005760076455771923, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 4282982.05143524, | |
| "learning_rate": 1.2e-07, | |
| "logits/chosen": -2.3628551959991455, | |
| "logits/rejected": -2.325425386428833, | |
| "logps/chosen": -76.96721649169922, | |
| "logps/rejected": -81.25682067871094, | |
| "loss": 126675.9375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.0004624463617801666, | |
| "rewards/margins": 0.0005581938894465566, | |
| "rewards/rejected": -0.001020640367642045, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 7692791.228759594, | |
| "learning_rate": 1.4e-07, | |
| "logits/chosen": -2.3956987857818604, | |
| "logits/rejected": -2.4127683639526367, | |
| "logps/chosen": -71.30229187011719, | |
| "logps/rejected": -74.56432342529297, | |
| "loss": 134539.0625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0008852133760228753, | |
| "rewards/margins": 9.680164657766e-05, | |
| "rewards/rejected": -0.0009820150444284081, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 5583562.2282863185, | |
| "learning_rate": 1.6e-07, | |
| "logits/chosen": -2.3956503868103027, | |
| "logits/rejected": -2.350037097930908, | |
| "logps/chosen": -75.2908706665039, | |
| "logps/rejected": -77.54324340820312, | |
| "loss": 125353.275, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.0011667849030345678, | |
| "rewards/margins": 0.00017681324970908463, | |
| "rewards/rejected": -0.0013435978908091784, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 7411307.114758692, | |
| "learning_rate": 1.8e-07, | |
| "logits/chosen": -2.3411900997161865, | |
| "logits/rejected": -2.32747220993042, | |
| "logps/chosen": -76.68790435791016, | |
| "logps/rejected": -77.229736328125, | |
| "loss": 134888.05, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0010598390363156796, | |
| "rewards/margins": 0.00033332061138935387, | |
| "rewards/rejected": -0.001393159618601203, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6105565.361340655, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -2.2321219444274902, | |
| "logits/rejected": -2.25978684425354, | |
| "logps/chosen": -69.29805755615234, | |
| "logps/rejected": -70.91548156738281, | |
| "loss": 128186.8375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0023287434596568346, | |
| "rewards/margins": 0.0005028151208534837, | |
| "rewards/rejected": -0.0028315584640949965, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 7344396.598489226, | |
| "learning_rate": 2.1999999999999998e-07, | |
| "logits/chosen": -2.1552886962890625, | |
| "logits/rejected": -2.167569637298584, | |
| "logps/chosen": -70.13446044921875, | |
| "logps/rejected": -86.2125015258789, | |
| "loss": 129394.525, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.005608140490949154, | |
| "rewards/margins": 0.004356575198471546, | |
| "rewards/rejected": -0.009964716620743275, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 9496198.88633965, | |
| "learning_rate": 2.4e-07, | |
| "logits/chosen": -1.9701220989227295, | |
| "logits/rejected": -1.9230693578720093, | |
| "logps/chosen": -110.21456146240234, | |
| "logps/rejected": -117.58609771728516, | |
| "loss": 129791.4125, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.01813032478094101, | |
| "rewards/margins": 0.003227741690352559, | |
| "rewards/rejected": -0.02135806903243065, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 10004391.66976001, | |
| "learning_rate": 2.6e-07, | |
| "logits/chosen": -2.015622854232788, | |
| "logits/rejected": -2.026458740234375, | |
| "logps/chosen": -100.22117614746094, | |
| "logps/rejected": -107.8635482788086, | |
| "loss": 129000.775, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.015937697142362595, | |
| "rewards/margins": 0.008421100676059723, | |
| "rewards/rejected": -0.024358797818422318, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 10735198.371540312, | |
| "learning_rate": 2.8e-07, | |
| "logits/chosen": -1.9458515644073486, | |
| "logits/rejected": -1.964914321899414, | |
| "logps/chosen": -100.13922882080078, | |
| "logps/rejected": -123.00533294677734, | |
| "loss": 132137.4875, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.024147292599081993, | |
| "rewards/margins": 0.011009057983756065, | |
| "rewards/rejected": -0.03515635430812836, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7516435.594198061, | |
| "learning_rate": 3e-07, | |
| "logits/chosen": -1.9916763305664062, | |
| "logits/rejected": -1.9970576763153076, | |
| "logps/chosen": -89.16957092285156, | |
| "logps/rejected": -104.74922180175781, | |
| "loss": 125561.5375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.020500652492046356, | |
| "rewards/margins": 0.011713030748069286, | |
| "rewards/rejected": -0.03221368417143822, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 7027700.618512748, | |
| "learning_rate": 3.2e-07, | |
| "logits/chosen": -2.159398078918457, | |
| "logits/rejected": -2.1420938968658447, | |
| "logps/chosen": -76.12110900878906, | |
| "logps/rejected": -94.0234603881836, | |
| "loss": 124492.5625, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.014212280511856079, | |
| "rewards/margins": 0.01188136450946331, | |
| "rewards/rejected": -0.02609364315867424, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 8112303.913406089, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "logits/chosen": -2.103625774383545, | |
| "logits/rejected": -2.0622053146362305, | |
| "logps/chosen": -101.08997344970703, | |
| "logps/rejected": -129.8938446044922, | |
| "loss": 120661.3375, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.013411534950137138, | |
| "rewards/margins": 0.01583944633603096, | |
| "rewards/rejected": -0.029250985011458397, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 8237392.210524994, | |
| "learning_rate": 3.6e-07, | |
| "logits/chosen": -2.1253855228424072, | |
| "logits/rejected": -2.123330593109131, | |
| "logps/chosen": -80.26612854003906, | |
| "logps/rejected": -116.00606536865234, | |
| "loss": 125813.875, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.014310337603092194, | |
| "rewards/margins": 0.027810264378786087, | |
| "rewards/rejected": -0.04212059825658798, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 8482656.343559477, | |
| "learning_rate": 3.7999999999999996e-07, | |
| "logits/chosen": -2.122274875640869, | |
| "logits/rejected": -2.0924127101898193, | |
| "logps/chosen": -70.27191162109375, | |
| "logps/rejected": -88.20128631591797, | |
| "loss": 122274.4125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.011920216493308544, | |
| "rewards/margins": 0.012561318464577198, | |
| "rewards/rejected": -0.02448153682053089, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 15995751.957306068, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -2.16402006149292, | |
| "logits/rejected": -2.1568922996520996, | |
| "logps/chosen": -84.34500885009766, | |
| "logps/rejected": -106.30509185791016, | |
| "loss": 124034.3625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.019195228815078735, | |
| "rewards/margins": 0.019908469170331955, | |
| "rewards/rejected": -0.03910370171070099, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 9976473.779353945, | |
| "learning_rate": 4.1999999999999995e-07, | |
| "logits/chosen": -2.1947314739227295, | |
| "logits/rejected": -2.155924081802368, | |
| "logps/chosen": -85.31925964355469, | |
| "logps/rejected": -116.8820571899414, | |
| "loss": 133085.9375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.016604367643594742, | |
| "rewards/margins": 0.010993210598826408, | |
| "rewards/rejected": -0.027597576379776, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 7143746.706174395, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "logits/chosen": -2.181243419647217, | |
| "logits/rejected": -2.1664962768554688, | |
| "logps/chosen": -74.75950622558594, | |
| "logps/rejected": -87.78418731689453, | |
| "loss": 127414.575, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.014525257050991058, | |
| "rewards/margins": 0.003328158985823393, | |
| "rewards/rejected": -0.01785341463983059, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 9204902.414337158, | |
| "learning_rate": 4.6e-07, | |
| "logits/chosen": -2.108741044998169, | |
| "logits/rejected": -2.048841953277588, | |
| "logps/chosen": -78.65644073486328, | |
| "logps/rejected": -95.38871765136719, | |
| "loss": 127270.9375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.013477807864546776, | |
| "rewards/margins": 0.008071732707321644, | |
| "rewards/rejected": -0.021549541503190994, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 6495004.829819743, | |
| "learning_rate": 4.8e-07, | |
| "logits/chosen": -2.111128330230713, | |
| "logits/rejected": -2.0940356254577637, | |
| "logps/chosen": -92.46067810058594, | |
| "logps/rejected": -117.76658630371094, | |
| "loss": 122517.0, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.015996446833014488, | |
| "rewards/margins": 0.015014531090855598, | |
| "rewards/rejected": -0.031010976061224937, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 3914167.3327231077, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -2.129955768585205, | |
| "logits/rejected": -2.1201798915863037, | |
| "logps/chosen": -91.17083740234375, | |
| "logps/rejected": -122.591064453125, | |
| "loss": 126768.9, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.017495278269052505, | |
| "rewards/margins": 0.019053593277931213, | |
| "rewards/rejected": -0.03654887527227402, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 9274304.06015198, | |
| "learning_rate": 4.977777777777777e-07, | |
| "logits/chosen": -2.1294829845428467, | |
| "logits/rejected": -2.1332812309265137, | |
| "logps/chosen": -86.34416198730469, | |
| "logps/rejected": -110.06596374511719, | |
| "loss": 123661.6875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.019683022052049637, | |
| "rewards/margins": 0.017954688519239426, | |
| "rewards/rejected": -0.03763771429657936, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 5830092.205559107, | |
| "learning_rate": 4.955555555555556e-07, | |
| "logits/chosen": -2.1960532665252686, | |
| "logits/rejected": -2.2218282222747803, | |
| "logps/chosen": -94.33865356445312, | |
| "logps/rejected": -113.83499908447266, | |
| "loss": 125464.4375, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.018877552822232246, | |
| "rewards/margins": 0.010512979701161385, | |
| "rewards/rejected": -0.02939053252339363, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 8438742.306721646, | |
| "learning_rate": 4.933333333333333e-07, | |
| "logits/chosen": -2.257341146469116, | |
| "logits/rejected": -2.3109583854675293, | |
| "logps/chosen": -94.39613342285156, | |
| "logps/rejected": -118.54805755615234, | |
| "loss": 126082.3375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.021040180698037148, | |
| "rewards/margins": 0.01762349344789982, | |
| "rewards/rejected": -0.038663674145936966, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 7914235.455845386, | |
| "learning_rate": 4.91111111111111e-07, | |
| "logits/chosen": -2.327070713043213, | |
| "logits/rejected": -2.3570895195007324, | |
| "logps/chosen": -87.95598602294922, | |
| "logps/rejected": -108.05839538574219, | |
| "loss": 129599.7125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0221787728369236, | |
| "rewards/margins": 0.01672218181192875, | |
| "rewards/rejected": -0.0389009527862072, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 6457742.625324568, | |
| "learning_rate": 4.888888888888889e-07, | |
| "logits/chosen": -2.4012951850891113, | |
| "logits/rejected": -2.4345765113830566, | |
| "logps/chosen": -74.74217224121094, | |
| "logps/rejected": -100.60356140136719, | |
| "loss": 124479.875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.018593108281493187, | |
| "rewards/margins": 0.021032758057117462, | |
| "rewards/rejected": -0.0396258682012558, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 6578431.206476413, | |
| "learning_rate": 4.866666666666666e-07, | |
| "logits/chosen": -2.452791452407837, | |
| "logits/rejected": -2.4812235832214355, | |
| "logps/chosen": -95.68658447265625, | |
| "logps/rejected": -111.42750549316406, | |
| "loss": 126451.425, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.018242117017507553, | |
| "rewards/margins": 0.01176674384623766, | |
| "rewards/rejected": -0.030008861795067787, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 10851035.518032862, | |
| "learning_rate": 4.844444444444445e-07, | |
| "logits/chosen": -2.4537229537963867, | |
| "logits/rejected": -2.4691717624664307, | |
| "logps/chosen": -82.9326171875, | |
| "logps/rejected": -116.93620300292969, | |
| "loss": 123506.3125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02308080717921257, | |
| "rewards/margins": 0.024886813014745712, | |
| "rewards/rejected": -0.04796762019395828, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 9223772.443364851, | |
| "learning_rate": 4.822222222222222e-07, | |
| "logits/chosen": -2.391624927520752, | |
| "logits/rejected": -2.407311201095581, | |
| "logps/chosen": -91.67464447021484, | |
| "logps/rejected": -117.0147705078125, | |
| "loss": 121261.9125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.023308029398322105, | |
| "rewards/margins": 0.017484817653894424, | |
| "rewards/rejected": -0.04079284518957138, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 8085358.939583512, | |
| "learning_rate": 4.8e-07, | |
| "logits/chosen": -2.48149037361145, | |
| "logits/rejected": -2.4932546615600586, | |
| "logps/chosen": -96.05111694335938, | |
| "logps/rejected": -131.0735626220703, | |
| "loss": 126914.2875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02739790640771389, | |
| "rewards/margins": 0.029247354716062546, | |
| "rewards/rejected": -0.056645262986421585, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 7944883.3667990295, | |
| "learning_rate": 4.777777777777778e-07, | |
| "logits/chosen": -2.45344877243042, | |
| "logits/rejected": -2.5137851238250732, | |
| "logps/chosen": -89.93304443359375, | |
| "logps/rejected": -108.8600845336914, | |
| "loss": 122914.55, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.0250026136636734, | |
| "rewards/margins": 0.014374235644936562, | |
| "rewards/rejected": -0.03937685117125511, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 11202153.92151104, | |
| "learning_rate": 4.7555555555555554e-07, | |
| "logits/chosen": -2.569916248321533, | |
| "logits/rejected": -2.5970470905303955, | |
| "logps/chosen": -95.05995178222656, | |
| "logps/rejected": -127.2323226928711, | |
| "loss": 124243.4125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025854643434286118, | |
| "rewards/margins": 0.023228293284773827, | |
| "rewards/rejected": -0.049082934856414795, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 6901221.964419149, | |
| "learning_rate": 4.733333333333333e-07, | |
| "logits/chosen": -2.4675538539886475, | |
| "logits/rejected": -2.4503865242004395, | |
| "logps/chosen": -85.31706237792969, | |
| "logps/rejected": -102.17588806152344, | |
| "loss": 127540.3375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.024571221321821213, | |
| "rewards/margins": 0.011107890866696835, | |
| "rewards/rejected": -0.03567911311984062, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 6993857.423860367, | |
| "learning_rate": 4.711111111111111e-07, | |
| "logits/chosen": -2.459782123565674, | |
| "logits/rejected": -2.48356032371521, | |
| "logps/chosen": -110.59651184082031, | |
| "logps/rejected": -130.7666778564453, | |
| "loss": 127438.7, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.031783945858478546, | |
| "rewards/margins": 0.014480188488960266, | |
| "rewards/rejected": -0.046264130622148514, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 6436648.717203954, | |
| "learning_rate": 4.6888888888888887e-07, | |
| "logits/chosen": -2.4548838138580322, | |
| "logits/rejected": -2.456662654876709, | |
| "logps/chosen": -117.49080657958984, | |
| "logps/rejected": -128.62191772460938, | |
| "loss": 126004.45, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.027950212359428406, | |
| "rewards/margins": 0.008381237275898457, | |
| "rewards/rejected": -0.036331452429294586, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 7569273.392881057, | |
| "learning_rate": 4.6666666666666666e-07, | |
| "logits/chosen": -2.4525585174560547, | |
| "logits/rejected": -2.4519400596618652, | |
| "logps/chosen": -104.88145446777344, | |
| "logps/rejected": -128.08416748046875, | |
| "loss": 126857.475, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.022735530510544777, | |
| "rewards/margins": 0.01596837118268013, | |
| "rewards/rejected": -0.038703907281160355, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 6861745.545448723, | |
| "learning_rate": 4.644444444444444e-07, | |
| "logits/chosen": -2.5066254138946533, | |
| "logits/rejected": -2.5164337158203125, | |
| "logps/chosen": -86.57884216308594, | |
| "logps/rejected": -119.33331298828125, | |
| "loss": 124486.5125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.02158975414931774, | |
| "rewards/margins": 0.030435938388109207, | |
| "rewards/rejected": -0.0520256944000721, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 6923216.083132582, | |
| "learning_rate": 4.622222222222222e-07, | |
| "logits/chosen": -2.4752566814422607, | |
| "logits/rejected": -2.463294506072998, | |
| "logps/chosen": -85.61151885986328, | |
| "logps/rejected": -102.90364837646484, | |
| "loss": 124946.475, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.020689889788627625, | |
| "rewards/margins": 0.011657947674393654, | |
| "rewards/rejected": -0.03234783932566643, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 7450190.250939408, | |
| "learning_rate": 4.6e-07, | |
| "logits/chosen": -2.547010660171509, | |
| "logits/rejected": -2.531663179397583, | |
| "logps/chosen": -97.37740325927734, | |
| "logps/rejected": -135.26629638671875, | |
| "loss": 131081.35, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.025491004809737206, | |
| "rewards/margins": 0.02565266191959381, | |
| "rewards/rejected": -0.051143668591976166, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 6469045.345880665, | |
| "learning_rate": 4.577777777777778e-07, | |
| "logits/chosen": -2.6610159873962402, | |
| "logits/rejected": -2.659968852996826, | |
| "logps/chosen": -99.9223861694336, | |
| "logps/rejected": -124.8481216430664, | |
| "loss": 117640.1, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.027902353554964066, | |
| "rewards/margins": 0.019689548760652542, | |
| "rewards/rejected": -0.04759190231561661, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 7434403.705215201, | |
| "learning_rate": 4.555555555555555e-07, | |
| "logits/chosen": -2.700005054473877, | |
| "logits/rejected": -2.6503853797912598, | |
| "logps/chosen": -96.16465759277344, | |
| "logps/rejected": -124.81199645996094, | |
| "loss": 123096.9, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.026438185945153236, | |
| "rewards/margins": 0.018792394548654556, | |
| "rewards/rejected": -0.04523057863116264, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 7874629.531526446, | |
| "learning_rate": 4.5333333333333326e-07, | |
| "logits/chosen": -2.724388837814331, | |
| "logits/rejected": -2.7652335166931152, | |
| "logps/chosen": -101.94267272949219, | |
| "logps/rejected": -110.65840148925781, | |
| "loss": 128125.7, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.024334359914064407, | |
| "rewards/margins": 0.005520271137356758, | |
| "rewards/rejected": -0.029854634776711464, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 7560431.171192426, | |
| "learning_rate": 4.511111111111111e-07, | |
| "logits/chosen": -2.6493871212005615, | |
| "logits/rejected": -2.6714179515838623, | |
| "logps/chosen": -105.4653549194336, | |
| "logps/rejected": -150.9534149169922, | |
| "loss": 122614.7375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02969837561249733, | |
| "rewards/margins": 0.027200985699892044, | |
| "rewards/rejected": -0.056899357587099075, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 8394101.213808972, | |
| "learning_rate": 4.4888888888888885e-07, | |
| "logits/chosen": -2.495974063873291, | |
| "logits/rejected": -2.4936139583587646, | |
| "logps/chosen": -105.5544662475586, | |
| "logps/rejected": -139.51068115234375, | |
| "loss": 127604.6125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.026578962802886963, | |
| "rewards/margins": 0.027212362736463547, | |
| "rewards/rejected": -0.05379132181406021, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 8149957.80282127, | |
| "learning_rate": 4.4666666666666664e-07, | |
| "logits/chosen": -2.44439959526062, | |
| "logits/rejected": -2.4615180492401123, | |
| "logps/chosen": -115.50093078613281, | |
| "logps/rejected": -153.07492065429688, | |
| "loss": 120568.475, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02712417207658291, | |
| "rewards/margins": 0.03037952445447445, | |
| "rewards/rejected": -0.05750369280576706, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 9689223.456248827, | |
| "learning_rate": 4.444444444444444e-07, | |
| "logits/chosen": -2.4836983680725098, | |
| "logits/rejected": -2.4954071044921875, | |
| "logps/chosen": -108.3786392211914, | |
| "logps/rejected": -131.80975341796875, | |
| "loss": 130260.4375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.03161335736513138, | |
| "rewards/margins": 0.019194485619664192, | |
| "rewards/rejected": -0.05080784484744072, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 9433316.334462296, | |
| "learning_rate": 4.4222222222222223e-07, | |
| "logits/chosen": -2.637115955352783, | |
| "logits/rejected": -2.6541285514831543, | |
| "logps/chosen": -106.7952880859375, | |
| "logps/rejected": -119.38945007324219, | |
| "loss": 125224.65, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.02900713123381138, | |
| "rewards/margins": 0.011991321109235287, | |
| "rewards/rejected": -0.04099845141172409, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 6547291.880532919, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "logits/chosen": -2.6971993446350098, | |
| "logits/rejected": -2.6676185131073, | |
| "logps/chosen": -89.63105773925781, | |
| "logps/rejected": -119.62776947021484, | |
| "loss": 121176.4, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.025825385004281998, | |
| "rewards/margins": 0.021441804245114326, | |
| "rewards/rejected": -0.04726719111204147, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 6024322.818937677, | |
| "learning_rate": 4.3777777777777776e-07, | |
| "logits/chosen": -2.732637405395508, | |
| "logits/rejected": -2.718721866607666, | |
| "logps/chosen": -84.20713806152344, | |
| "logps/rejected": -120.58128356933594, | |
| "loss": 118993.7, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.023359743878245354, | |
| "rewards/margins": 0.02675134316086769, | |
| "rewards/rejected": -0.0501110777258873, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 6460660.035353449, | |
| "learning_rate": 4.355555555555555e-07, | |
| "logits/chosen": -2.488724708557129, | |
| "logits/rejected": -2.504575490951538, | |
| "logps/chosen": -98.4452133178711, | |
| "logps/rejected": -128.78163146972656, | |
| "loss": 123458.275, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.026253730058670044, | |
| "rewards/margins": 0.024806631729006767, | |
| "rewards/rejected": -0.05106035992503166, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 8461699.658062043, | |
| "learning_rate": 4.3333333333333335e-07, | |
| "logits/chosen": -2.5291218757629395, | |
| "logits/rejected": -2.536240339279175, | |
| "logps/chosen": -100.16661071777344, | |
| "logps/rejected": -149.42355346679688, | |
| "loss": 124909.075, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.027327323332428932, | |
| "rewards/margins": 0.036261945962905884, | |
| "rewards/rejected": -0.06358926743268967, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 6921956.012662945, | |
| "learning_rate": 4.311111111111111e-07, | |
| "logits/chosen": -2.628513813018799, | |
| "logits/rejected": -2.6058189868927, | |
| "logps/chosen": -107.0963363647461, | |
| "logps/rejected": -113.91001892089844, | |
| "loss": 127428.7375, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.03390585258603096, | |
| "rewards/margins": 0.005185864400118589, | |
| "rewards/rejected": -0.039091721177101135, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 8197834.099947786, | |
| "learning_rate": 4.2888888888888883e-07, | |
| "logits/chosen": -2.561366558074951, | |
| "logits/rejected": -2.561455249786377, | |
| "logps/chosen": -105.6274185180664, | |
| "logps/rejected": -136.18792724609375, | |
| "loss": 126906.175, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.031804267317056656, | |
| "rewards/margins": 0.02525196596980095, | |
| "rewards/rejected": -0.057056229561567307, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 8846278.644102238, | |
| "learning_rate": 4.266666666666667e-07, | |
| "logits/chosen": -2.6525886058807373, | |
| "logits/rejected": -2.588754653930664, | |
| "logps/chosen": -136.8567657470703, | |
| "logps/rejected": -178.04689025878906, | |
| "loss": 132691.7875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.04016602411866188, | |
| "rewards/margins": 0.027137309312820435, | |
| "rewards/rejected": -0.06730332970619202, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 6333324.696405062, | |
| "learning_rate": 4.244444444444444e-07, | |
| "logits/chosen": -2.4680473804473877, | |
| "logits/rejected": -2.465623378753662, | |
| "logps/chosen": -123.52412414550781, | |
| "logps/rejected": -135.17831420898438, | |
| "loss": 129527.5, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03627150505781174, | |
| "rewards/margins": 0.012457914650440216, | |
| "rewards/rejected": -0.048729415982961655, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 7036898.682503791, | |
| "learning_rate": 4.222222222222222e-07, | |
| "logits/chosen": -2.584538459777832, | |
| "logits/rejected": -2.579874038696289, | |
| "logps/chosen": -99.02510070800781, | |
| "logps/rejected": -107.3072280883789, | |
| "loss": 128720.725, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03315219283103943, | |
| "rewards/margins": 0.003997699357569218, | |
| "rewards/rejected": -0.03714989498257637, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 7159293.986125982, | |
| "learning_rate": 4.1999999999999995e-07, | |
| "logits/chosen": -2.649663209915161, | |
| "logits/rejected": -2.6600253582000732, | |
| "logps/chosen": -101.28582000732422, | |
| "logps/rejected": -119.93243408203125, | |
| "loss": 129047.95, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.031149577349424362, | |
| "rewards/margins": 0.011909973807632923, | |
| "rewards/rejected": -0.04305955022573471, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 7467292.937718221, | |
| "learning_rate": 4.177777777777778e-07, | |
| "logits/chosen": -2.572115898132324, | |
| "logits/rejected": -2.525055408477783, | |
| "logps/chosen": -105.48246765136719, | |
| "logps/rejected": -131.28749084472656, | |
| "loss": 122677.825, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03393036499619484, | |
| "rewards/margins": 0.017631059512495995, | |
| "rewards/rejected": -0.051561422646045685, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.252, | |
| "grad_norm": 6649301.452495339, | |
| "learning_rate": 4.1555555555555554e-07, | |
| "logits/chosen": -2.5688421726226807, | |
| "logits/rejected": -2.5850729942321777, | |
| "logps/chosen": -106.14555358886719, | |
| "logps/rejected": -141.26199340820312, | |
| "loss": 124169.6, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027926957234740257, | |
| "rewards/margins": 0.031329791992902756, | |
| "rewards/rejected": -0.05925675109028816, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 5701852.577919224, | |
| "learning_rate": 4.1333333333333333e-07, | |
| "logits/chosen": -2.5793604850769043, | |
| "logits/rejected": -2.6216492652893066, | |
| "logps/chosen": -94.40669250488281, | |
| "logps/rejected": -141.09725952148438, | |
| "loss": 119443.9125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.029792586341500282, | |
| "rewards/margins": 0.0306295957416296, | |
| "rewards/rejected": -0.06042218208312988, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7828661.867350275, | |
| "learning_rate": 4.1111111111111107e-07, | |
| "logits/chosen": -2.5071213245391846, | |
| "logits/rejected": -2.5167384147644043, | |
| "logps/chosen": -100.64595794677734, | |
| "logps/rejected": -135.44271850585938, | |
| "loss": 127055.0875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.03445083647966385, | |
| "rewards/margins": 0.021719755604863167, | |
| "rewards/rejected": -0.056170590221881866, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 7230836.816701007, | |
| "learning_rate": 4.088888888888889e-07, | |
| "logits/chosen": -2.49631404876709, | |
| "logits/rejected": -2.536076068878174, | |
| "logps/chosen": -101.35478210449219, | |
| "logps/rejected": -124.0557632446289, | |
| "loss": 128004.0625, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03691136837005615, | |
| "rewards/margins": 0.013063013553619385, | |
| "rewards/rejected": -0.04997437819838524, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.268, | |
| "grad_norm": 9181742.684525523, | |
| "learning_rate": 4.0666666666666666e-07, | |
| "logits/chosen": -2.5028629302978516, | |
| "logits/rejected": -2.5143425464630127, | |
| "logps/chosen": -114.0814437866211, | |
| "logps/rejected": -130.69984436035156, | |
| "loss": 132355.1, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03546031937003136, | |
| "rewards/margins": 0.012831469066441059, | |
| "rewards/rejected": -0.04829178377985954, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 5953835.069109496, | |
| "learning_rate": 4.044444444444444e-07, | |
| "logits/chosen": -2.3693368434906006, | |
| "logits/rejected": -2.3933675289154053, | |
| "logps/chosen": -96.07215881347656, | |
| "logps/rejected": -133.98353576660156, | |
| "loss": 122900.6875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.029592838138341904, | |
| "rewards/margins": 0.028876733034849167, | |
| "rewards/rejected": -0.05846957489848137, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.276, | |
| "grad_norm": 9742020.165182771, | |
| "learning_rate": 4.022222222222222e-07, | |
| "logits/chosen": -2.362238645553589, | |
| "logits/rejected": -2.3378891944885254, | |
| "logps/chosen": -128.8614501953125, | |
| "logps/rejected": -144.07827758789062, | |
| "loss": 125425.0375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.03773171454668045, | |
| "rewards/margins": 0.013301841914653778, | |
| "rewards/rejected": -0.05103355646133423, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 5698717.703929527, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -2.4838829040527344, | |
| "logits/rejected": -2.50342059135437, | |
| "logps/chosen": -111.4194107055664, | |
| "logps/rejected": -129.75094604492188, | |
| "loss": 125053.125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03320156782865524, | |
| "rewards/margins": 0.017379306256771088, | |
| "rewards/rejected": -0.05058088153600693, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.284, | |
| "grad_norm": 8985877.177134423, | |
| "learning_rate": 3.977777777777778e-07, | |
| "logits/chosen": -2.495482921600342, | |
| "logits/rejected": -2.4964957237243652, | |
| "logps/chosen": -122.45703125, | |
| "logps/rejected": -142.83428955078125, | |
| "loss": 128369.4125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.03306376561522484, | |
| "rewards/margins": 0.016845058649778366, | |
| "rewards/rejected": -0.049908824265003204, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 6293531.48238979, | |
| "learning_rate": 3.955555555555555e-07, | |
| "logits/chosen": -2.391200065612793, | |
| "logits/rejected": -2.4489917755126953, | |
| "logps/chosen": -98.37910461425781, | |
| "logps/rejected": -126.7882080078125, | |
| "loss": 127457.0, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.029601294547319412, | |
| "rewards/margins": 0.01803305558860302, | |
| "rewards/rejected": -0.04763435199856758, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.292, | |
| "grad_norm": 7897192.107065841, | |
| "learning_rate": 3.933333333333333e-07, | |
| "logits/chosen": -2.4582555294036865, | |
| "logits/rejected": -2.4759137630462646, | |
| "logps/chosen": -98.81364440917969, | |
| "logps/rejected": -144.63548278808594, | |
| "loss": 123344.0875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.029745137318968773, | |
| "rewards/margins": 0.03918560594320297, | |
| "rewards/rejected": -0.06893075257539749, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 6327010.4235527, | |
| "learning_rate": 3.911111111111111e-07, | |
| "logits/chosen": -2.578629732131958, | |
| "logits/rejected": -2.5296969413757324, | |
| "logps/chosen": -93.8338623046875, | |
| "logps/rejected": -119.47264099121094, | |
| "loss": 130155.3875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.031289439648389816, | |
| "rewards/margins": 0.02071293443441391, | |
| "rewards/rejected": -0.052002377808094025, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 6618521.62330563, | |
| "learning_rate": 3.888888888888889e-07, | |
| "logits/chosen": -2.5220892429351807, | |
| "logits/rejected": -2.533546209335327, | |
| "logps/chosen": -115.8681869506836, | |
| "logps/rejected": -141.22885131835938, | |
| "loss": 119614.875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027656156569719315, | |
| "rewards/margins": 0.018271705135703087, | |
| "rewards/rejected": -0.04592785984277725, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 10187163.188769344, | |
| "learning_rate": 3.8666666666666664e-07, | |
| "logits/chosen": -2.5118823051452637, | |
| "logits/rejected": -2.504910945892334, | |
| "logps/chosen": -101.41685485839844, | |
| "logps/rejected": -148.4906005859375, | |
| "loss": 126592.4875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.033656515181064606, | |
| "rewards/margins": 0.03055490553379059, | |
| "rewards/rejected": -0.06421142816543579, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.308, | |
| "grad_norm": 5303031.219251705, | |
| "learning_rate": 3.8444444444444443e-07, | |
| "logits/chosen": -2.430718421936035, | |
| "logits/rejected": -2.42429518699646, | |
| "logps/chosen": -104.3238754272461, | |
| "logps/rejected": -152.8984832763672, | |
| "loss": 120932.45, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02648145519196987, | |
| "rewards/margins": 0.034856077283620834, | |
| "rewards/rejected": -0.06133753061294556, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 7788606.775046531, | |
| "learning_rate": 3.822222222222222e-07, | |
| "logits/chosen": -2.440924882888794, | |
| "logits/rejected": -2.420545816421509, | |
| "logps/chosen": -115.5602035522461, | |
| "logps/rejected": -155.5218048095703, | |
| "loss": 125781.1125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.033296506851911545, | |
| "rewards/margins": 0.026110276579856873, | |
| "rewards/rejected": -0.059406787157058716, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.316, | |
| "grad_norm": 8342190.296045118, | |
| "learning_rate": 3.7999999999999996e-07, | |
| "logits/chosen": -2.476647138595581, | |
| "logits/rejected": -2.469700813293457, | |
| "logps/chosen": -128.41778564453125, | |
| "logps/rejected": -171.7101287841797, | |
| "loss": 123186.7, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.036413662135601044, | |
| "rewards/margins": 0.02540646493434906, | |
| "rewards/rejected": -0.0618201307952404, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 6557257.336959155, | |
| "learning_rate": 3.7777777777777775e-07, | |
| "logits/chosen": -2.5064010620117188, | |
| "logits/rejected": -2.4810726642608643, | |
| "logps/chosen": -92.31603240966797, | |
| "logps/rejected": -137.16452026367188, | |
| "loss": 122089.9625, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.026451414451003075, | |
| "rewards/margins": 0.033910416066646576, | |
| "rewards/rejected": -0.0603618249297142, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.324, | |
| "grad_norm": 6904823.359277868, | |
| "learning_rate": 3.755555555555555e-07, | |
| "logits/chosen": -2.4048755168914795, | |
| "logits/rejected": -2.4163169860839844, | |
| "logps/chosen": -103.93983459472656, | |
| "logps/rejected": -146.17251586914062, | |
| "loss": 125580.95, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.034494899213314056, | |
| "rewards/margins": 0.028069961816072464, | |
| "rewards/rejected": -0.06256486475467682, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 7723585.3043322805, | |
| "learning_rate": 3.7333333333333334e-07, | |
| "logits/chosen": -2.3999173641204834, | |
| "logits/rejected": -2.3777313232421875, | |
| "logps/chosen": -98.98230743408203, | |
| "logps/rejected": -113.75994873046875, | |
| "loss": 128410.575, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.0317092090845108, | |
| "rewards/margins": 0.005884192418307066, | |
| "rewards/rejected": -0.037593401968479156, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.332, | |
| "grad_norm": 5742576.756144718, | |
| "learning_rate": 3.711111111111111e-07, | |
| "logits/chosen": -2.3831002712249756, | |
| "logits/rejected": -2.3882925510406494, | |
| "logps/chosen": -94.30198669433594, | |
| "logps/rejected": -129.58078002929688, | |
| "loss": 126154.8625, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.02777601219713688, | |
| "rewards/margins": 0.01992596499621868, | |
| "rewards/rejected": -0.04770197719335556, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 7609192.056262804, | |
| "learning_rate": 3.688888888888889e-07, | |
| "logits/chosen": -2.434124708175659, | |
| "logits/rejected": -2.4821083545684814, | |
| "logps/chosen": -96.55790710449219, | |
| "logps/rejected": -122.23677062988281, | |
| "loss": 123363.6125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.026610519737005234, | |
| "rewards/margins": 0.02544989623129368, | |
| "rewards/rejected": -0.05206041410565376, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 8034675.762755762, | |
| "learning_rate": 3.666666666666666e-07, | |
| "logits/chosen": -2.3713624477386475, | |
| "logits/rejected": -2.366699457168579, | |
| "logps/chosen": -115.35933685302734, | |
| "logps/rejected": -129.19752502441406, | |
| "loss": 126533.75, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0336347371339798, | |
| "rewards/margins": 0.0172466691583395, | |
| "rewards/rejected": -0.05088140815496445, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 5975456.469702007, | |
| "learning_rate": 3.6444444444444446e-07, | |
| "logits/chosen": -2.527848482131958, | |
| "logits/rejected": -2.5321898460388184, | |
| "logps/chosen": -101.052734375, | |
| "logps/rejected": -146.85986328125, | |
| "loss": 117886.0, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.026471996679902077, | |
| "rewards/margins": 0.038311395794153214, | |
| "rewards/rejected": -0.06478338688611984, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.348, | |
| "grad_norm": 6906670.4436170915, | |
| "learning_rate": 3.622222222222222e-07, | |
| "logits/chosen": -2.5193216800689697, | |
| "logits/rejected": -2.55930495262146, | |
| "logps/chosen": -103.41465759277344, | |
| "logps/rejected": -136.6109161376953, | |
| "loss": 129100.275, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.029144983738660812, | |
| "rewards/margins": 0.02550877258181572, | |
| "rewards/rejected": -0.05465375632047653, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 7471407.013777157, | |
| "learning_rate": 3.6e-07, | |
| "logits/chosen": -2.451826572418213, | |
| "logits/rejected": -2.4506518840789795, | |
| "logps/chosen": -98.89453125, | |
| "logps/rejected": -141.08523559570312, | |
| "loss": 122662.65, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.025945227593183517, | |
| "rewards/margins": 0.03901258111000061, | |
| "rewards/rejected": -0.06495781242847443, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.356, | |
| "grad_norm": 6728467.134306638, | |
| "learning_rate": 3.5777777777777773e-07, | |
| "logits/chosen": -2.5389392375946045, | |
| "logits/rejected": -2.5246338844299316, | |
| "logps/chosen": -120.97161865234375, | |
| "logps/rejected": -168.5087432861328, | |
| "loss": 122320.4375, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03245999664068222, | |
| "rewards/margins": 0.04135856777429581, | |
| "rewards/rejected": -0.07381855696439743, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 7104916.695741348, | |
| "learning_rate": 3.5555555555555553e-07, | |
| "logits/chosen": -2.5385003089904785, | |
| "logits/rejected": -2.5232315063476562, | |
| "logps/chosen": -87.90635681152344, | |
| "logps/rejected": -116.77327728271484, | |
| "loss": 126440.375, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.032553546130657196, | |
| "rewards/margins": 0.01972118578851223, | |
| "rewards/rejected": -0.052274733781814575, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.364, | |
| "grad_norm": 5970259.1415026905, | |
| "learning_rate": 3.533333333333333e-07, | |
| "logits/chosen": -2.508732557296753, | |
| "logits/rejected": -2.5209858417510986, | |
| "logps/chosen": -101.33818054199219, | |
| "logps/rejected": -125.9253921508789, | |
| "loss": 121322.675, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03439612686634064, | |
| "rewards/margins": 0.02015717700123787, | |
| "rewards/rejected": -0.054553307592868805, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 8365175.917846098, | |
| "learning_rate": 3.5111111111111106e-07, | |
| "logits/chosen": -2.4121992588043213, | |
| "logits/rejected": -2.403446674346924, | |
| "logps/chosen": -110.45259857177734, | |
| "logps/rejected": -146.91270446777344, | |
| "loss": 124905.35, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.03642472252249718, | |
| "rewards/margins": 0.029706323519349098, | |
| "rewards/rejected": -0.06613104045391083, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.372, | |
| "grad_norm": 7152589.843566105, | |
| "learning_rate": 3.488888888888889e-07, | |
| "logits/chosen": -2.4605088233947754, | |
| "logits/rejected": -2.5225741863250732, | |
| "logps/chosen": -104.7183609008789, | |
| "logps/rejected": -129.02984619140625, | |
| "loss": 125537.525, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.030679216608405113, | |
| "rewards/margins": 0.023534944280982018, | |
| "rewards/rejected": -0.05421415716409683, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 7902918.244011351, | |
| "learning_rate": 3.4666666666666665e-07, | |
| "logits/chosen": -2.4962337017059326, | |
| "logits/rejected": -2.560391664505005, | |
| "logps/chosen": -88.20844268798828, | |
| "logps/rejected": -139.12539672851562, | |
| "loss": 120117.675, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.023946184664964676, | |
| "rewards/margins": 0.035228628665208817, | |
| "rewards/rejected": -0.05917481333017349, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 7058088.493943834, | |
| "learning_rate": 3.4444444444444444e-07, | |
| "logits/chosen": -2.4186971187591553, | |
| "logits/rejected": -2.479079484939575, | |
| "logps/chosen": -109.65245056152344, | |
| "logps/rejected": -145.39144897460938, | |
| "loss": 131717.475, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.035849470645189285, | |
| "rewards/margins": 0.030028488487005234, | |
| "rewards/rejected": -0.06587796658277512, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 9519920.624854606, | |
| "learning_rate": 3.422222222222222e-07, | |
| "logits/chosen": -2.388206958770752, | |
| "logits/rejected": -2.4197933673858643, | |
| "logps/chosen": -130.57398986816406, | |
| "logps/rejected": -206.50595092773438, | |
| "loss": 121014.6125, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03395534306764603, | |
| "rewards/margins": 0.04584265127778053, | |
| "rewards/rejected": -0.07979799807071686, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.388, | |
| "grad_norm": 6508580.208947205, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "logits/chosen": -2.4630963802337646, | |
| "logits/rejected": -2.4617621898651123, | |
| "logps/chosen": -97.78787231445312, | |
| "logps/rejected": -137.83377075195312, | |
| "loss": 129298.375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.03728308528661728, | |
| "rewards/margins": 0.032224711030721664, | |
| "rewards/rejected": -0.06950780749320984, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 8870008.273394352, | |
| "learning_rate": 3.3777777777777777e-07, | |
| "logits/chosen": -2.346057176589966, | |
| "logits/rejected": -2.388782262802124, | |
| "logps/chosen": -99.29377746582031, | |
| "logps/rejected": -174.5662841796875, | |
| "loss": 123724.2875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.025686006993055344, | |
| "rewards/margins": 0.048049140721559525, | |
| "rewards/rejected": -0.07373513281345367, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.396, | |
| "grad_norm": 8011988.902105641, | |
| "learning_rate": 3.3555555555555556e-07, | |
| "logits/chosen": -2.392421007156372, | |
| "logits/rejected": -2.373751163482666, | |
| "logps/chosen": -98.69950866699219, | |
| "logps/rejected": -133.2752227783203, | |
| "loss": 122756.525, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.029735039919614792, | |
| "rewards/margins": 0.027242526412010193, | |
| "rewards/rejected": -0.05697755888104439, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 9489098.770004237, | |
| "learning_rate": 3.333333333333333e-07, | |
| "logits/chosen": -2.326141595840454, | |
| "logits/rejected": -2.350912094116211, | |
| "logps/chosen": -113.98426818847656, | |
| "logps/rejected": -161.37271118164062, | |
| "loss": 120187.8125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03596794605255127, | |
| "rewards/margins": 0.03252139315009117, | |
| "rewards/rejected": -0.06848934292793274, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.404, | |
| "grad_norm": 7831638.304742374, | |
| "learning_rate": 3.311111111111111e-07, | |
| "logits/chosen": -2.4753966331481934, | |
| "logits/rejected": -2.4720969200134277, | |
| "logps/chosen": -93.01488494873047, | |
| "logps/rejected": -129.34759521484375, | |
| "loss": 123952.0625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.029807209968566895, | |
| "rewards/margins": 0.028074974194169044, | |
| "rewards/rejected": -0.05788217857480049, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 9432041.545640234, | |
| "learning_rate": 3.288888888888889e-07, | |
| "logits/chosen": -2.4366466999053955, | |
| "logits/rejected": -2.4423012733459473, | |
| "logps/chosen": -104.15936279296875, | |
| "logps/rejected": -127.2192611694336, | |
| "loss": 126318.675, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.03481433913111687, | |
| "rewards/margins": 0.019000452011823654, | |
| "rewards/rejected": -0.05381479114294052, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.412, | |
| "grad_norm": 8015912.365619365, | |
| "learning_rate": 3.2666666666666663e-07, | |
| "logits/chosen": -2.55189847946167, | |
| "logits/rejected": -2.544987440109253, | |
| "logps/chosen": -107.0470199584961, | |
| "logps/rejected": -162.27200317382812, | |
| "loss": 121624.775, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.034415554255247116, | |
| "rewards/margins": 0.04376577213406563, | |
| "rewards/rejected": -0.07818132638931274, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 6289031.406175271, | |
| "learning_rate": 3.244444444444444e-07, | |
| "logits/chosen": -2.46891450881958, | |
| "logits/rejected": -2.4879581928253174, | |
| "logps/chosen": -95.2767562866211, | |
| "logps/rejected": -143.16920471191406, | |
| "loss": 121056.125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.031673580408096313, | |
| "rewards/margins": 0.03125213831663132, | |
| "rewards/rejected": -0.06292571872472763, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 8474897.39551202, | |
| "learning_rate": 3.222222222222222e-07, | |
| "logits/chosen": -2.5424282550811768, | |
| "logits/rejected": -2.5662999153137207, | |
| "logps/chosen": -111.43955993652344, | |
| "logps/rejected": -153.2001190185547, | |
| "loss": 122305.1625, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.034298766404390335, | |
| "rewards/margins": 0.029853323474526405, | |
| "rewards/rejected": -0.06415208429098129, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 7118829.871963521, | |
| "learning_rate": 3.2e-07, | |
| "logits/chosen": -2.5913608074188232, | |
| "logits/rejected": -2.607485294342041, | |
| "logps/chosen": -110.69046783447266, | |
| "logps/rejected": -146.65023803710938, | |
| "loss": 120745.8125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02970978617668152, | |
| "rewards/margins": 0.02858895994722843, | |
| "rewards/rejected": -0.0582987479865551, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.428, | |
| "grad_norm": 6184465.945134909, | |
| "learning_rate": 3.1777777777777775e-07, | |
| "logits/chosen": -2.61472225189209, | |
| "logits/rejected": -2.6045632362365723, | |
| "logps/chosen": -118.44596862792969, | |
| "logps/rejected": -141.05259704589844, | |
| "loss": 119233.675, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.030257636681199074, | |
| "rewards/margins": 0.027423173189163208, | |
| "rewards/rejected": -0.05768080800771713, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 8384350.327301131, | |
| "learning_rate": 3.1555555555555554e-07, | |
| "logits/chosen": -2.5862181186676025, | |
| "logits/rejected": -2.5921201705932617, | |
| "logps/chosen": -107.20783996582031, | |
| "logps/rejected": -146.17771911621094, | |
| "loss": 123723.375, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.028097758069634438, | |
| "rewards/margins": 0.03525155037641525, | |
| "rewards/rejected": -0.06334930658340454, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.436, | |
| "grad_norm": 6672967.979703066, | |
| "learning_rate": 3.1333333333333333e-07, | |
| "logits/chosen": -2.5249099731445312, | |
| "logits/rejected": -2.5252327919006348, | |
| "logps/chosen": -82.09342956542969, | |
| "logps/rejected": -124.22785949707031, | |
| "loss": 125011.25, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02518371120095253, | |
| "rewards/margins": 0.0352654904127121, | |
| "rewards/rejected": -0.06044920161366463, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 8650903.887605142, | |
| "learning_rate": 3.111111111111111e-07, | |
| "logits/chosen": -2.4037208557128906, | |
| "logits/rejected": -2.4414310455322266, | |
| "logps/chosen": -97.84339141845703, | |
| "logps/rejected": -142.56967163085938, | |
| "loss": 119550.375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.029644513502717018, | |
| "rewards/margins": 0.03777293115854263, | |
| "rewards/rejected": -0.0674174427986145, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.444, | |
| "grad_norm": 6832446.375717195, | |
| "learning_rate": 3.0888888888888887e-07, | |
| "logits/chosen": -2.4431042671203613, | |
| "logits/rejected": -2.456604480743408, | |
| "logps/chosen": -107.29023742675781, | |
| "logps/rejected": -135.47000122070312, | |
| "loss": 121918.525, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.03453664109110832, | |
| "rewards/margins": 0.019270362332463264, | |
| "rewards/rejected": -0.05380700156092644, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 6428580.094510342, | |
| "learning_rate": 3.066666666666666e-07, | |
| "logits/chosen": -2.515045642852783, | |
| "logits/rejected": -2.51838755607605, | |
| "logps/chosen": -117.26094818115234, | |
| "logps/rejected": -153.82720947265625, | |
| "loss": 127075.725, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.031917281448841095, | |
| "rewards/margins": 0.024132903665304184, | |
| "rewards/rejected": -0.05605018883943558, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.452, | |
| "grad_norm": 7059143.842956961, | |
| "learning_rate": 3.0444444444444445e-07, | |
| "logits/chosen": -2.4530346393585205, | |
| "logits/rejected": -2.4681754112243652, | |
| "logps/chosen": -106.003662109375, | |
| "logps/rejected": -166.18124389648438, | |
| "loss": 125480.3875, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.03530178219079971, | |
| "rewards/margins": 0.03853614255785942, | |
| "rewards/rejected": -0.07383792102336884, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 7003392.120968072, | |
| "learning_rate": 3.022222222222222e-07, | |
| "logits/chosen": -2.526331901550293, | |
| "logits/rejected": -2.5595362186431885, | |
| "logps/chosen": -103.13214111328125, | |
| "logps/rejected": -151.08694458007812, | |
| "loss": 122330.225, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.0366961732506752, | |
| "rewards/margins": 0.045485951006412506, | |
| "rewards/rejected": -0.08218212425708771, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 8693683.344797961, | |
| "learning_rate": 3e-07, | |
| "logits/chosen": -2.570127248764038, | |
| "logits/rejected": -2.5569756031036377, | |
| "logps/chosen": -118.91337585449219, | |
| "logps/rejected": -157.19509887695312, | |
| "loss": 121470.875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.030104130506515503, | |
| "rewards/margins": 0.03460243344306946, | |
| "rewards/rejected": -0.06470657885074615, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 9534213.097492808, | |
| "learning_rate": 2.9777777777777773e-07, | |
| "logits/chosen": -2.5106987953186035, | |
| "logits/rejected": -2.4634547233581543, | |
| "logps/chosen": -89.20586395263672, | |
| "logps/rejected": -149.52505493164062, | |
| "loss": 116528.675, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02851666882634163, | |
| "rewards/margins": 0.04616966471076012, | |
| "rewards/rejected": -0.07468634098768234, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.468, | |
| "grad_norm": 8777276.722714778, | |
| "learning_rate": 2.9555555555555557e-07, | |
| "logits/chosen": -2.488731861114502, | |
| "logits/rejected": -2.4706058502197266, | |
| "logps/chosen": -112.614501953125, | |
| "logps/rejected": -158.08847045898438, | |
| "loss": 123641.3375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.037982989102602005, | |
| "rewards/margins": 0.035983096808195114, | |
| "rewards/rejected": -0.07396609336137772, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 7805149.222381511, | |
| "learning_rate": 2.933333333333333e-07, | |
| "logits/chosen": -2.4993319511413574, | |
| "logits/rejected": -2.4856998920440674, | |
| "logps/chosen": -103.1233901977539, | |
| "logps/rejected": -121.76432037353516, | |
| "loss": 122802.05, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03225432708859444, | |
| "rewards/margins": 0.015282504260540009, | |
| "rewards/rejected": -0.04753682762384415, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.476, | |
| "grad_norm": 9127383.125403812, | |
| "learning_rate": 2.911111111111111e-07, | |
| "logits/chosen": -2.3260111808776855, | |
| "logits/rejected": -2.384988784790039, | |
| "logps/chosen": -111.52685546875, | |
| "logps/rejected": -160.5020751953125, | |
| "loss": 125717.575, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03155955299735069, | |
| "rewards/margins": 0.043639086186885834, | |
| "rewards/rejected": -0.07519863545894623, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 8277710.5533771645, | |
| "learning_rate": 2.8888888888888885e-07, | |
| "logits/chosen": -2.4511587619781494, | |
| "logits/rejected": -2.473177433013916, | |
| "logps/chosen": -101.6271743774414, | |
| "logps/rejected": -154.0011749267578, | |
| "loss": 119860.575, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03446241840720177, | |
| "rewards/margins": 0.0441209152340889, | |
| "rewards/rejected": -0.07858333736658096, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.484, | |
| "grad_norm": 9000426.122325586, | |
| "learning_rate": 2.866666666666667e-07, | |
| "logits/chosen": -2.352407693862915, | |
| "logits/rejected": -2.3879640102386475, | |
| "logps/chosen": -111.2037353515625, | |
| "logps/rejected": -155.64559936523438, | |
| "loss": 125368.55, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.033860720694065094, | |
| "rewards/margins": 0.028398964554071426, | |
| "rewards/rejected": -0.06225968152284622, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 7439506.210055379, | |
| "learning_rate": 2.8444444444444443e-07, | |
| "logits/chosen": -2.4001262187957764, | |
| "logits/rejected": -2.406364917755127, | |
| "logps/chosen": -96.29491424560547, | |
| "logps/rejected": -133.1979522705078, | |
| "loss": 124972.3125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.031061682850122452, | |
| "rewards/margins": 0.028077807277441025, | |
| "rewards/rejected": -0.05913949012756348, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.492, | |
| "grad_norm": 8522710.668188507, | |
| "learning_rate": 2.8222222222222217e-07, | |
| "logits/chosen": -2.455540418624878, | |
| "logits/rejected": -2.5077738761901855, | |
| "logps/chosen": -118.55711364746094, | |
| "logps/rejected": -183.99917602539062, | |
| "loss": 123136.65, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03442969545722008, | |
| "rewards/margins": 0.04292518272995949, | |
| "rewards/rejected": -0.07735487818717957, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 7800702.638161312, | |
| "learning_rate": 2.8e-07, | |
| "logits/chosen": -2.4330172538757324, | |
| "logits/rejected": -2.4518215656280518, | |
| "logps/chosen": -106.44327545166016, | |
| "logps/rejected": -148.51351928710938, | |
| "loss": 123850.675, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.032119907438755035, | |
| "rewards/margins": 0.03575160354375839, | |
| "rewards/rejected": -0.06787151843309402, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 6293849.548335739, | |
| "learning_rate": 2.7777777777777776e-07, | |
| "logits/chosen": -2.5701098442077637, | |
| "logits/rejected": -2.563598394393921, | |
| "logps/chosen": -112.28812408447266, | |
| "logps/rejected": -145.1765594482422, | |
| "loss": 122208.9125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.034340750426054, | |
| "rewards/margins": 0.029792601242661476, | |
| "rewards/rejected": -0.06413334608078003, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 8164963.35867732, | |
| "learning_rate": 2.7555555555555555e-07, | |
| "logits/chosen": -2.5201942920684814, | |
| "logits/rejected": -2.491058111190796, | |
| "logps/chosen": -116.97322845458984, | |
| "logps/rejected": -161.62765502929688, | |
| "loss": 123380.825, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.04085034877061844, | |
| "rewards/margins": 0.03332878276705742, | |
| "rewards/rejected": -0.07417913526296616, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.508, | |
| "grad_norm": 7878494.751400375, | |
| "learning_rate": 2.733333333333333e-07, | |
| "logits/chosen": -2.5432791709899902, | |
| "logits/rejected": -2.5216073989868164, | |
| "logps/chosen": -104.03746032714844, | |
| "logps/rejected": -157.35208129882812, | |
| "loss": 115568.125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02601642534136772, | |
| "rewards/margins": 0.04680890962481499, | |
| "rewards/rejected": -0.07282533496618271, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 8078756.180573847, | |
| "learning_rate": 2.7111111111111114e-07, | |
| "logits/chosen": -2.4107511043548584, | |
| "logits/rejected": -2.4441466331481934, | |
| "logps/chosen": -119.5540771484375, | |
| "logps/rejected": -161.68992614746094, | |
| "loss": 125619.95, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0344010666012764, | |
| "rewards/margins": 0.039026811718940735, | |
| "rewards/rejected": -0.07342787086963654, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.516, | |
| "grad_norm": 8353367.375300777, | |
| "learning_rate": 2.688888888888889e-07, | |
| "logits/chosen": -2.553907632827759, | |
| "logits/rejected": -2.5770821571350098, | |
| "logps/chosen": -119.15167236328125, | |
| "logps/rejected": -144.7032012939453, | |
| "loss": 121825.9875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.03453459218144417, | |
| "rewards/margins": 0.01783282496035099, | |
| "rewards/rejected": -0.05236741900444031, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 7928099.234468471, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "logits/chosen": -2.4872889518737793, | |
| "logits/rejected": -2.4608452320098877, | |
| "logps/chosen": -97.89281463623047, | |
| "logps/rejected": -121.05744934082031, | |
| "loss": 121883.65, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.028054479509592056, | |
| "rewards/margins": 0.019761094823479652, | |
| "rewards/rejected": -0.04781556874513626, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.524, | |
| "grad_norm": 7247015.687306507, | |
| "learning_rate": 2.644444444444444e-07, | |
| "logits/chosen": -2.516070604324341, | |
| "logits/rejected": -2.552224636077881, | |
| "logps/chosen": -106.81401062011719, | |
| "logps/rejected": -151.16502380371094, | |
| "loss": 115673.95, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.025754611939191818, | |
| "rewards/margins": 0.03385675325989723, | |
| "rewards/rejected": -0.05961136147379875, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 7422418.597697799, | |
| "learning_rate": 2.6222222222222226e-07, | |
| "logits/chosen": -2.524921417236328, | |
| "logits/rejected": -2.5199413299560547, | |
| "logps/chosen": -91.73576354980469, | |
| "logps/rejected": -131.1287384033203, | |
| "loss": 122971.8125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.031464457511901855, | |
| "rewards/margins": 0.029294759035110474, | |
| "rewards/rejected": -0.06075920909643173, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.532, | |
| "grad_norm": 6692184.86494848, | |
| "learning_rate": 2.6e-07, | |
| "logits/chosen": -2.4487650394439697, | |
| "logits/rejected": -2.42417573928833, | |
| "logps/chosen": -105.08377838134766, | |
| "logps/rejected": -152.42251586914062, | |
| "loss": 124211.9125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.03637874126434326, | |
| "rewards/margins": 0.04163909703493118, | |
| "rewards/rejected": -0.07801783829927444, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 8776394.815220755, | |
| "learning_rate": 2.5777777777777774e-07, | |
| "logits/chosen": -2.426945209503174, | |
| "logits/rejected": -2.4211270809173584, | |
| "logps/chosen": -100.32334899902344, | |
| "logps/rejected": -158.4632568359375, | |
| "loss": 119074.8125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02933502197265625, | |
| "rewards/margins": 0.04685003310441971, | |
| "rewards/rejected": -0.07618506252765656, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 7921315.937162929, | |
| "learning_rate": 2.5555555555555553e-07, | |
| "logits/chosen": -2.4435505867004395, | |
| "logits/rejected": -2.4641623497009277, | |
| "logps/chosen": -97.31932067871094, | |
| "logps/rejected": -139.09976196289062, | |
| "loss": 119875.825, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03033040091395378, | |
| "rewards/margins": 0.03286002576351166, | |
| "rewards/rejected": -0.06319043040275574, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 7064909.318018072, | |
| "learning_rate": 2.533333333333333e-07, | |
| "logits/chosen": -2.4914050102233887, | |
| "logits/rejected": -2.4965710639953613, | |
| "logps/chosen": -117.77690124511719, | |
| "logps/rejected": -150.23826599121094, | |
| "loss": 119877.625, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0384187288582325, | |
| "rewards/margins": 0.02663787081837654, | |
| "rewards/rejected": -0.06505659967660904, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.548, | |
| "grad_norm": 7218217.506885473, | |
| "learning_rate": 2.511111111111111e-07, | |
| "logits/chosen": -2.4381630420684814, | |
| "logits/rejected": -2.4487814903259277, | |
| "logps/chosen": -102.14595794677734, | |
| "logps/rejected": -154.1560516357422, | |
| "loss": 112836.2875, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.029405618086457253, | |
| "rewards/margins": 0.04147377982735634, | |
| "rewards/rejected": -0.07087938487529755, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 8751055.476830697, | |
| "learning_rate": 2.4888888888888886e-07, | |
| "logits/chosen": -2.3979814052581787, | |
| "logits/rejected": -2.4224982261657715, | |
| "logps/chosen": -112.16400146484375, | |
| "logps/rejected": -158.16567993164062, | |
| "loss": 123894.4, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.031505607068538666, | |
| "rewards/margins": 0.03382585197687149, | |
| "rewards/rejected": -0.06533145159482956, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.556, | |
| "grad_norm": 10628471.178826654, | |
| "learning_rate": 2.4666666666666665e-07, | |
| "logits/chosen": -2.42820143699646, | |
| "logits/rejected": -2.439427375793457, | |
| "logps/chosen": -86.3087387084961, | |
| "logps/rejected": -118.8100814819336, | |
| "loss": 122684.5, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.028887376189231873, | |
| "rewards/margins": 0.024274542927742004, | |
| "rewards/rejected": -0.05316191911697388, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 7340403.161227933, | |
| "learning_rate": 2.4444444444444445e-07, | |
| "logits/chosen": -2.3746609687805176, | |
| "logits/rejected": -2.39011812210083, | |
| "logps/chosen": -112.76751708984375, | |
| "logps/rejected": -155.74038696289062, | |
| "loss": 121104.9875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.032277911901474, | |
| "rewards/margins": 0.032715652137994766, | |
| "rewards/rejected": -0.06499356776475906, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.564, | |
| "grad_norm": 7097101.153173888, | |
| "learning_rate": 2.4222222222222224e-07, | |
| "logits/chosen": -2.440396547317505, | |
| "logits/rejected": -2.4504265785217285, | |
| "logps/chosen": -104.66170501708984, | |
| "logps/rejected": -160.94100952148438, | |
| "loss": 117720.675, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.019881997257471085, | |
| "rewards/margins": 0.04168248176574707, | |
| "rewards/rejected": -0.06156448274850845, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 5809898.3420226695, | |
| "learning_rate": 2.4e-07, | |
| "logits/chosen": -2.445601463317871, | |
| "logits/rejected": -2.4264917373657227, | |
| "logps/chosen": -95.17555236816406, | |
| "logps/rejected": -143.88890075683594, | |
| "loss": 120431.9875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03070194646716118, | |
| "rewards/margins": 0.04166869446635246, | |
| "rewards/rejected": -0.07237063348293304, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.572, | |
| "grad_norm": 4988644.975711128, | |
| "learning_rate": 2.3777777777777777e-07, | |
| "logits/chosen": -2.4375877380371094, | |
| "logits/rejected": -2.441622257232666, | |
| "logps/chosen": -90.29289245605469, | |
| "logps/rejected": -128.98793029785156, | |
| "loss": 119927.5, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.027714330703020096, | |
| "rewards/margins": 0.029140587896108627, | |
| "rewards/rejected": -0.056854914873838425, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 7822455.89349568, | |
| "learning_rate": 2.3555555555555554e-07, | |
| "logits/chosen": -2.458700656890869, | |
| "logits/rejected": -2.4986705780029297, | |
| "logps/chosen": -117.4685287475586, | |
| "logps/rejected": -151.8163299560547, | |
| "loss": 123864.1, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03203599527478218, | |
| "rewards/margins": 0.031274113804101944, | |
| "rewards/rejected": -0.06331010907888412, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 14175243.183000157, | |
| "learning_rate": 2.3333333333333333e-07, | |
| "logits/chosen": -2.4828193187713623, | |
| "logits/rejected": -2.479646682739258, | |
| "logps/chosen": -103.30030822753906, | |
| "logps/rejected": -158.0785369873047, | |
| "loss": 124849.6125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.032672982662916183, | |
| "rewards/margins": 0.04446934536099434, | |
| "rewards/rejected": -0.07714232802391052, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 6091294.506455895, | |
| "learning_rate": 2.311111111111111e-07, | |
| "logits/chosen": -2.39980411529541, | |
| "logits/rejected": -2.382380723953247, | |
| "logps/chosen": -95.15815734863281, | |
| "logps/rejected": -165.42840576171875, | |
| "loss": 123090.975, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027900245040655136, | |
| "rewards/margins": 0.05805187299847603, | |
| "rewards/rejected": -0.08595212548971176, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.588, | |
| "grad_norm": 6711707.485572769, | |
| "learning_rate": 2.288888888888889e-07, | |
| "logits/chosen": -2.4506986141204834, | |
| "logits/rejected": -2.460022449493408, | |
| "logps/chosen": -99.18559265136719, | |
| "logps/rejected": -133.17782592773438, | |
| "loss": 120549.275, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02819480374455452, | |
| "rewards/margins": 0.026910748332738876, | |
| "rewards/rejected": -0.0551055483520031, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 7790254.418823996, | |
| "learning_rate": 2.2666666666666663e-07, | |
| "logits/chosen": -2.512367010116577, | |
| "logits/rejected": -2.5243959426879883, | |
| "logps/chosen": -124.3129653930664, | |
| "logps/rejected": -171.23019409179688, | |
| "loss": 119269.2875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.03208141773939133, | |
| "rewards/margins": 0.038219161331653595, | |
| "rewards/rejected": -0.07030057162046432, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.596, | |
| "grad_norm": 7436386.208544868, | |
| "learning_rate": 2.2444444444444442e-07, | |
| "logits/chosen": -2.45894193649292, | |
| "logits/rejected": -2.438983678817749, | |
| "logps/chosen": -81.72193908691406, | |
| "logps/rejected": -120.81268310546875, | |
| "loss": 119383.7875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.023425322026014328, | |
| "rewards/margins": 0.031169379130005836, | |
| "rewards/rejected": -0.054594703018665314, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 6098566.562808592, | |
| "learning_rate": 2.222222222222222e-07, | |
| "logits/chosen": -2.4491584300994873, | |
| "logits/rejected": -2.4278082847595215, | |
| "logps/chosen": -109.7543716430664, | |
| "logps/rejected": -137.11195373535156, | |
| "loss": 122590.0875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03018758073449135, | |
| "rewards/margins": 0.019970091059803963, | |
| "rewards/rejected": -0.05015767365694046, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.604, | |
| "grad_norm": 5676696.337246529, | |
| "learning_rate": 2.1999999999999998e-07, | |
| "logits/chosen": -2.3899359703063965, | |
| "logits/rejected": -2.3801169395446777, | |
| "logps/chosen": -125.1462173461914, | |
| "logps/rejected": -157.1474151611328, | |
| "loss": 123626.575, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.035767387598752975, | |
| "rewards/margins": 0.029546618461608887, | |
| "rewards/rejected": -0.06531400978565216, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 6408066.768940639, | |
| "learning_rate": 2.1777777777777775e-07, | |
| "logits/chosen": -2.511399984359741, | |
| "logits/rejected": -2.556304693222046, | |
| "logps/chosen": -102.42405700683594, | |
| "logps/rejected": -140.045654296875, | |
| "loss": 124030.1, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.026485705748200417, | |
| "rewards/margins": 0.027697976678609848, | |
| "rewards/rejected": -0.054183680564165115, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.612, | |
| "grad_norm": 5703068.150369112, | |
| "learning_rate": 2.1555555555555554e-07, | |
| "logits/chosen": -2.328718900680542, | |
| "logits/rejected": -2.3354427814483643, | |
| "logps/chosen": -104.07130432128906, | |
| "logps/rejected": -145.46546936035156, | |
| "loss": 119267.475, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.035999588668346405, | |
| "rewards/margins": 0.03417082130908966, | |
| "rewards/rejected": -0.07017041742801666, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 6771552.786870031, | |
| "learning_rate": 2.1333333333333334e-07, | |
| "logits/chosen": -2.3548483848571777, | |
| "logits/rejected": -2.3827786445617676, | |
| "logps/chosen": -101.12074279785156, | |
| "logps/rejected": -159.588623046875, | |
| "loss": 122909.8, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.030131155624985695, | |
| "rewards/margins": 0.04275660961866379, | |
| "rewards/rejected": -0.07288776338100433, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 6949724.758162161, | |
| "learning_rate": 2.111111111111111e-07, | |
| "logits/chosen": -2.3255906105041504, | |
| "logits/rejected": -2.3439719676971436, | |
| "logps/chosen": -108.85466003417969, | |
| "logps/rejected": -158.19845581054688, | |
| "loss": 123216.3625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.02804810181260109, | |
| "rewards/margins": 0.046050988137722015, | |
| "rewards/rejected": -0.0740990936756134, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 9265006.397727864, | |
| "learning_rate": 2.088888888888889e-07, | |
| "logits/chosen": -2.3858678340911865, | |
| "logits/rejected": -2.3714261054992676, | |
| "logps/chosen": -107.79356384277344, | |
| "logps/rejected": -142.68203735351562, | |
| "loss": 128108.675, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0350666344165802, | |
| "rewards/margins": 0.025052737444639206, | |
| "rewards/rejected": -0.0601193793118, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.628, | |
| "grad_norm": 8448160.384168701, | |
| "learning_rate": 2.0666666666666666e-07, | |
| "logits/chosen": -2.302703857421875, | |
| "logits/rejected": -2.317382335662842, | |
| "logps/chosen": -106.1356201171875, | |
| "logps/rejected": -165.3655242919922, | |
| "loss": 119009.5625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02817341312766075, | |
| "rewards/margins": 0.043338801711797714, | |
| "rewards/rejected": -0.07151221483945847, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 6797346.176978063, | |
| "learning_rate": 2.0444444444444446e-07, | |
| "logits/chosen": -2.3419301509857178, | |
| "logits/rejected": -2.360405683517456, | |
| "logps/chosen": -100.89833068847656, | |
| "logps/rejected": -136.70143127441406, | |
| "loss": 121410.0125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.026873702183365822, | |
| "rewards/margins": 0.03099043294787407, | |
| "rewards/rejected": -0.05786413699388504, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.636, | |
| "grad_norm": 7957900.602760819, | |
| "learning_rate": 2.022222222222222e-07, | |
| "logits/chosen": -2.325685739517212, | |
| "logits/rejected": -2.3536834716796875, | |
| "logps/chosen": -86.800048828125, | |
| "logps/rejected": -118.13228607177734, | |
| "loss": 121560.0125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.027103954926133156, | |
| "rewards/margins": 0.023416386917233467, | |
| "rewards/rejected": -0.050520338118076324, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 7524893.989731965, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -2.379504919052124, | |
| "logits/rejected": -2.364494562149048, | |
| "logps/chosen": -92.89854431152344, | |
| "logps/rejected": -144.94566345214844, | |
| "loss": 118599.6, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.027095776051282883, | |
| "rewards/margins": 0.04017645865678787, | |
| "rewards/rejected": -0.06727223098278046, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.644, | |
| "grad_norm": 7808682.74577819, | |
| "learning_rate": 1.9777777777777776e-07, | |
| "logits/chosen": -2.462635040283203, | |
| "logits/rejected": -2.3920907974243164, | |
| "logps/chosen": -123.39002990722656, | |
| "logps/rejected": -168.66348266601562, | |
| "loss": 123616.1375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02962644398212433, | |
| "rewards/margins": 0.026749838143587112, | |
| "rewards/rejected": -0.05637627840042114, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 7508118.164730088, | |
| "learning_rate": 1.9555555555555555e-07, | |
| "logits/chosen": -2.3979110717773438, | |
| "logits/rejected": -2.394200563430786, | |
| "logps/chosen": -113.19117736816406, | |
| "logps/rejected": -152.71365356445312, | |
| "loss": 122617.65, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0326806977391243, | |
| "rewards/margins": 0.03160088136792183, | |
| "rewards/rejected": -0.06428157538175583, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.652, | |
| "grad_norm": 9886937.518719045, | |
| "learning_rate": 1.9333333333333332e-07, | |
| "logits/chosen": -2.4126977920532227, | |
| "logits/rejected": -2.4325013160705566, | |
| "logps/chosen": -91.72865295410156, | |
| "logps/rejected": -134.23829650878906, | |
| "loss": 125535.1625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.029735634103417397, | |
| "rewards/margins": 0.03641930967569351, | |
| "rewards/rejected": -0.06615494191646576, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 5382013.583341201, | |
| "learning_rate": 1.911111111111111e-07, | |
| "logits/chosen": -2.3764185905456543, | |
| "logits/rejected": -2.382178544998169, | |
| "logps/chosen": -91.31561279296875, | |
| "logps/rejected": -137.80345153808594, | |
| "loss": 111719.4375, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.01971900835633278, | |
| "rewards/margins": 0.04298390820622444, | |
| "rewards/rejected": -0.06270290911197662, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 6409699.176115095, | |
| "learning_rate": 1.8888888888888888e-07, | |
| "logits/chosen": -2.330606698989868, | |
| "logits/rejected": -2.312016010284424, | |
| "logps/chosen": -114.25804138183594, | |
| "logps/rejected": -165.2264404296875, | |
| "loss": 122533.2125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.026577278971672058, | |
| "rewards/margins": 0.0414334274828434, | |
| "rewards/rejected": -0.06801070272922516, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 6977532.54737261, | |
| "learning_rate": 1.8666666666666667e-07, | |
| "logits/chosen": -2.320591449737549, | |
| "logits/rejected": -2.3048043251037598, | |
| "logps/chosen": -117.83941650390625, | |
| "logps/rejected": -176.08311462402344, | |
| "loss": 118259.3375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.030085504055023193, | |
| "rewards/margins": 0.03497748449444771, | |
| "rewards/rejected": -0.0650629848241806, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.668, | |
| "grad_norm": 7151487.887768503, | |
| "learning_rate": 1.8444444444444444e-07, | |
| "logits/chosen": -2.325629949569702, | |
| "logits/rejected": -2.37601637840271, | |
| "logps/chosen": -95.17213439941406, | |
| "logps/rejected": -149.96546936035156, | |
| "loss": 117774.0875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.028405601158738136, | |
| "rewards/margins": 0.04639150947332382, | |
| "rewards/rejected": -0.07479710876941681, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 5729479.50083699, | |
| "learning_rate": 1.8222222222222223e-07, | |
| "logits/chosen": -2.4593453407287598, | |
| "logits/rejected": -2.5003867149353027, | |
| "logps/chosen": -96.59799194335938, | |
| "logps/rejected": -145.85215759277344, | |
| "loss": 123032.9, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.03039277158677578, | |
| "rewards/margins": 0.034516870975494385, | |
| "rewards/rejected": -0.06490965187549591, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.676, | |
| "grad_norm": 6685189.148553702, | |
| "learning_rate": 1.8e-07, | |
| "logits/chosen": -2.420285224914551, | |
| "logits/rejected": -2.405721426010132, | |
| "logps/chosen": -101.75447082519531, | |
| "logps/rejected": -151.68434143066406, | |
| "loss": 121099.5375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.033398691564798355, | |
| "rewards/margins": 0.04428454115986824, | |
| "rewards/rejected": -0.0776832327246666, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 6727764.202542203, | |
| "learning_rate": 1.7777777777777776e-07, | |
| "logits/chosen": -2.5021843910217285, | |
| "logits/rejected": -2.475396156311035, | |
| "logps/chosen": -103.27125549316406, | |
| "logps/rejected": -146.76480102539062, | |
| "loss": 124458.15, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.027325114235281944, | |
| "rewards/margins": 0.04307966306805611, | |
| "rewards/rejected": -0.0704047828912735, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.684, | |
| "grad_norm": 8951493.717829395, | |
| "learning_rate": 1.7555555555555553e-07, | |
| "logits/chosen": -2.359651565551758, | |
| "logits/rejected": -2.3753628730773926, | |
| "logps/chosen": -92.47693634033203, | |
| "logps/rejected": -143.73085021972656, | |
| "loss": 122129.25, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.021263476461172104, | |
| "rewards/margins": 0.047465912997722626, | |
| "rewards/rejected": -0.06872939318418503, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 6117231.523934995, | |
| "learning_rate": 1.7333333333333332e-07, | |
| "logits/chosen": -2.3933348655700684, | |
| "logits/rejected": -2.424694538116455, | |
| "logps/chosen": -101.28050231933594, | |
| "logps/rejected": -161.67837524414062, | |
| "loss": 114482.8625, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025342673063278198, | |
| "rewards/margins": 0.051556408405303955, | |
| "rewards/rejected": -0.07689908146858215, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.692, | |
| "grad_norm": 8463223.381087825, | |
| "learning_rate": 1.711111111111111e-07, | |
| "logits/chosen": -2.4435369968414307, | |
| "logits/rejected": -2.421454668045044, | |
| "logps/chosen": -94.87931823730469, | |
| "logps/rejected": -127.29423522949219, | |
| "loss": 117968.2625, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.030688803642988205, | |
| "rewards/margins": 0.025830263271927834, | |
| "rewards/rejected": -0.05651906877756119, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 8481439.735231701, | |
| "learning_rate": 1.6888888888888888e-07, | |
| "logits/chosen": -2.422682285308838, | |
| "logits/rejected": -2.4244956970214844, | |
| "logps/chosen": -103.77542877197266, | |
| "logps/rejected": -137.70925903320312, | |
| "loss": 120156.825, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.028543483465909958, | |
| "rewards/margins": 0.021968597546219826, | |
| "rewards/rejected": -0.05051208287477493, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 9216619.77120461, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -2.505079507827759, | |
| "logits/rejected": -2.477112054824829, | |
| "logps/chosen": -105.45587158203125, | |
| "logps/rejected": -140.0381317138672, | |
| "loss": 113155.975, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02943289838731289, | |
| "rewards/margins": 0.03403625637292862, | |
| "rewards/rejected": -0.06346915662288666, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 7248683.781183183, | |
| "learning_rate": 1.6444444444444444e-07, | |
| "logits/chosen": -2.480299949645996, | |
| "logits/rejected": -2.470823287963867, | |
| "logps/chosen": -122.30104064941406, | |
| "logps/rejected": -164.48951721191406, | |
| "loss": 117051.6, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.029943257570266724, | |
| "rewards/margins": 0.036385588347911835, | |
| "rewards/rejected": -0.06632884591817856, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.708, | |
| "grad_norm": 8402038.296433628, | |
| "learning_rate": 1.622222222222222e-07, | |
| "logits/chosen": -2.472791910171509, | |
| "logits/rejected": -2.4726855754852295, | |
| "logps/chosen": -114.65007019042969, | |
| "logps/rejected": -153.65573120117188, | |
| "loss": 120727.275, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03354992717504501, | |
| "rewards/margins": 0.03721586614847183, | |
| "rewards/rejected": -0.07076579332351685, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 9719574.93632757, | |
| "learning_rate": 1.6e-07, | |
| "logits/chosen": -2.4326717853546143, | |
| "logits/rejected": -2.421159267425537, | |
| "logps/chosen": -115.16218566894531, | |
| "logps/rejected": -164.61166381835938, | |
| "loss": 122159.4125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.031878646463155746, | |
| "rewards/margins": 0.04045126959681511, | |
| "rewards/rejected": -0.07232991605997086, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.716, | |
| "grad_norm": 7274636.012655509, | |
| "learning_rate": 1.5777777777777777e-07, | |
| "logits/chosen": -2.3492178916931152, | |
| "logits/rejected": -2.3669705390930176, | |
| "logps/chosen": -110.37937927246094, | |
| "logps/rejected": -148.16090393066406, | |
| "loss": 122808.1875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.033610500395298004, | |
| "rewards/margins": 0.031820036470890045, | |
| "rewards/rejected": -0.06543054431676865, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 7960487.891716203, | |
| "learning_rate": 1.5555555555555556e-07, | |
| "logits/chosen": -2.5343594551086426, | |
| "logits/rejected": -2.509129762649536, | |
| "logps/chosen": -112.82719421386719, | |
| "logps/rejected": -139.89376831054688, | |
| "loss": 124301.7875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.031808070838451385, | |
| "rewards/margins": 0.015915410593152046, | |
| "rewards/rejected": -0.04772348329424858, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.724, | |
| "grad_norm": 7184987.914163716, | |
| "learning_rate": 1.533333333333333e-07, | |
| "logits/chosen": -2.5088343620300293, | |
| "logits/rejected": -2.5166666507720947, | |
| "logps/chosen": -104.326904296875, | |
| "logps/rejected": -150.2886505126953, | |
| "loss": 128243.85, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.034587159752845764, | |
| "rewards/margins": 0.03759396821260452, | |
| "rewards/rejected": -0.07218112796545029, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 10039229.460908188, | |
| "learning_rate": 1.511111111111111e-07, | |
| "logits/chosen": -2.45367693901062, | |
| "logits/rejected": -2.4529166221618652, | |
| "logps/chosen": -107.16294860839844, | |
| "logps/rejected": -140.8982696533203, | |
| "loss": 124612.4625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03276178613305092, | |
| "rewards/margins": 0.03512474521994591, | |
| "rewards/rejected": -0.06788653880357742, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.732, | |
| "grad_norm": 7124197.534522493, | |
| "learning_rate": 1.4888888888888886e-07, | |
| "logits/chosen": -2.4414725303649902, | |
| "logits/rejected": -2.422440767288208, | |
| "logps/chosen": -84.65110778808594, | |
| "logps/rejected": -130.88641357421875, | |
| "loss": 125120.0625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.018780285492539406, | |
| "rewards/margins": 0.03842353820800781, | |
| "rewards/rejected": -0.05720382183790207, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 9253360.097651359, | |
| "learning_rate": 1.4666666666666666e-07, | |
| "logits/chosen": -2.3342652320861816, | |
| "logits/rejected": -2.3714077472686768, | |
| "logps/chosen": -101.37190246582031, | |
| "logps/rejected": -128.89321899414062, | |
| "loss": 120892.2875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02679322101175785, | |
| "rewards/margins": 0.027122925966978073, | |
| "rewards/rejected": -0.053916145116090775, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 7423694.38438217, | |
| "learning_rate": 1.4444444444444442e-07, | |
| "logits/chosen": -2.4630966186523438, | |
| "logits/rejected": -2.413529634475708, | |
| "logps/chosen": -103.9755630493164, | |
| "logps/rejected": -133.18716430664062, | |
| "loss": 123775.5125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03415101021528244, | |
| "rewards/margins": 0.02861020900309086, | |
| "rewards/rejected": -0.06276122480630875, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 5647373.604720096, | |
| "learning_rate": 1.4222222222222222e-07, | |
| "logits/chosen": -2.464050769805908, | |
| "logits/rejected": -2.501260757446289, | |
| "logps/chosen": -125.53248596191406, | |
| "logps/rejected": -175.7611083984375, | |
| "loss": 117425.8375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02989697828888893, | |
| "rewards/margins": 0.040645621716976166, | |
| "rewards/rejected": -0.0705425962805748, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.748, | |
| "grad_norm": 8230581.203213356, | |
| "learning_rate": 1.4e-07, | |
| "logits/chosen": -2.5151472091674805, | |
| "logits/rejected": -2.509429454803467, | |
| "logps/chosen": -92.0722885131836, | |
| "logps/rejected": -167.84060668945312, | |
| "loss": 119465.4875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02767838165163994, | |
| "rewards/margins": 0.05135294049978256, | |
| "rewards/rejected": -0.0790313258767128, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 12754476.984279104, | |
| "learning_rate": 1.3777777777777778e-07, | |
| "logits/chosen": -2.5021023750305176, | |
| "logits/rejected": -2.484841823577881, | |
| "logps/chosen": -94.76568603515625, | |
| "logps/rejected": -139.48953247070312, | |
| "loss": 118982.3875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02872481569647789, | |
| "rewards/margins": 0.03676723688840866, | |
| "rewards/rejected": -0.06549205631017685, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.756, | |
| "grad_norm": 6983911.433265186, | |
| "learning_rate": 1.3555555555555557e-07, | |
| "logits/chosen": -2.5138821601867676, | |
| "logits/rejected": -2.5218331813812256, | |
| "logps/chosen": -89.62753295898438, | |
| "logps/rejected": -128.04603576660156, | |
| "loss": 120175.3625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.025366436690092087, | |
| "rewards/margins": 0.028226271271705627, | |
| "rewards/rejected": -0.053592704236507416, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 7315179.12228925, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "logits/chosen": -2.3722000122070312, | |
| "logits/rejected": -2.361008644104004, | |
| "logps/chosen": -106.6434326171875, | |
| "logps/rejected": -154.347412109375, | |
| "loss": 118594.25, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.028675338253378868, | |
| "rewards/margins": 0.03925652056932449, | |
| "rewards/rejected": -0.06793185323476791, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.764, | |
| "grad_norm": 6191576.733772953, | |
| "learning_rate": 1.3111111111111113e-07, | |
| "logits/chosen": -2.361506938934326, | |
| "logits/rejected": -2.416877269744873, | |
| "logps/chosen": -121.11296081542969, | |
| "logps/rejected": -182.23646545410156, | |
| "loss": 120340.05, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027058254927396774, | |
| "rewards/margins": 0.04295941814780235, | |
| "rewards/rejected": -0.07001767307519913, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 7960326.332116376, | |
| "learning_rate": 1.2888888888888887e-07, | |
| "logits/chosen": -2.3773555755615234, | |
| "logits/rejected": -2.393650531768799, | |
| "logps/chosen": -86.46308135986328, | |
| "logps/rejected": -132.29776000976562, | |
| "loss": 122114.95, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02390897274017334, | |
| "rewards/margins": 0.037305351346731186, | |
| "rewards/rejected": -0.061214327812194824, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.772, | |
| "grad_norm": 8545237.759950696, | |
| "learning_rate": 1.2666666666666666e-07, | |
| "logits/chosen": -2.3730955123901367, | |
| "logits/rejected": -2.346529960632324, | |
| "logps/chosen": -98.66458129882812, | |
| "logps/rejected": -133.67532348632812, | |
| "loss": 121957.9, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.030175382271409035, | |
| "rewards/margins": 0.031160462647676468, | |
| "rewards/rejected": -0.06133584305644035, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 7541759.4377164915, | |
| "learning_rate": 1.2444444444444443e-07, | |
| "logits/chosen": -2.4905495643615723, | |
| "logits/rejected": -2.467737913131714, | |
| "logps/chosen": -97.59669494628906, | |
| "logps/rejected": -144.62152099609375, | |
| "loss": 120647.3875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.029411058872938156, | |
| "rewards/margins": 0.03424420207738876, | |
| "rewards/rejected": -0.06365526467561722, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 9641484.377151929, | |
| "learning_rate": 1.2222222222222222e-07, | |
| "logits/chosen": -2.49107027053833, | |
| "logits/rejected": -2.490995407104492, | |
| "logps/chosen": -101.06126403808594, | |
| "logps/rejected": -136.541748046875, | |
| "loss": 121525.775, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.026407599449157715, | |
| "rewards/margins": 0.03157456964254379, | |
| "rewards/rejected": -0.05798216909170151, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 5545873.184552746, | |
| "learning_rate": 1.2e-07, | |
| "logits/chosen": -2.5349438190460205, | |
| "logits/rejected": -2.5639233589172363, | |
| "logps/chosen": -100.9637222290039, | |
| "logps/rejected": -172.61062622070312, | |
| "loss": 119077.0375, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.019367219880223274, | |
| "rewards/margins": 0.057732999324798584, | |
| "rewards/rejected": -0.0771002247929573, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.788, | |
| "grad_norm": 9492873.423361877, | |
| "learning_rate": 1.1777777777777777e-07, | |
| "logits/chosen": -2.358701229095459, | |
| "logits/rejected": -2.379284381866455, | |
| "logps/chosen": -92.12281036376953, | |
| "logps/rejected": -166.2657012939453, | |
| "loss": 114722.3, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02654215320944786, | |
| "rewards/margins": 0.06289757788181305, | |
| "rewards/rejected": -0.08943972736597061, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 8128359.015235812, | |
| "learning_rate": 1.1555555555555555e-07, | |
| "logits/chosen": -2.445798397064209, | |
| "logits/rejected": -2.463740110397339, | |
| "logps/chosen": -111.72492980957031, | |
| "logps/rejected": -154.72195434570312, | |
| "loss": 123011.1125, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.029620587825775146, | |
| "rewards/margins": 0.03493895009160042, | |
| "rewards/rejected": -0.06455953419208527, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.796, | |
| "grad_norm": 9306300.401598651, | |
| "learning_rate": 1.1333333333333332e-07, | |
| "logits/chosen": -2.417834520339966, | |
| "logits/rejected": -2.4220337867736816, | |
| "logps/chosen": -91.2249526977539, | |
| "logps/rejected": -162.97373962402344, | |
| "loss": 118218.9, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.023723283782601357, | |
| "rewards/margins": 0.057645224034786224, | |
| "rewards/rejected": -0.08136852085590363, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 8765512.962268472, | |
| "learning_rate": 1.111111111111111e-07, | |
| "logits/chosen": -2.40264630317688, | |
| "logits/rejected": -2.427499294281006, | |
| "logps/chosen": -96.80880737304688, | |
| "logps/rejected": -145.74375915527344, | |
| "loss": 120947.8375, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02879420481622219, | |
| "rewards/margins": 0.04018041118979454, | |
| "rewards/rejected": -0.06897461414337158, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.804, | |
| "grad_norm": 7358207.507871911, | |
| "learning_rate": 1.0888888888888888e-07, | |
| "logits/chosen": -2.491158962249756, | |
| "logits/rejected": -2.4770848751068115, | |
| "logps/chosen": -109.10551452636719, | |
| "logps/rejected": -167.1204071044922, | |
| "loss": 116044.225, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02867368422448635, | |
| "rewards/margins": 0.04405529797077179, | |
| "rewards/rejected": -0.07272897660732269, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 6847868.235200222, | |
| "learning_rate": 1.0666666666666667e-07, | |
| "logits/chosen": -2.4977283477783203, | |
| "logits/rejected": -2.4795002937316895, | |
| "logps/chosen": -92.58442687988281, | |
| "logps/rejected": -137.3270721435547, | |
| "loss": 120623.725, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.02764459326863289, | |
| "rewards/margins": 0.029529035091400146, | |
| "rewards/rejected": -0.057173628360033035, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.812, | |
| "grad_norm": 7345224.131775115, | |
| "learning_rate": 1.0444444444444445e-07, | |
| "logits/chosen": -2.581921100616455, | |
| "logits/rejected": -2.5818488597869873, | |
| "logps/chosen": -94.2405014038086, | |
| "logps/rejected": -151.82931518554688, | |
| "loss": 118029.625, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.026779672130942345, | |
| "rewards/margins": 0.04871240258216858, | |
| "rewards/rejected": -0.07549206912517548, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 8179129.121592561, | |
| "learning_rate": 1.0222222222222223e-07, | |
| "logits/chosen": -2.510425090789795, | |
| "logits/rejected": -2.5082054138183594, | |
| "logps/chosen": -98.96800231933594, | |
| "logps/rejected": -132.51150512695312, | |
| "loss": 119617.05, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02764180861413479, | |
| "rewards/margins": 0.024158382788300514, | |
| "rewards/rejected": -0.0518001914024353, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 8464195.380143736, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -2.4244985580444336, | |
| "logits/rejected": -2.43390154838562, | |
| "logps/chosen": -106.4210205078125, | |
| "logps/rejected": -164.44268798828125, | |
| "loss": 120873.525, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.029137443751096725, | |
| "rewards/margins": 0.049099259078502655, | |
| "rewards/rejected": -0.07823669910430908, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 7635743.090822039, | |
| "learning_rate": 9.777777777777778e-08, | |
| "logits/chosen": -2.45817494392395, | |
| "logits/rejected": -2.472324848175049, | |
| "logps/chosen": -102.89268493652344, | |
| "logps/rejected": -155.74337768554688, | |
| "loss": 120353.5125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0331621877849102, | |
| "rewards/margins": 0.045612066984176636, | |
| "rewards/rejected": -0.07877425849437714, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.828, | |
| "grad_norm": 6226582.326517582, | |
| "learning_rate": 9.555555555555556e-08, | |
| "logits/chosen": -2.4659340381622314, | |
| "logits/rejected": -2.475663661956787, | |
| "logps/chosen": -94.48152923583984, | |
| "logps/rejected": -128.58985900878906, | |
| "loss": 123484.35, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.02351956069469452, | |
| "rewards/margins": 0.03220217674970627, | |
| "rewards/rejected": -0.05572172999382019, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 9949795.202652398, | |
| "learning_rate": 9.333333333333334e-08, | |
| "logits/chosen": -2.4222323894500732, | |
| "logits/rejected": -2.42124342918396, | |
| "logps/chosen": -103.56013488769531, | |
| "logps/rejected": -154.19448852539062, | |
| "loss": 122220.075, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.032791610807180405, | |
| "rewards/margins": 0.03772992268204689, | |
| "rewards/rejected": -0.0705215334892273, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.836, | |
| "grad_norm": 7095174.456895947, | |
| "learning_rate": 9.111111111111112e-08, | |
| "logits/chosen": -2.5458078384399414, | |
| "logits/rejected": -2.5440893173217773, | |
| "logps/chosen": -104.78141784667969, | |
| "logps/rejected": -135.82858276367188, | |
| "loss": 121374.0375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.026525821536779404, | |
| "rewards/margins": 0.02192816510796547, | |
| "rewards/rejected": -0.04845398664474487, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 10736924.474295698, | |
| "learning_rate": 8.888888888888888e-08, | |
| "logits/chosen": -2.4108989238739014, | |
| "logits/rejected": -2.442476749420166, | |
| "logps/chosen": -109.81797790527344, | |
| "logps/rejected": -154.10580444335938, | |
| "loss": 118612.975, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.030571172013878822, | |
| "rewards/margins": 0.04257971793413162, | |
| "rewards/rejected": -0.0731508880853653, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.844, | |
| "grad_norm": 7793006.873257276, | |
| "learning_rate": 8.666666666666666e-08, | |
| "logits/chosen": -2.408449411392212, | |
| "logits/rejected": -2.401078939437866, | |
| "logps/chosen": -98.3482894897461, | |
| "logps/rejected": -144.44593811035156, | |
| "loss": 119906.3125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.025788147002458572, | |
| "rewards/margins": 0.0393107533454895, | |
| "rewards/rejected": -0.06509890407323837, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 8262242.910041632, | |
| "learning_rate": 8.444444444444444e-08, | |
| "logits/chosen": -2.328664779663086, | |
| "logits/rejected": -2.3485660552978516, | |
| "logps/chosen": -115.07906341552734, | |
| "logps/rejected": -163.2090301513672, | |
| "loss": 118916.3625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.028421631082892418, | |
| "rewards/margins": 0.04112589359283447, | |
| "rewards/rejected": -0.06954751908779144, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.852, | |
| "grad_norm": 7788645.367897111, | |
| "learning_rate": 8.222222222222222e-08, | |
| "logits/chosen": -2.381272554397583, | |
| "logits/rejected": -2.3988587856292725, | |
| "logps/chosen": -98.18142700195312, | |
| "logps/rejected": -158.4501190185547, | |
| "loss": 114372.2, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.027454352006316185, | |
| "rewards/margins": 0.04615364223718643, | |
| "rewards/rejected": -0.07360798865556717, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 7241736.462505716, | |
| "learning_rate": 8e-08, | |
| "logits/chosen": -2.384153127670288, | |
| "logits/rejected": -2.4060769081115723, | |
| "logps/chosen": -112.68023681640625, | |
| "logps/rejected": -133.3109588623047, | |
| "loss": 122776.875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.035701293498277664, | |
| "rewards/margins": 0.014023616909980774, | |
| "rewards/rejected": -0.04972491040825844, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 7653248.620162212, | |
| "learning_rate": 7.777777777777778e-08, | |
| "logits/chosen": -2.382563591003418, | |
| "logits/rejected": -2.4016215801239014, | |
| "logps/chosen": -91.17201232910156, | |
| "logps/rejected": -136.58473205566406, | |
| "loss": 117265.6125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025211047381162643, | |
| "rewards/margins": 0.03577146679162979, | |
| "rewards/rejected": -0.060982514172792435, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 7731438.19804926, | |
| "learning_rate": 7.555555555555555e-08, | |
| "logits/chosen": -2.3781442642211914, | |
| "logits/rejected": -2.3403193950653076, | |
| "logps/chosen": -121.6617431640625, | |
| "logps/rejected": -155.835205078125, | |
| "loss": 120368.2625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.027185499668121338, | |
| "rewards/margins": 0.031700123101472855, | |
| "rewards/rejected": -0.05888562276959419, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.868, | |
| "grad_norm": 8267606.777008629, | |
| "learning_rate": 7.333333333333333e-08, | |
| "logits/chosen": -2.3575565814971924, | |
| "logits/rejected": -2.386939287185669, | |
| "logps/chosen": -101.64804077148438, | |
| "logps/rejected": -148.3137664794922, | |
| "loss": 115195.7375, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.023973077535629272, | |
| "rewards/margins": 0.044489845633506775, | |
| "rewards/rejected": -0.06846292316913605, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 9005598.684897516, | |
| "learning_rate": 7.111111111111111e-08, | |
| "logits/chosen": -2.3442575931549072, | |
| "logits/rejected": -2.366135597229004, | |
| "logps/chosen": -103.1920166015625, | |
| "logps/rejected": -157.7998504638672, | |
| "loss": 118398.575, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.03024686500430107, | |
| "rewards/margins": 0.04585784301161766, | |
| "rewards/rejected": -0.07610471546649933, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.876, | |
| "grad_norm": 9533026.874445997, | |
| "learning_rate": 6.888888888888889e-08, | |
| "logits/chosen": -2.380309581756592, | |
| "logits/rejected": -2.356041431427002, | |
| "logps/chosen": -110.62892150878906, | |
| "logps/rejected": -141.9584197998047, | |
| "loss": 121318.95, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03162779659032822, | |
| "rewards/margins": 0.02817652001976967, | |
| "rewards/rejected": -0.059804320335388184, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 10673576.986433594, | |
| "learning_rate": 6.666666666666667e-08, | |
| "logits/chosen": -2.3506739139556885, | |
| "logits/rejected": -2.372131586074829, | |
| "logps/chosen": -100.54646301269531, | |
| "logps/rejected": -135.2465362548828, | |
| "loss": 126468.65, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03161459043622017, | |
| "rewards/margins": 0.02985607460141182, | |
| "rewards/rejected": -0.06147066876292229, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.884, | |
| "grad_norm": 8461999.794241536, | |
| "learning_rate": 6.444444444444443e-08, | |
| "logits/chosen": -2.362769842147827, | |
| "logits/rejected": -2.406325578689575, | |
| "logps/chosen": -97.97603607177734, | |
| "logps/rejected": -142.4342041015625, | |
| "loss": 127463.35, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.03243636339902878, | |
| "rewards/margins": 0.029870545491576195, | |
| "rewards/rejected": -0.062306903302669525, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 7368698.764464808, | |
| "learning_rate": 6.222222222222221e-08, | |
| "logits/chosen": -2.3991339206695557, | |
| "logits/rejected": -2.4037344455718994, | |
| "logps/chosen": -93.59549713134766, | |
| "logps/rejected": -162.4844970703125, | |
| "loss": 120745.575, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.027704676613211632, | |
| "rewards/margins": 0.05211573839187622, | |
| "rewards/rejected": -0.0798204094171524, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.892, | |
| "grad_norm": 8665468.38774931, | |
| "learning_rate": 6e-08, | |
| "logits/chosen": -2.4542791843414307, | |
| "logits/rejected": -2.4168543815612793, | |
| "logps/chosen": -93.68408966064453, | |
| "logps/rejected": -148.40516662597656, | |
| "loss": 118598.825, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.03159097954630852, | |
| "rewards/margins": 0.04323247820138931, | |
| "rewards/rejected": -0.07482346147298813, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 7933695.302057784, | |
| "learning_rate": 5.7777777777777775e-08, | |
| "logits/chosen": -2.3784477710723877, | |
| "logits/rejected": -2.3589086532592773, | |
| "logps/chosen": -92.63359832763672, | |
| "logps/rejected": -134.8274383544922, | |
| "loss": 120496.3875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.028888177126646042, | |
| "rewards/margins": 0.03402668610215187, | |
| "rewards/rejected": -0.06291486322879791, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 9969155.359289682, | |
| "learning_rate": 5.555555555555555e-08, | |
| "logits/chosen": -2.2903988361358643, | |
| "logits/rejected": -2.3016340732574463, | |
| "logps/chosen": -85.76798248291016, | |
| "logps/rejected": -127.5031967163086, | |
| "loss": 120240.075, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02621074579656124, | |
| "rewards/margins": 0.03313954919576645, | |
| "rewards/rejected": -0.05935030058026314, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 9157992.023374882, | |
| "learning_rate": 5.3333333333333334e-08, | |
| "logits/chosen": -2.336127996444702, | |
| "logits/rejected": -2.2945499420166016, | |
| "logps/chosen": -91.5769271850586, | |
| "logps/rejected": -135.12843322753906, | |
| "loss": 120103.1125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.027380824089050293, | |
| "rewards/margins": 0.03264584392309189, | |
| "rewards/rejected": -0.06002666801214218, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.908, | |
| "grad_norm": 8951108.084423833, | |
| "learning_rate": 5.1111111111111114e-08, | |
| "logits/chosen": -2.347435712814331, | |
| "logits/rejected": -2.3315415382385254, | |
| "logps/chosen": -113.215576171875, | |
| "logps/rejected": -128.22955322265625, | |
| "loss": 124138.05, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.032336726784706116, | |
| "rewards/margins": 0.017136305570602417, | |
| "rewards/rejected": -0.04947303608059883, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 8242314.92846825, | |
| "learning_rate": 4.888888888888889e-08, | |
| "logits/chosen": -2.4864022731781006, | |
| "logits/rejected": -2.4819133281707764, | |
| "logps/chosen": -105.97566223144531, | |
| "logps/rejected": -151.5330047607422, | |
| "loss": 120462.5125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.026608863845467567, | |
| "rewards/margins": 0.0363970547914505, | |
| "rewards/rejected": -0.06300591677427292, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.916, | |
| "grad_norm": 9094101.689046768, | |
| "learning_rate": 4.666666666666667e-08, | |
| "logits/chosen": -2.4374148845672607, | |
| "logits/rejected": -2.4441134929656982, | |
| "logps/chosen": -103.56266784667969, | |
| "logps/rejected": -172.01528930664062, | |
| "loss": 121326.7125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02742699161171913, | |
| "rewards/margins": 0.06065355986356735, | |
| "rewards/rejected": -0.08808055520057678, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 10641032.764093434, | |
| "learning_rate": 4.444444444444444e-08, | |
| "logits/chosen": -2.4069132804870605, | |
| "logits/rejected": -2.4145545959472656, | |
| "logps/chosen": -82.15616607666016, | |
| "logps/rejected": -143.5443115234375, | |
| "loss": 115663.2125, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.02152777649462223, | |
| "rewards/margins": 0.0520428791642189, | |
| "rewards/rejected": -0.07357065379619598, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.924, | |
| "grad_norm": 12548436.959082082, | |
| "learning_rate": 4.222222222222222e-08, | |
| "logits/chosen": -2.448031187057495, | |
| "logits/rejected": -2.4767231941223145, | |
| "logps/chosen": -115.41275787353516, | |
| "logps/rejected": -162.3332977294922, | |
| "loss": 123697.6375, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.024739082902669907, | |
| "rewards/margins": 0.045056119561195374, | |
| "rewards/rejected": -0.06979519873857498, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 8095941.315550662, | |
| "learning_rate": 4e-08, | |
| "logits/chosen": -2.3472633361816406, | |
| "logits/rejected": -2.34629225730896, | |
| "logps/chosen": -96.70509338378906, | |
| "logps/rejected": -156.36058044433594, | |
| "loss": 123115.8875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02763884700834751, | |
| "rewards/margins": 0.04638766124844551, | |
| "rewards/rejected": -0.07402651011943817, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.932, | |
| "grad_norm": 9261661.294557055, | |
| "learning_rate": 3.7777777777777774e-08, | |
| "logits/chosen": -2.379624128341675, | |
| "logits/rejected": -2.39247727394104, | |
| "logps/chosen": -84.12887573242188, | |
| "logps/rejected": -139.15478515625, | |
| "loss": 116878.95, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.0225827656686306, | |
| "rewards/margins": 0.04350755736231804, | |
| "rewards/rejected": -0.06609033048152924, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 11782492.59177351, | |
| "learning_rate": 3.5555555555555554e-08, | |
| "logits/chosen": -2.453583240509033, | |
| "logits/rejected": -2.4496898651123047, | |
| "logps/chosen": -101.22218322753906, | |
| "logps/rejected": -146.49490356445312, | |
| "loss": 116143.6875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.0275820791721344, | |
| "rewards/margins": 0.036730751395225525, | |
| "rewards/rejected": -0.06431283056735992, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 7788195.554054044, | |
| "learning_rate": 3.3333333333333334e-08, | |
| "logits/chosen": -2.3583855628967285, | |
| "logits/rejected": -2.325178623199463, | |
| "logps/chosen": -86.75674438476562, | |
| "logps/rejected": -152.60946655273438, | |
| "loss": 122277.5875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02565554343163967, | |
| "rewards/margins": 0.05285739153623581, | |
| "rewards/rejected": -0.07851293683052063, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 9203769.822900785, | |
| "learning_rate": 3.111111111111111e-08, | |
| "logits/chosen": -2.3866257667541504, | |
| "logits/rejected": -2.3973793983459473, | |
| "logps/chosen": -103.6191177368164, | |
| "logps/rejected": -177.63768005371094, | |
| "loss": 119365.9125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03137093037366867, | |
| "rewards/margins": 0.06034323573112488, | |
| "rewards/rejected": -0.09171417355537415, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.948, | |
| "grad_norm": 9423001.082938906, | |
| "learning_rate": 2.8888888888888887e-08, | |
| "logits/chosen": -2.467428684234619, | |
| "logits/rejected": -2.4925427436828613, | |
| "logps/chosen": -104.86148834228516, | |
| "logps/rejected": -129.6764373779297, | |
| "loss": 123624.5625, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.030190538614988327, | |
| "rewards/margins": 0.025643909350037575, | |
| "rewards/rejected": -0.05583444982767105, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 8004704.995606078, | |
| "learning_rate": 2.6666666666666667e-08, | |
| "logits/chosen": -2.478123188018799, | |
| "logits/rejected": -2.4850118160247803, | |
| "logps/chosen": -78.22926330566406, | |
| "logps/rejected": -149.72119140625, | |
| "loss": 111276.7625, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.020721960812807083, | |
| "rewards/margins": 0.06527476012706757, | |
| "rewards/rejected": -0.08599671721458435, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.956, | |
| "grad_norm": 9750139.945790045, | |
| "learning_rate": 2.4444444444444444e-08, | |
| "logits/chosen": -2.4585745334625244, | |
| "logits/rejected": -2.4466397762298584, | |
| "logps/chosen": -103.5519790649414, | |
| "logps/rejected": -164.13674926757812, | |
| "loss": 121310.4375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0302131325006485, | |
| "rewards/margins": 0.04971124976873398, | |
| "rewards/rejected": -0.07992438226938248, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 7278490.232350556, | |
| "learning_rate": 2.222222222222222e-08, | |
| "logits/chosen": -2.43925404548645, | |
| "logits/rejected": -2.438615322113037, | |
| "logps/chosen": -105.12055969238281, | |
| "logps/rejected": -164.0720977783203, | |
| "loss": 118679.0375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02877245843410492, | |
| "rewards/margins": 0.052051056176424026, | |
| "rewards/rejected": -0.08082351088523865, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.964, | |
| "grad_norm": 10483716.8937703, | |
| "learning_rate": 2e-08, | |
| "logits/chosen": -2.4821510314941406, | |
| "logits/rejected": -2.46364426612854, | |
| "logps/chosen": -101.2098159790039, | |
| "logps/rejected": -143.97183227539062, | |
| "loss": 117960.6, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03032396174967289, | |
| "rewards/margins": 0.03740059584379196, | |
| "rewards/rejected": -0.0677245557308197, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 12173721.82019396, | |
| "learning_rate": 1.7777777777777777e-08, | |
| "logits/chosen": -2.379589557647705, | |
| "logits/rejected": -2.4067189693450928, | |
| "logps/chosen": -122.52884674072266, | |
| "logps/rejected": -172.4658966064453, | |
| "loss": 124411.0625, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03309926018118858, | |
| "rewards/margins": 0.042371779680252075, | |
| "rewards/rejected": -0.07547104358673096, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.972, | |
| "grad_norm": 9706266.63311398, | |
| "learning_rate": 1.5555555555555554e-08, | |
| "logits/chosen": -2.436565637588501, | |
| "logits/rejected": -2.4684276580810547, | |
| "logps/chosen": -109.63997650146484, | |
| "logps/rejected": -152.08335876464844, | |
| "loss": 122077.7, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03295578807592392, | |
| "rewards/margins": 0.03596381098031998, | |
| "rewards/rejected": -0.0689195990562439, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 5892646.249318525, | |
| "learning_rate": 1.3333333333333334e-08, | |
| "logits/chosen": -2.531766891479492, | |
| "logits/rejected": -2.5235583782196045, | |
| "logps/chosen": -125.18925476074219, | |
| "logps/rejected": -160.97665405273438, | |
| "loss": 123783.1, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.026068557053804398, | |
| "rewards/margins": 0.030096372589468956, | |
| "rewards/rejected": -0.0561649315059185, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 10534775.155367365, | |
| "learning_rate": 1.111111111111111e-08, | |
| "logits/chosen": -2.369849443435669, | |
| "logits/rejected": -2.381124258041382, | |
| "logps/chosen": -103.98789978027344, | |
| "logps/rejected": -156.1312255859375, | |
| "loss": 119931.5125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02474259026348591, | |
| "rewards/margins": 0.03790457919239998, | |
| "rewards/rejected": -0.06264716386795044, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 9946607.778478429, | |
| "learning_rate": 8.888888888888889e-09, | |
| "logits/chosen": -2.487614393234253, | |
| "logits/rejected": -2.465937614440918, | |
| "logps/chosen": -111.37520599365234, | |
| "logps/rejected": -137.24546813964844, | |
| "loss": 124467.0125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03445830196142197, | |
| "rewards/margins": 0.02246803045272827, | |
| "rewards/rejected": -0.05692633241415024, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.988, | |
| "grad_norm": 12838026.989788342, | |
| "learning_rate": 6.666666666666667e-09, | |
| "logits/chosen": -2.3559987545013428, | |
| "logits/rejected": -2.3515543937683105, | |
| "logps/chosen": -103.37791442871094, | |
| "logps/rejected": -170.76101684570312, | |
| "loss": 123802.275, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.028120238333940506, | |
| "rewards/margins": 0.047586239874362946, | |
| "rewards/rejected": -0.07570647448301315, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 8735124.614081156, | |
| "learning_rate": 4.444444444444444e-09, | |
| "logits/chosen": -2.4489405155181885, | |
| "logits/rejected": -2.4833984375, | |
| "logps/chosen": -95.29798889160156, | |
| "logps/rejected": -143.02792358398438, | |
| "loss": 118030.2125, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.025047576054930687, | |
| "rewards/margins": 0.04086794704198837, | |
| "rewards/rejected": -0.06591552495956421, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.996, | |
| "grad_norm": 9991241.9099312, | |
| "learning_rate": 2.222222222222222e-09, | |
| "logits/chosen": -2.351677417755127, | |
| "logits/rejected": -2.272353410720825, | |
| "logps/chosen": -93.49928283691406, | |
| "logps/rejected": -141.19610595703125, | |
| "loss": 118406.9625, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02804369106888771, | |
| "rewards/margins": 0.03656899183988571, | |
| "rewards/rejected": -0.06461267918348312, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 11977840.992411703, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -2.5079355239868164, | |
| "logits/rejected": -2.510873556137085, | |
| "logps/chosen": -100.9855728149414, | |
| "logps/rejected": -157.40550231933594, | |
| "loss": 117766.25, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.029782477766275406, | |
| "rewards/margins": 0.0452786386013031, | |
| "rewards/rejected": -0.07506111264228821, | |
| "step": 2500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |