Invalid JSON:Unexpected token 'N', ..."ejected": NaN,
"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996685449121644, | |
| "eval_steps": 200, | |
| "global_step": 754, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.013258203513423931, | |
| "grad_norm": 27.282764434814453, | |
| "learning_rate": 5.000000000000001e-07, | |
| "logits/chosen": -0.5551050901412964, | |
| "logits/rejected": -0.5903115272521973, | |
| "logps/chosen": -123.05072021484375, | |
| "logps/rejected": -128.62611389160156, | |
| "loss": 1.9744, | |
| "nll_loss": 2.560427188873291, | |
| "rewards/accuracies": 0.38749998807907104, | |
| "rewards/chosen": -0.002333300421014428, | |
| "rewards/margins": -0.002295339945703745, | |
| "rewards/rejected": -3.796028977376409e-05, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026516407026847863, | |
| "grad_norm": 17.47486114501953, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "logits/chosen": -0.5378572940826416, | |
| "logits/rejected": -0.5796166658401489, | |
| "logps/chosen": -125.56513977050781, | |
| "logps/rejected": -122.72200012207031, | |
| "loss": 1.9287, | |
| "nll_loss": 2.4739668369293213, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 0.0028954462613910437, | |
| "rewards/margins": 0.004081044811755419, | |
| "rewards/rejected": -0.0011855984339490533, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.039774610540271794, | |
| "grad_norm": 28.93717384338379, | |
| "learning_rate": 1.5e-06, | |
| "logits/chosen": -0.5573912262916565, | |
| "logits/rejected": -0.6301255226135254, | |
| "logps/chosen": -120.21688079833984, | |
| "logps/rejected": -120.69698333740234, | |
| "loss": 1.9568, | |
| "nll_loss": 2.5327491760253906, | |
| "rewards/accuracies": 0.628125011920929, | |
| "rewards/chosen": 0.006910824682563543, | |
| "rewards/margins": 0.007299685385078192, | |
| "rewards/rejected": -0.0003888603823725134, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.053032814053695726, | |
| "grad_norm": 26.4592227935791, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "logits/chosen": -0.5415462255477905, | |
| "logits/rejected": -0.5897966623306274, | |
| "logps/chosen": -123.216064453125, | |
| "logps/rejected": -116.96390533447266, | |
| "loss": 1.945, | |
| "nll_loss": 2.5195693969726562, | |
| "rewards/accuracies": 0.6968749761581421, | |
| "rewards/chosen": 0.02091406285762787, | |
| "rewards/margins": 0.020817000418901443, | |
| "rewards/rejected": 9.70602995948866e-05, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06629101756711965, | |
| "grad_norm": 17.95655059814453, | |
| "learning_rate": 2.5e-06, | |
| "logits/chosen": -0.5369003415107727, | |
| "logits/rejected": -0.5648149251937866, | |
| "logps/chosen": -112.5962905883789, | |
| "logps/rejected": -106.9513931274414, | |
| "loss": 1.93, | |
| "nll_loss": 2.506865978240967, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.04093035310506821, | |
| "rewards/margins": 0.0435122512280941, | |
| "rewards/rejected": -0.0025818957947194576, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07954922108054359, | |
| "grad_norm": 18.845481872558594, | |
| "learning_rate": 3e-06, | |
| "logits/chosen": -0.5306503176689148, | |
| "logits/rejected": -0.5870386958122253, | |
| "logps/chosen": -115.35811614990234, | |
| "logps/rejected": -119.94677734375, | |
| "loss": 1.9014, | |
| "nll_loss": 2.470397472381592, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0722598135471344, | |
| "rewards/margins": 0.07325105369091034, | |
| "rewards/rejected": -0.000991240842267871, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09280742459396751, | |
| "grad_norm": 22.062213897705078, | |
| "learning_rate": 3.5e-06, | |
| "logits/chosen": -0.5302293300628662, | |
| "logits/rejected": -0.5745421648025513, | |
| "logps/chosen": -107.54048156738281, | |
| "logps/rejected": -108.28858947753906, | |
| "loss": 1.9362, | |
| "nll_loss": 2.587956428527832, | |
| "rewards/accuracies": 0.7593749761581421, | |
| "rewards/chosen": 0.1419026404619217, | |
| "rewards/margins": 0.144887775182724, | |
| "rewards/rejected": -0.0029851621948182583, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10606562810739145, | |
| "grad_norm": 22.929567337036133, | |
| "learning_rate": 4.000000000000001e-06, | |
| "logits/chosen": -0.5311123132705688, | |
| "logits/rejected": -0.601387083530426, | |
| "logps/chosen": -106.30036926269531, | |
| "logps/rejected": -114.9739761352539, | |
| "loss": 1.8807, | |
| "nll_loss": 2.5304553508758545, | |
| "rewards/accuracies": 0.815625011920929, | |
| "rewards/chosen": 0.22294898331165314, | |
| "rewards/margins": 0.23250994086265564, | |
| "rewards/rejected": -0.009560950100421906, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11932383162081538, | |
| "grad_norm": 15.968583106994629, | |
| "learning_rate": 4.5e-06, | |
| "logits/chosen": -0.5320655703544617, | |
| "logits/rejected": -0.558965802192688, | |
| "logps/chosen": -113.92137145996094, | |
| "logps/rejected": -106.32939147949219, | |
| "loss": 1.8047, | |
| "nll_loss": 2.424870014190674, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.31752246618270874, | |
| "rewards/margins": 0.33052030205726624, | |
| "rewards/rejected": -0.012997796759009361, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1325820351342393, | |
| "grad_norm": 16.777925491333008, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": -0.5369315147399902, | |
| "logits/rejected": -0.550090491771698, | |
| "logps/chosen": -115.67036437988281, | |
| "logps/rejected": -113.78245544433594, | |
| "loss": 1.7842, | |
| "nll_loss": 2.421271800994873, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.4060588777065277, | |
| "rewards/margins": 0.42890095710754395, | |
| "rewards/rejected": -0.02284209243953228, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14584023864766324, | |
| "grad_norm": 12.77545166015625, | |
| "learning_rate": 4.923547400611622e-06, | |
| "logits/chosen": -0.4923822283744812, | |
| "logits/rejected": -0.550975501537323, | |
| "logps/chosen": -104.06398010253906, | |
| "logps/rejected": -105.51200103759766, | |
| "loss": 1.7203, | |
| "nll_loss": 2.3695566654205322, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.5946463942527771, | |
| "rewards/margins": 0.6244661211967468, | |
| "rewards/rejected": -0.02981976605951786, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15909844216108718, | |
| "grad_norm": 14.160531997680664, | |
| "learning_rate": 4.847094801223242e-06, | |
| "logits/chosen": -0.5261751413345337, | |
| "logits/rejected": -0.593400239944458, | |
| "logps/chosen": -109.50382995605469, | |
| "logps/rejected": -117.4461669921875, | |
| "loss": 1.6824, | |
| "nll_loss": 2.298811674118042, | |
| "rewards/accuracies": 0.831250011920929, | |
| "rewards/chosen": 0.646867573261261, | |
| "rewards/margins": 0.6841082572937012, | |
| "rewards/rejected": -0.037240687757730484, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.17235664567451112, | |
| "grad_norm": 11.232987403869629, | |
| "learning_rate": 4.770642201834863e-06, | |
| "logits/chosen": -0.5387733578681946, | |
| "logits/rejected": -0.5604445338249207, | |
| "logps/chosen": -105.5321044921875, | |
| "logps/rejected": -108.08251953125, | |
| "loss": 1.6447, | |
| "nll_loss": 2.28352689743042, | |
| "rewards/accuracies": 0.846875011920929, | |
| "rewards/chosen": 0.8856587409973145, | |
| "rewards/margins": 0.9316526651382446, | |
| "rewards/rejected": -0.04599405825138092, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.18561484918793503, | |
| "grad_norm": 13.136269569396973, | |
| "learning_rate": 4.694189602446483e-06, | |
| "logits/chosen": -0.4762907028198242, | |
| "logits/rejected": -0.5645761489868164, | |
| "logps/chosen": -104.01419830322266, | |
| "logps/rejected": -105.42718505859375, | |
| "loss": 1.5795, | |
| "nll_loss": 2.1962618827819824, | |
| "rewards/accuracies": 0.8687499761581421, | |
| "rewards/chosen": 1.1001434326171875, | |
| "rewards/margins": 1.1506679058074951, | |
| "rewards/rejected": -0.0505245216190815, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.19887305270135897, | |
| "grad_norm": 11.437210083007812, | |
| "learning_rate": 4.617737003058104e-06, | |
| "logits/chosen": -0.45225948095321655, | |
| "logits/rejected": -0.5816742181777954, | |
| "logps/chosen": -95.95039367675781, | |
| "logps/rejected": -116.07032775878906, | |
| "loss": 1.5695, | |
| "nll_loss": 2.187495708465576, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.3856970071792603, | |
| "rewards/margins": 1.4338386058807373, | |
| "rewards/rejected": -0.04814162850379944, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2121312562147829, | |
| "grad_norm": 11.217316627502441, | |
| "learning_rate": 4.541284403669725e-06, | |
| "logits/chosen": -0.4223412573337555, | |
| "logits/rejected": -0.5557634234428406, | |
| "logps/chosen": -99.70536804199219, | |
| "logps/rejected": -108.40791320800781, | |
| "loss": 1.5248, | |
| "nll_loss": 2.0865731239318848, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": 1.4318909645080566, | |
| "rewards/margins": 1.4701259136199951, | |
| "rewards/rejected": -0.03823506087064743, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.22538945972820681, | |
| "grad_norm": 11.582230567932129, | |
| "learning_rate": 4.464831804281346e-06, | |
| "logits/chosen": -0.4045068323612213, | |
| "logits/rejected": -0.5808693170547485, | |
| "logps/chosen": -101.94197845458984, | |
| "logps/rejected": -115.78692626953125, | |
| "loss": 1.5259, | |
| "nll_loss": 2.059906482696533, | |
| "rewards/accuracies": 0.871874988079071, | |
| "rewards/chosen": 1.6565885543823242, | |
| "rewards/margins": 1.685520887374878, | |
| "rewards/rejected": -0.028932059183716774, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.23864766324163075, | |
| "grad_norm": 9.85542106628418, | |
| "learning_rate": 4.388379204892967e-06, | |
| "logits/chosen": -0.40881863236427307, | |
| "logits/rejected": -0.5515257120132446, | |
| "logps/chosen": -94.77958679199219, | |
| "logps/rejected": -109.2522201538086, | |
| "loss": 1.4906, | |
| "nll_loss": 2.006005048751831, | |
| "rewards/accuracies": 0.8968750238418579, | |
| "rewards/chosen": 1.7907886505126953, | |
| "rewards/margins": 1.8029606342315674, | |
| "rewards/rejected": -0.012172091752290726, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.25190586675505466, | |
| "grad_norm": 11.775798797607422, | |
| "learning_rate": 4.311926605504588e-06, | |
| "logits/chosen": -0.40728694200515747, | |
| "logits/rejected": -0.5780837535858154, | |
| "logps/chosen": -102.8525619506836, | |
| "logps/rejected": -113.8287353515625, | |
| "loss": 1.5071, | |
| "nll_loss": 2.015195846557617, | |
| "rewards/accuracies": 0.903124988079071, | |
| "rewards/chosen": 1.94949209690094, | |
| "rewards/margins": 1.9533236026763916, | |
| "rewards/rejected": -0.003831386100500822, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2651640702684786, | |
| "grad_norm": 9.207958221435547, | |
| "learning_rate": 4.235474006116208e-06, | |
| "logits/chosen": -0.3382512331008911, | |
| "logits/rejected": -0.5608124136924744, | |
| "logps/chosen": -95.6505355834961, | |
| "logps/rejected": -115.0374755859375, | |
| "loss": 1.4625, | |
| "nll_loss": 1.900460958480835, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/chosen": 1.9269275665283203, | |
| "rewards/margins": 1.9018806219100952, | |
| "rewards/rejected": 0.0250468198210001, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2651640702684786, | |
| "eval_logits/chosen": 0.10900751501321793, | |
| "eval_logits/rejected": -0.6459429860115051, | |
| "eval_logps/chosen": -26.658893585205078, | |
| "eval_logps/rejected": -23.843191146850586, | |
| "eval_loss": 1.61775803565979, | |
| "eval_nll_loss": 2.3195407390594482, | |
| "eval_rewards/accuracies": 0.995156466960907, | |
| "eval_rewards/chosen": 1.6962153911590576, | |
| "eval_rewards/margins": 1.591001272201538, | |
| "eval_rewards/rejected": 0.10521402209997177, | |
| "eval_runtime": 126.4924, | |
| "eval_samples_per_second": 21.203, | |
| "eval_steps_per_second": 5.305, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.27842227378190254, | |
| "grad_norm": 11.874744415283203, | |
| "learning_rate": 4.1590214067278286e-06, | |
| "logits/chosen": -0.3222394287586212, | |
| "logits/rejected": -0.5524163246154785, | |
| "logps/chosen": -91.2304916381836, | |
| "logps/rejected": -112.20948791503906, | |
| "loss": 1.4373, | |
| "nll_loss": 1.863207221031189, | |
| "rewards/accuracies": 0.8843749761581421, | |
| "rewards/chosen": 2.0715649127960205, | |
| "rewards/margins": 2.029603958129883, | |
| "rewards/rejected": 0.04196098819375038, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2916804772953265, | |
| "grad_norm": 12.305173873901367, | |
| "learning_rate": 4.08256880733945e-06, | |
| "logits/chosen": -0.24730145931243896, | |
| "logits/rejected": -0.5199188590049744, | |
| "logps/chosen": -87.4412841796875, | |
| "logps/rejected": -108.43525695800781, | |
| "loss": 1.4032, | |
| "nll_loss": 1.8019367456436157, | |
| "rewards/accuracies": 0.8968750238418579, | |
| "rewards/chosen": 2.422152280807495, | |
| "rewards/margins": 2.348961114883423, | |
| "rewards/rejected": 0.07319097220897675, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3049386808087504, | |
| "grad_norm": 10.129953384399414, | |
| "learning_rate": 4.00611620795107e-06, | |
| "logits/chosen": -0.31045737862586975, | |
| "logits/rejected": -0.5804657340049744, | |
| "logps/chosen": -91.82988739013672, | |
| "logps/rejected": -122.0359878540039, | |
| "loss": 1.4386, | |
| "nll_loss": 1.850035309791565, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 2.229877233505249, | |
| "rewards/margins": 2.152693748474121, | |
| "rewards/rejected": 0.07718367874622345, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.31819688432217436, | |
| "grad_norm": 13.068564414978027, | |
| "learning_rate": 3.9296636085626916e-06, | |
| "logits/chosen": -0.2514588534832001, | |
| "logits/rejected": -0.5521794557571411, | |
| "logps/chosen": -87.9677963256836, | |
| "logps/rejected": -109.2548828125, | |
| "loss": 1.4216, | |
| "nll_loss": 1.8036372661590576, | |
| "rewards/accuracies": 0.909375011920929, | |
| "rewards/chosen": 2.569032907485962, | |
| "rewards/margins": 2.454697370529175, | |
| "rewards/rejected": 0.11433545500040054, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3314550878355983, | |
| "grad_norm": 9.738222122192383, | |
| "learning_rate": 3.853211009174313e-06, | |
| "logits/chosen": -0.22102048993110657, | |
| "logits/rejected": -0.5118038654327393, | |
| "logps/chosen": -81.05973815917969, | |
| "logps/rejected": -107.3470458984375, | |
| "loss": 1.3802, | |
| "nll_loss": 1.738581895828247, | |
| "rewards/accuracies": 0.890625, | |
| "rewards/chosen": 2.435701847076416, | |
| "rewards/margins": 2.2943472862243652, | |
| "rewards/rejected": 0.14135441184043884, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.34471329134902223, | |
| "grad_norm": 11.227466583251953, | |
| "learning_rate": 3.776758409785933e-06, | |
| "logits/chosen": -0.24970126152038574, | |
| "logits/rejected": -0.5423383116722107, | |
| "logps/chosen": -90.58589172363281, | |
| "logps/rejected": -124.6546859741211, | |
| "loss": 1.4231, | |
| "nll_loss": 1.796224594116211, | |
| "rewards/accuracies": 0.8843749761581421, | |
| "rewards/chosen": 2.3060898780822754, | |
| "rewards/margins": 2.1725523471832275, | |
| "rewards/rejected": 0.13353754580020905, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3579714948624461, | |
| "grad_norm": 10.950806617736816, | |
| "learning_rate": 3.7003058103975537e-06, | |
| "logits/chosen": -0.22132663428783417, | |
| "logits/rejected": -0.5066910982131958, | |
| "logps/chosen": -83.74676513671875, | |
| "logps/rejected": -106.14500427246094, | |
| "loss": 1.3775, | |
| "nll_loss": 1.7145506143569946, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": 2.4925014972686768, | |
| "rewards/margins": 2.321049690246582, | |
| "rewards/rejected": 0.17145180702209473, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.37122969837587005, | |
| "grad_norm": 10.97486686706543, | |
| "learning_rate": 3.6238532110091746e-06, | |
| "logits/chosen": -0.258320152759552, | |
| "logits/rejected": -0.5529795289039612, | |
| "logps/chosen": -86.88526916503906, | |
| "logps/rejected": -111.84498596191406, | |
| "loss": 1.3977, | |
| "nll_loss": 1.7480520009994507, | |
| "rewards/accuracies": 0.8968750238418579, | |
| "rewards/chosen": 2.6251580715179443, | |
| "rewards/margins": 2.444127321243286, | |
| "rewards/rejected": 0.1810309737920761, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.384487901889294, | |
| "grad_norm": 9.71605110168457, | |
| "learning_rate": 3.5474006116207954e-06, | |
| "logits/chosen": -0.2799197733402252, | |
| "logits/rejected": -0.5588380098342896, | |
| "logps/chosen": -95.93229675292969, | |
| "logps/rejected": -128.71484375, | |
| "loss": 1.4329, | |
| "nll_loss": 1.8220994472503662, | |
| "rewards/accuracies": 0.940625011920929, | |
| "rewards/chosen": 2.5101046562194824, | |
| "rewards/margins": 2.3628010749816895, | |
| "rewards/rejected": 0.14730362594127655, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.39774610540271793, | |
| "grad_norm": 21.213260650634766, | |
| "learning_rate": 3.4709480122324163e-06, | |
| "logits/chosen": -0.18812108039855957, | |
| "logits/rejected": -0.50641930103302, | |
| "logps/chosen": -90.42585754394531, | |
| "logps/rejected": -107.69468688964844, | |
| "loss": 1.39, | |
| "nll_loss": 1.7294094562530518, | |
| "rewards/accuracies": 0.940625011920929, | |
| "rewards/chosen": 2.7429447174072266, | |
| "rewards/margins": 2.536832094192505, | |
| "rewards/rejected": 0.20611290633678436, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.41100430891614187, | |
| "grad_norm": 11.334510803222656, | |
| "learning_rate": 3.394495412844037e-06, | |
| "logits/chosen": -0.2053213119506836, | |
| "logits/rejected": -0.5050525665283203, | |
| "logps/chosen": -84.91264343261719, | |
| "logps/rejected": -112.988037109375, | |
| "loss": 1.375, | |
| "nll_loss": 1.7265828847885132, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": 2.71644926071167, | |
| "rewards/margins": 2.498016834259033, | |
| "rewards/rejected": 0.21843275427818298, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4242625124295658, | |
| "grad_norm": 10.023775100708008, | |
| "learning_rate": 3.318042813455658e-06, | |
| "logits/chosen": -0.20158584415912628, | |
| "logits/rejected": -0.5022256970405579, | |
| "logps/chosen": -90.71162414550781, | |
| "logps/rejected": -111.33503723144531, | |
| "loss": 1.381, | |
| "nll_loss": 1.7314481735229492, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 2.7540974617004395, | |
| "rewards/margins": 2.4751474857330322, | |
| "rewards/rejected": 0.2789500057697296, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.43752071594298975, | |
| "grad_norm": 10.706851959228516, | |
| "learning_rate": 3.2415902140672784e-06, | |
| "logits/chosen": -0.1562536209821701, | |
| "logits/rejected": -0.5007289052009583, | |
| "logps/chosen": -89.73895263671875, | |
| "logps/rejected": -106.5843276977539, | |
| "loss": 1.3693, | |
| "nll_loss": 1.6994127035140991, | |
| "rewards/accuracies": 0.903124988079071, | |
| "rewards/chosen": 2.805201292037964, | |
| "rewards/margins": 2.5105228424072266, | |
| "rewards/rejected": 0.29467862844467163, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.45077891945641363, | |
| "grad_norm": 25.567277908325195, | |
| "learning_rate": 3.1651376146788993e-06, | |
| "logits/chosen": -0.23418506979942322, | |
| "logits/rejected": -0.5170575976371765, | |
| "logps/chosen": -102.624267578125, | |
| "logps/rejected": -112.3178939819336, | |
| "loss": 1.4032, | |
| "nll_loss": 1.7835899591445923, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 2.494511842727661, | |
| "rewards/margins": 2.1974825859069824, | |
| "rewards/rejected": 0.2970294654369354, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.46403712296983757, | |
| "grad_norm": 12.165983200073242, | |
| "learning_rate": 3.08868501529052e-06, | |
| "logits/chosen": -0.20166996121406555, | |
| "logits/rejected": -0.4993151128292084, | |
| "logps/chosen": -99.72298431396484, | |
| "logps/rejected": -117.60599517822266, | |
| "loss": 1.387, | |
| "nll_loss": 1.7688575983047485, | |
| "rewards/accuracies": 0.9156249761581421, | |
| "rewards/chosen": 2.6475043296813965, | |
| "rewards/margins": 2.348958969116211, | |
| "rewards/rejected": 0.2985452711582184, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4772953264832615, | |
| "grad_norm": 9.041903495788574, | |
| "learning_rate": 3.012232415902141e-06, | |
| "logits/chosen": -0.07940540462732315, | |
| "logits/rejected": -0.4662111699581146, | |
| "logps/chosen": -75.92832946777344, | |
| "logps/rejected": -104.47607421875, | |
| "loss": 1.3136, | |
| "nll_loss": 1.6304452419281006, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": 2.911555051803589, | |
| "rewards/margins": 2.5380759239196777, | |
| "rewards/rejected": 0.37347906827926636, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.49055352999668544, | |
| "grad_norm": 10.296601295471191, | |
| "learning_rate": 2.935779816513762e-06, | |
| "logits/chosen": -0.08891765028238297, | |
| "logits/rejected": -0.4381803572177887, | |
| "logps/chosen": -83.01859283447266, | |
| "logps/rejected": -97.76497650146484, | |
| "loss": 1.3309, | |
| "nll_loss": 1.6442056894302368, | |
| "rewards/accuracies": 0.9156249761581421, | |
| "rewards/chosen": 2.8263065814971924, | |
| "rewards/margins": 2.439760208129883, | |
| "rewards/rejected": 0.3865460455417633, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5038117335101093, | |
| "grad_norm": 11.621145248413086, | |
| "learning_rate": 2.8593272171253827e-06, | |
| "logits/chosen": -0.13201047480106354, | |
| "logits/rejected": -0.4527694582939148, | |
| "logps/chosen": -87.77113342285156, | |
| "logps/rejected": -122.87522888183594, | |
| "loss": 1.3474, | |
| "nll_loss": 1.6941699981689453, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 2.6734607219696045, | |
| "rewards/margins": 2.329221248626709, | |
| "rewards/rejected": 0.3442399501800537, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5170699370235333, | |
| "grad_norm": 9.234843254089355, | |
| "learning_rate": 2.782874617737003e-06, | |
| "logits/chosen": -0.13576461374759674, | |
| "logits/rejected": -0.5005991458892822, | |
| "logps/chosen": -98.45845031738281, | |
| "logps/rejected": -119.65950012207031, | |
| "loss": 1.3579, | |
| "nll_loss": 1.7100152969360352, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 2.792757034301758, | |
| "rewards/margins": 2.4400179386138916, | |
| "rewards/rejected": 0.35273903608322144, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5303281405369572, | |
| "grad_norm": 10.394464492797852, | |
| "learning_rate": 2.706422018348624e-06, | |
| "logits/chosen": -0.14902424812316895, | |
| "logits/rejected": -0.49045664072036743, | |
| "logps/chosen": -96.21540069580078, | |
| "logps/rejected": -113.64534759521484, | |
| "loss": 1.3661, | |
| "nll_loss": 1.73909592628479, | |
| "rewards/accuracies": 0.9156249761581421, | |
| "rewards/chosen": 2.7949509620666504, | |
| "rewards/margins": 2.4181408882141113, | |
| "rewards/rejected": 0.3768100440502167, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5303281405369572, | |
| "eval_logits/chosen": 0.7696120142936707, | |
| "eval_logits/rejected": -0.5085250735282898, | |
| "eval_logps/chosen": -23.760295867919922, | |
| "eval_logps/rejected": -19.921955108642578, | |
| "eval_loss": 1.6759577989578247, | |
| "eval_nll_loss": 2.218850612640381, | |
| "eval_rewards/accuracies": 0.8729507923126221, | |
| "eval_rewards/chosen": 1.9860752820968628, | |
| "eval_rewards/margins": 1.4887374639511108, | |
| "eval_rewards/rejected": 0.4973376393318176, | |
| "eval_runtime": 126.408, | |
| "eval_samples_per_second": 21.217, | |
| "eval_steps_per_second": 5.308, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5435863440503812, | |
| "grad_norm": 12.399572372436523, | |
| "learning_rate": 2.629969418960245e-06, | |
| "logits/chosen": -0.05847010016441345, | |
| "logits/rejected": -0.42683249711990356, | |
| "logps/chosen": -82.53068542480469, | |
| "logps/rejected": -105.50111389160156, | |
| "loss": 1.3133, | |
| "nll_loss": 1.6254644393920898, | |
| "rewards/accuracies": 0.9156249761581421, | |
| "rewards/chosen": 3.054617166519165, | |
| "rewards/margins": 2.591078042984009, | |
| "rewards/rejected": 0.4635390341281891, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5568445475638051, | |
| "grad_norm": 11.11414909362793, | |
| "learning_rate": 2.5535168195718657e-06, | |
| "logits/chosen": -0.10004544258117676, | |
| "logits/rejected": -0.45182594656944275, | |
| "logps/chosen": -85.72924041748047, | |
| "logps/rejected": -116.79786682128906, | |
| "loss": 1.3335, | |
| "nll_loss": 1.6914409399032593, | |
| "rewards/accuracies": 0.934374988079071, | |
| "rewards/chosen": 2.7389349937438965, | |
| "rewards/margins": 2.347107410430908, | |
| "rewards/rejected": 0.39182740449905396, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5701027510772291, | |
| "grad_norm": 11.188493728637695, | |
| "learning_rate": 2.4770642201834866e-06, | |
| "logits/chosen": -0.0028325587045401335, | |
| "logits/rejected": -0.40763336420059204, | |
| "logps/chosen": -83.37824249267578, | |
| "logps/rejected": -90.0846176147461, | |
| "loss": 1.2739, | |
| "nll_loss": 1.597538709640503, | |
| "rewards/accuracies": 0.9593750238418579, | |
| "rewards/chosen": 3.1972365379333496, | |
| "rewards/margins": 2.69289493560791, | |
| "rewards/rejected": 0.5043416619300842, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.583360954590653, | |
| "grad_norm": 8.546418190002441, | |
| "learning_rate": 2.400611620795107e-06, | |
| "logits/chosen": -0.017235688865184784, | |
| "logits/rejected": -0.4104672372341156, | |
| "logps/chosen": -79.5479507446289, | |
| "logps/rejected": -100.47693634033203, | |
| "loss": 1.3056, | |
| "nll_loss": 1.6203285455703735, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 3.0248606204986572, | |
| "rewards/margins": 2.5377180576324463, | |
| "rewards/rejected": 0.4871426224708557, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.596619158104077, | |
| "grad_norm": 12.532038688659668, | |
| "learning_rate": 2.324159021406728e-06, | |
| "logits/chosen": -0.08448103815317154, | |
| "logits/rejected": -0.41612687706947327, | |
| "logps/chosen": -89.61862182617188, | |
| "logps/rejected": -112.32474517822266, | |
| "loss": 1.3386, | |
| "nll_loss": 1.6953102350234985, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": 2.803964376449585, | |
| "rewards/margins": 2.3399243354797363, | |
| "rewards/rejected": 0.4640396535396576, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6098773616175008, | |
| "grad_norm": 13.633370399475098, | |
| "learning_rate": 2.2477064220183487e-06, | |
| "logits/chosen": -0.013189451768994331, | |
| "logits/rejected": -0.41869059205055237, | |
| "logps/chosen": -90.39549255371094, | |
| "logps/rejected": -107.79378509521484, | |
| "loss": 1.289, | |
| "nll_loss": 1.6027923822402954, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 2.938788652420044, | |
| "rewards/margins": 2.443359851837158, | |
| "rewards/rejected": 0.49542921781539917, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6231355651309247, | |
| "grad_norm": 11.236469268798828, | |
| "learning_rate": 2.1712538226299696e-06, | |
| "logits/chosen": -0.09504064172506332, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -106.80879974365234, | |
| "logps/rejected": -117.15657806396484, | |
| "loss": 1.3584, | |
| "nll_loss": 1.7296257019042969, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 2.964348316192627, | |
| "rewards/margins": 2.4477875232696533, | |
| "rewards/rejected": 0.516560971736908, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6363937686443487, | |
| "grad_norm": 10.896163940429688, | |
| "learning_rate": 2.0948012232415905e-06, | |
| "logits/chosen": -0.0007806614157743752, | |
| "logits/rejected": -0.39822936058044434, | |
| "logps/chosen": -95.16539001464844, | |
| "logps/rejected": -116.85235595703125, | |
| "loss": 1.3068, | |
| "nll_loss": 1.6314979791641235, | |
| "rewards/accuracies": 0.903124988079071, | |
| "rewards/chosen": 2.8457746505737305, | |
| "rewards/margins": 2.327517509460449, | |
| "rewards/rejected": 0.518257200717926, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6496519721577726, | |
| "grad_norm": 7.040874004364014, | |
| "learning_rate": 2.0183486238532113e-06, | |
| "logits/chosen": 0.059870027005672455, | |
| "logits/rejected": -0.3692580461502075, | |
| "logps/chosen": -71.55367279052734, | |
| "logps/rejected": -92.91752624511719, | |
| "loss": 1.2388, | |
| "nll_loss": 1.5524179935455322, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 3.224459171295166, | |
| "rewards/margins": 2.605776309967041, | |
| "rewards/rejected": 0.6186825037002563, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6629101756711966, | |
| "grad_norm": 12.266393661499023, | |
| "learning_rate": 1.9418960244648317e-06, | |
| "logits/chosen": -0.09728819876909256, | |
| "logits/rejected": -0.4279538094997406, | |
| "logps/chosen": -98.3723373413086, | |
| "logps/rejected": -116.90461730957031, | |
| "loss": 1.3497, | |
| "nll_loss": 1.7364962100982666, | |
| "rewards/accuracies": 0.9281250238418579, | |
| "rewards/chosen": 2.8716821670532227, | |
| "rewards/margins": 2.3796398639678955, | |
| "rewards/rejected": 0.4920427203178406, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6761683791846205, | |
| "grad_norm": 11.75126838684082, | |
| "learning_rate": 1.8654434250764528e-06, | |
| "logits/chosen": -0.06470651179552078, | |
| "logits/rejected": -0.40114492177963257, | |
| "logps/chosen": -93.51762390136719, | |
| "logps/rejected": -114.25923156738281, | |
| "loss": 1.325, | |
| "nll_loss": 1.6921663284301758, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 3.008223056793213, | |
| "rewards/margins": 2.433964490890503, | |
| "rewards/rejected": 0.5742586851119995, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6894265826980445, | |
| "grad_norm": 9.93482494354248, | |
| "learning_rate": 1.7889908256880737e-06, | |
| "logits/chosen": 0.08653802424669266, | |
| "logits/rejected": -0.34983566403388977, | |
| "logps/chosen": -74.48589324951172, | |
| "logps/rejected": -97.51747131347656, | |
| "loss": 1.2279, | |
| "nll_loss": 1.5161999464035034, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 3.1022284030914307, | |
| "rewards/margins": 2.469442367553711, | |
| "rewards/rejected": 0.6327860951423645, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7026847862114683, | |
| "grad_norm": 10.692046165466309, | |
| "learning_rate": 1.7125382262996943e-06, | |
| "logits/chosen": 0.032983891665935516, | |
| "logits/rejected": -0.3582807779312134, | |
| "logps/chosen": -82.62593078613281, | |
| "logps/rejected": -102.52696228027344, | |
| "loss": 1.2671, | |
| "nll_loss": 1.5854206085205078, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 3.2378318309783936, | |
| "rewards/margins": 2.618978977203369, | |
| "rewards/rejected": 0.618852436542511, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7159429897248922, | |
| "grad_norm": 16.475303649902344, | |
| "learning_rate": 1.6360856269113152e-06, | |
| "logits/chosen": -0.015640150755643845, | |
| "logits/rejected": -0.3781605362892151, | |
| "logps/chosen": -90.1484146118164, | |
| "logps/rejected": -112.01336669921875, | |
| "loss": 1.2988, | |
| "nll_loss": 1.6478378772735596, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 2.8458220958709717, | |
| "rewards/margins": 2.3063721656799316, | |
| "rewards/rejected": 0.53944993019104, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7292011932383162, | |
| "grad_norm": 9.720301628112793, | |
| "learning_rate": 1.559633027522936e-06, | |
| "logits/chosen": -0.07955951988697052, | |
| "logits/rejected": -0.4525434374809265, | |
| "logps/chosen": -93.29837799072266, | |
| "logps/rejected": -139.53305053710938, | |
| "loss": 1.3062, | |
| "nll_loss": 1.6710792779922485, | |
| "rewards/accuracies": 0.934374988079071, | |
| "rewards/chosen": 2.821993827819824, | |
| "rewards/margins": 2.3934006690979004, | |
| "rewards/rejected": 0.42859315872192383, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7424593967517401, | |
| "grad_norm": 12.09350872039795, | |
| "learning_rate": 1.4831804281345567e-06, | |
| "logits/chosen": 0.014041140675544739, | |
| "logits/rejected": -0.37960466742515564, | |
| "logps/chosen": -93.00960540771484, | |
| "logps/rejected": -117.96089935302734, | |
| "loss": 1.2977, | |
| "nll_loss": 1.6683666706085205, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 3.0185599327087402, | |
| "rewards/margins": 2.459902286529541, | |
| "rewards/rejected": 0.5586578249931335, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7557176002651641, | |
| "grad_norm": 9.332964897155762, | |
| "learning_rate": 1.4067278287461775e-06, | |
| "logits/chosen": -0.0421045646071434, | |
| "logits/rejected": -0.3469962775707245, | |
| "logps/chosen": -93.99418640136719, | |
| "logps/rejected": -120.124267578125, | |
| "loss": 1.3144, | |
| "nll_loss": 1.673651099205017, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 2.913119077682495, | |
| "rewards/margins": 2.3597278594970703, | |
| "rewards/rejected": 0.5533913373947144, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.768975803778588, | |
| "grad_norm": 8.982401847839355, | |
| "learning_rate": 1.3302752293577984e-06, | |
| "logits/chosen": 0.049627698957920074, | |
| "logits/rejected": -0.3409837484359741, | |
| "logps/chosen": -80.563232421875, | |
| "logps/rejected": -110.6832504272461, | |
| "loss": 1.2686, | |
| "nll_loss": 1.6071580648422241, | |
| "rewards/accuracies": 0.9468749761581421, | |
| "rewards/chosen": 3.0636610984802246, | |
| "rewards/margins": 2.460207223892212, | |
| "rewards/rejected": 0.6034537553787231, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.782234007292012, | |
| "grad_norm": 11.456655502319336, | |
| "learning_rate": 1.253822629969419e-06, | |
| "logits/chosen": 0.01245723944157362, | |
| "logits/rejected": -0.34452953934669495, | |
| "logps/chosen": -83.06973266601562, | |
| "logps/rejected": -112.3355712890625, | |
| "loss": 1.2596, | |
| "nll_loss": 1.579742193222046, | |
| "rewards/accuracies": 0.934374988079071, | |
| "rewards/chosen": 3.0834407806396484, | |
| "rewards/margins": 2.512244701385498, | |
| "rewards/rejected": 0.5711959600448608, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7954922108054359, | |
| "grad_norm": 14.906444549560547, | |
| "learning_rate": 1.17737003058104e-06, | |
| "logits/chosen": 0.10093510150909424, | |
| "logits/rejected": -0.3423386812210083, | |
| "logps/chosen": -84.59368896484375, | |
| "logps/rejected": -118.52232360839844, | |
| "loss": 1.2424, | |
| "nll_loss": 1.5443143844604492, | |
| "rewards/accuracies": 0.934374988079071, | |
| "rewards/chosen": 3.1625304222106934, | |
| "rewards/margins": 2.5714595317840576, | |
| "rewards/rejected": 0.5910708904266357, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7954922108054359, | |
| "eval_logits/chosen": 1.064488410949707, | |
| "eval_logits/rejected": -0.3661547303199768, | |
| "eval_logps/chosen": -22.925987243652344, | |
| "eval_logps/rejected": -17.408470153808594, | |
| "eval_loss": 1.7094613313674927, | |
| "eval_nll_loss": 2.171783685684204, | |
| "eval_rewards/accuracies": 0.7257823944091797, | |
| "eval_rewards/chosen": 2.0695061683654785, | |
| "eval_rewards/margins": 1.3208197355270386, | |
| "eval_rewards/rejected": 0.7486862540245056, | |
| "eval_runtime": 126.8733, | |
| "eval_samples_per_second": 21.139, | |
| "eval_steps_per_second": 5.289, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8087504143188597, | |
| "grad_norm": 11.739520072937012, | |
| "learning_rate": 1.1009174311926608e-06, | |
| "logits/chosen": 0.1604587882757187, | |
| "logits/rejected": -0.30457383394241333, | |
| "logps/chosen": -77.71000671386719, | |
| "logps/rejected": -100.43540954589844, | |
| "loss": 1.2066, | |
| "nll_loss": 1.4790329933166504, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 3.3722426891326904, | |
| "rewards/margins": 2.629152297973633, | |
| "rewards/rejected": 0.743090808391571, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8220086178322837, | |
| "grad_norm": 10.975104331970215, | |
| "learning_rate": 1.0244648318042814e-06, | |
| "logits/chosen": 0.08241738379001617, | |
| "logits/rejected": -0.30928146839141846, | |
| "logps/chosen": -87.46654510498047, | |
| "logps/rejected": -106.96031188964844, | |
| "loss": 1.2453, | |
| "nll_loss": 1.555537223815918, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 3.170712947845459, | |
| "rewards/margins": 2.561098575592041, | |
| "rewards/rejected": 0.6096144318580627, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8352668213457076, | |
| "grad_norm": 13.56505298614502, | |
| "learning_rate": 9.480122324159022e-07, | |
| "logits/chosen": 0.023757517337799072, | |
| "logits/rejected": -0.33807113766670227, | |
| "logps/chosen": -94.47450256347656, | |
| "logps/rejected": -128.47509765625, | |
| "loss": 1.2845, | |
| "nll_loss": 1.631945013999939, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 2.836199998855591, | |
| "rewards/margins": 2.2965328693389893, | |
| "rewards/rejected": 0.5396672487258911, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8485250248591316, | |
| "grad_norm": 16.084320068359375, | |
| "learning_rate": 8.71559633027523e-07, | |
| "logits/chosen": 0.0821368619799614, | |
| "logits/rejected": -0.33640867471694946, | |
| "logps/chosen": -78.98149108886719, | |
| "logps/rejected": -111.6216049194336, | |
| "loss": 1.2382, | |
| "nll_loss": 1.5553481578826904, | |
| "rewards/accuracies": 0.9468749761581421, | |
| "rewards/chosen": 3.1928162574768066, | |
| "rewards/margins": 2.547853708267212, | |
| "rewards/rejected": 0.6449624300003052, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8617832283725555, | |
| "grad_norm": 9.197409629821777, | |
| "learning_rate": 7.951070336391438e-07, | |
| "logits/chosen": 0.11906716972589493, | |
| "logits/rejected": -0.3259919583797455, | |
| "logps/chosen": -74.3010482788086, | |
| "logps/rejected": -108.84968566894531, | |
| "loss": 1.2331, | |
| "nll_loss": 1.558100938796997, | |
| "rewards/accuracies": 0.965624988079071, | |
| "rewards/chosen": 3.2675209045410156, | |
| "rewards/margins": 2.6362545490264893, | |
| "rewards/rejected": 0.6312668323516846, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8750414318859795, | |
| "grad_norm": 9.966322898864746, | |
| "learning_rate": 7.186544342507645e-07, | |
| "logits/chosen": 0.03254573419690132, | |
| "logits/rejected": -0.3160571753978729, | |
| "logps/chosen": -85.23878479003906, | |
| "logps/rejected": -104.35621643066406, | |
| "loss": 1.2927, | |
| "nll_loss": 1.662453293800354, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 3.136343002319336, | |
| "rewards/margins": 2.490025043487549, | |
| "rewards/rejected": 0.6463181376457214, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8882996353994034, | |
| "grad_norm": 9.719200134277344, | |
| "learning_rate": 6.422018348623854e-07, | |
| "logits/chosen": 0.060719866305589676, | |
| "logits/rejected": -0.37117859721183777, | |
| "logps/chosen": -82.31736755371094, | |
| "logps/rejected": -121.16090393066406, | |
| "loss": 1.2738, | |
| "nll_loss": 1.6195169687271118, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 3.1395468711853027, | |
| "rewards/margins": 2.5326294898986816, | |
| "rewards/rejected": 0.6069172620773315, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9015578389128273, | |
| "grad_norm": 10.185211181640625, | |
| "learning_rate": 5.657492354740061e-07, | |
| "logits/chosen": 0.16828341782093048, | |
| "logits/rejected": -0.28492841124534607, | |
| "logps/chosen": -74.0738754272461, | |
| "logps/rejected": -89.85967254638672, | |
| "loss": 1.2257, | |
| "nll_loss": 1.5249927043914795, | |
| "rewards/accuracies": 0.934374988079071, | |
| "rewards/chosen": 3.2941131591796875, | |
| "rewards/margins": 2.549973249435425, | |
| "rewards/rejected": 0.744140088558197, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9148160424262513, | |
| "grad_norm": 11.983025550842285, | |
| "learning_rate": 4.89296636085627e-07, | |
| "logits/chosen": 0.035757843405008316, | |
| "logits/rejected": -0.3307420015335083, | |
| "logps/chosen": -98.5334701538086, | |
| "logps/rejected": -112.04931640625, | |
| "loss": 1.2842, | |
| "nll_loss": 1.63511061668396, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 3.1852855682373047, | |
| "rewards/margins": 2.5147435665130615, | |
| "rewards/rejected": 0.6705416440963745, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9280742459396751, | |
| "grad_norm": 9.73780632019043, | |
| "learning_rate": 4.128440366972478e-07, | |
| "logits/chosen": 0.06930799782276154, | |
| "logits/rejected": -0.33910712599754333, | |
| "logps/chosen": -86.6443862915039, | |
| "logps/rejected": -112.4611587524414, | |
| "loss": 1.2466, | |
| "nll_loss": 1.5855239629745483, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 3.226916551589966, | |
| "rewards/margins": 2.535816192626953, | |
| "rewards/rejected": 0.6910998225212097, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9413324494530991, | |
| "grad_norm": 9.678099632263184, | |
| "learning_rate": 3.363914373088685e-07, | |
| "logits/chosen": 0.12796175479888916, | |
| "logits/rejected": -0.29829975962638855, | |
| "logps/chosen": -83.7620620727539, | |
| "logps/rejected": -99.74095153808594, | |
| "loss": 1.2307, | |
| "nll_loss": 1.524804711341858, | |
| "rewards/accuracies": 0.9468749761581421, | |
| "rewards/chosen": 3.5030083656311035, | |
| "rewards/margins": 2.73651385307312, | |
| "rewards/rejected": 0.7664941549301147, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.954590652966523, | |
| "grad_norm": 9.837422370910645, | |
| "learning_rate": 2.599388379204893e-07, | |
| "logits/chosen": 0.06604432314634323, | |
| "logits/rejected": -0.326642245054245, | |
| "logps/chosen": -87.40840911865234, | |
| "logps/rejected": -112.01222229003906, | |
| "loss": 1.2683, | |
| "nll_loss": 1.62287175655365, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 3.202043056488037, | |
| "rewards/margins": 2.5358872413635254, | |
| "rewards/rejected": 0.6661559343338013, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.967848856479947, | |
| "grad_norm": 14.159199714660645, | |
| "learning_rate": 1.8348623853211012e-07, | |
| "logits/chosen": 0.02280101552605629, | |
| "logits/rejected": -0.3272295594215393, | |
| "logps/chosen": -84.34774017333984, | |
| "logps/rejected": -106.10346984863281, | |
| "loss": 1.2922, | |
| "nll_loss": 1.6527671813964844, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 3.2514991760253906, | |
| "rewards/margins": 2.529658555984497, | |
| "rewards/rejected": 0.7218402624130249, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.9811070599933709, | |
| "grad_norm": 9.912482261657715, | |
| "learning_rate": 1.070336391437309e-07, | |
| "logits/chosen": 0.09289325773715973, | |
| "logits/rejected": -0.31532809138298035, | |
| "logps/chosen": -82.03899383544922, | |
| "logps/rejected": -116.87910461425781, | |
| "loss": 1.2127, | |
| "nll_loss": 1.5196421146392822, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 3.268120527267456, | |
| "rewards/margins": 2.592498302459717, | |
| "rewards/rejected": 0.6756229996681213, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9943652635067949, | |
| "grad_norm": 10.23357105255127, | |
| "learning_rate": 3.0581039755351686e-08, | |
| "logits/chosen": 0.015320442616939545, | |
| "logits/rejected": -0.3034003674983978, | |
| "logps/chosen": -99.24415588378906, | |
| "logps/rejected": -107.158447265625, | |
| "loss": 1.3115, | |
| "nll_loss": 1.6806989908218384, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 3.2072606086730957, | |
| "rewards/margins": 2.46455717086792, | |
| "rewards/rejected": 0.7427036166191101, | |
| "step": 750 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 754, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |