| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1309, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007639419404125286, |
| "grad_norm": 258.47398808043715, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.5234375, |
| "logps/chosen": -540.0, |
| "logps/rejected": -464.0, |
| "loss": 0.6924, |
| "nll_loss": 1.2421875, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.007639419404125287, |
| "grad_norm": 314.4150784740708, |
| "learning_rate": 3.435114503816794e-08, |
| "logits/chosen": -0.3402777910232544, |
| "logits/rejected": -0.34063720703125, |
| "logps/chosen": -493.77777099609375, |
| "logps/rejected": -482.8888854980469, |
| "loss": 0.6921, |
| "nll_loss": 0.9296875, |
| "rewards/accuracies": 0.2083333283662796, |
| "rewards/chosen": -0.00835503451526165, |
| "rewards/margins": 0.00830756314098835, |
| "rewards/rejected": -0.0166490338742733, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.015278838808250574, |
| "grad_norm": 260.33930694178486, |
| "learning_rate": 7.251908396946564e-08, |
| "logits/chosen": -0.4122558534145355, |
| "logits/rejected": -0.42744141817092896, |
| "logps/chosen": -515.4000244140625, |
| "logps/rejected": -470.3999938964844, |
| "loss": 0.7081, |
| "nll_loss": 0.98046875, |
| "rewards/accuracies": 0.20000000298023224, |
| "rewards/chosen": -0.02316894568502903, |
| "rewards/margins": -0.0243988037109375, |
| "rewards/rejected": 0.0012573242420330644, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02291825821237586, |
| "grad_norm": 231.79105073106817, |
| "learning_rate": 1.1068702290076336e-07, |
| "logits/chosen": -0.38813477754592896, |
| "logits/rejected": -0.3990234434604645, |
| "logps/chosen": -586.2000122070312, |
| "logps/rejected": -565.2000122070312, |
| "loss": 0.6695, |
| "nll_loss": 0.967968761920929, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 0.07893677055835724, |
| "rewards/margins": 0.06370849907398224, |
| "rewards/rejected": 0.01502075232565403, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.030557677616501147, |
| "grad_norm": 207.3625348390448, |
| "learning_rate": 1.4885496183206107e-07, |
| "logits/chosen": -0.4215331971645355, |
| "logits/rejected": -0.3983154296875, |
| "logps/chosen": -525.5999755859375, |
| "logps/rejected": -436.20001220703125, |
| "loss": 0.6524, |
| "nll_loss": 0.9371093511581421, |
| "rewards/accuracies": 0.42500001192092896, |
| "rewards/chosen": 0.13503417372703552, |
| "rewards/margins": 0.10765381157398224, |
| "rewards/rejected": 0.02747192420065403, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03819709702062643, |
| "grad_norm": 226.87256558590633, |
| "learning_rate": 1.8702290076335877e-07, |
| "logits/chosen": -0.176025390625, |
| "logits/rejected": -0.181396484375, |
| "logps/chosen": -536.4000244140625, |
| "logps/rejected": -475.20001220703125, |
| "loss": 0.6743, |
| "nll_loss": 0.833984375, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": 0.20390625298023224, |
| "rewards/margins": 0.06151122972369194, |
| "rewards/rejected": 0.14276733994483948, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04583651642475172, |
| "grad_norm": 183.79471712514464, |
| "learning_rate": 2.2519083969465648e-07, |
| "logits/chosen": -0.36970216035842896, |
| "logits/rejected": -0.38398438692092896, |
| "logps/chosen": -692.4000244140625, |
| "logps/rejected": -639.5999755859375, |
| "loss": 0.5822, |
| "nll_loss": 0.9417968988418579, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.377685546875, |
| "rewards/margins": 0.3079589903354645, |
| "rewards/rejected": 0.07008667290210724, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.053475935828877004, |
| "grad_norm": 193.89130497070676, |
| "learning_rate": 2.633587786259542e-07, |
| "logits/chosen": -0.397705078125, |
| "logits/rejected": -0.43745118379592896, |
| "logps/chosen": -535.7999877929688, |
| "logps/rejected": -487.3999938964844, |
| "loss": 0.5212, |
| "nll_loss": 0.9515625238418579, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.46650391817092896, |
| "rewards/margins": 0.5068359375, |
| "rewards/rejected": -0.0400390625, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.061115355233002294, |
| "grad_norm": 204.2303212377364, |
| "learning_rate": 3.015267175572519e-07, |
| "logits/chosen": -0.32470703125, |
| "logits/rejected": -0.3303466737270355, |
| "logps/chosen": -473.6000061035156, |
| "logps/rejected": -454.0, |
| "loss": 0.5459, |
| "nll_loss": 0.876171886920929, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.556835949420929, |
| "rewards/margins": 0.4847656190395355, |
| "rewards/rejected": 0.07137451320886612, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06875477463712758, |
| "grad_norm": 155.46344646489163, |
| "learning_rate": 3.396946564885496e-07, |
| "logits/chosen": -0.32172852754592896, |
| "logits/rejected": -0.33759766817092896, |
| "logps/chosen": -715.7999877929688, |
| "logps/rejected": -700.5999755859375, |
| "loss": 0.5522, |
| "nll_loss": 1.0656249523162842, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": 0.6611328125, |
| "rewards/margins": 0.5273803472518921, |
| "rewards/rejected": 0.1341552734375, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07639419404125286, |
| "grad_norm": 141.18575275944212, |
| "learning_rate": 3.7786259541984735e-07, |
| "logits/chosen": -0.3919433653354645, |
| "logits/rejected": -0.40632325410842896, |
| "logps/chosen": -533.0, |
| "logps/rejected": -520.7999877929688, |
| "loss": 0.5137, |
| "nll_loss": 0.940234363079071, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.7120116949081421, |
| "rewards/margins": 0.7798827886581421, |
| "rewards/rejected": -0.06877441704273224, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08403361344537816, |
| "grad_norm": 231.1316603751942, |
| "learning_rate": 4.1603053435114506e-07, |
| "logits/chosen": -0.2874999940395355, |
| "logits/rejected": -0.3013549745082855, |
| "logps/chosen": -599.2000122070312, |
| "logps/rejected": -513.0, |
| "loss": 0.4408, |
| "nll_loss": 0.859375, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 1.0283203125, |
| "rewards/margins": 1.157617211341858, |
| "rewards/rejected": -0.12840576469898224, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09167303284950344, |
| "grad_norm": 132.89992060448196, |
| "learning_rate": 4.541984732824427e-07, |
| "logits/chosen": -0.25129395723342896, |
| "logits/rejected": -0.27121275663375854, |
| "logps/chosen": -602.4000244140625, |
| "logps/rejected": -581.7999877929688, |
| "loss": 0.4933, |
| "nll_loss": 0.8753906488418579, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.906445324420929, |
| "rewards/margins": 1.029296875, |
| "rewards/rejected": -0.121826171875, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09931245225362872, |
| "grad_norm": 133.95238732292788, |
| "learning_rate": 4.923664122137405e-07, |
| "logits/chosen": -0.21683350205421448, |
| "logits/rejected": -0.2558349668979645, |
| "logps/chosen": -614.2000122070312, |
| "logps/rejected": -569.5999755859375, |
| "loss": 0.3928, |
| "nll_loss": 0.8531249761581421, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.694927990436554, |
| "rewards/margins": 1.353906273841858, |
| "rewards/rejected": -0.658007800579071, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10695187165775401, |
| "grad_norm": 168.1618677127587, |
| "learning_rate": 4.966044142614601e-07, |
| "logits/chosen": -0.3291992247104645, |
| "logits/rejected": -0.2532714903354645, |
| "logps/chosen": -496.79998779296875, |
| "logps/rejected": -428.79998779296875, |
| "loss": 0.5624, |
| "nll_loss": 0.9046875238418579, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.8631836175918579, |
| "rewards/margins": 1.0146484375, |
| "rewards/rejected": -0.15104980766773224, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11459129106187929, |
| "grad_norm": 162.0020969653275, |
| "learning_rate": 4.923599320882851e-07, |
| "logits/chosen": -0.3076171875, |
| "logits/rejected": -0.3332763612270355, |
| "logps/chosen": -540.4000244140625, |
| "logps/rejected": -498.20001220703125, |
| "loss": 0.4469, |
| "nll_loss": 0.893750011920929, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.2221190929412842, |
| "rewards/margins": 1.4638671875, |
| "rewards/rejected": -0.24140624701976776, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12223071046600459, |
| "grad_norm": 174.01769486132392, |
| "learning_rate": 4.881154499151103e-07, |
| "logits/chosen": -0.32622069120407104, |
| "logits/rejected": -0.3348449766635895, |
| "logps/chosen": -591.0, |
| "logps/rejected": -580.7999877929688, |
| "loss": 0.5754, |
| "nll_loss": 1.0691406726837158, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 1.2189452648162842, |
| "rewards/margins": 1.0265624523162842, |
| "rewards/rejected": 0.19337157905101776, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12987012987012986, |
| "grad_norm": 190.37882731718457, |
| "learning_rate": 4.838709677419355e-07, |
| "logits/chosen": -0.2706542909145355, |
| "logits/rejected": -0.33831483125686646, |
| "logps/chosen": -535.5999755859375, |
| "logps/rejected": -530.2000122070312, |
| "loss": 0.5538, |
| "nll_loss": 0.8460937738418579, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 1.3611328601837158, |
| "rewards/margins": 0.967968761920929, |
| "rewards/rejected": 0.3932128846645355, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.13750954927425516, |
| "grad_norm": 145.8998739490587, |
| "learning_rate": 4.796264855687606e-07, |
| "logits/chosen": -0.2790161073207855, |
| "logits/rejected": -0.29090577363967896, |
| "logps/chosen": -595.5999755859375, |
| "logps/rejected": -568.4000244140625, |
| "loss": 0.4184, |
| "nll_loss": 0.8812500238418579, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.9041992425918579, |
| "rewards/margins": 1.43359375, |
| "rewards/rejected": -0.53125, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.14514896867838045, |
| "grad_norm": 102.7664998097143, |
| "learning_rate": 4.753820033955857e-07, |
| "logits/chosen": -0.36333006620407104, |
| "logits/rejected": -0.30805665254592896, |
| "logps/chosen": -529.4000244140625, |
| "logps/rejected": -484.79998779296875, |
| "loss": 0.3831, |
| "nll_loss": 0.8929687738418579, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.4363281726837158, |
| "rewards/margins": 1.766992211341858, |
| "rewards/rejected": -0.33027344942092896, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.15278838808250572, |
| "grad_norm": 62.90690254682499, |
| "learning_rate": 4.7113752122241087e-07, |
| "logits/chosen": -0.16816405951976776, |
| "logits/rejected": -0.15498046576976776, |
| "logps/chosen": -583.4000244140625, |
| "logps/rejected": -536.5999755859375, |
| "loss": 0.4999, |
| "nll_loss": 0.892578125, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.306249976158142, |
| "rewards/margins": 1.3748047351837158, |
| "rewards/rejected": -0.06831054389476776, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.16042780748663102, |
| "grad_norm": 84.68520470211422, |
| "learning_rate": 4.66893039049236e-07, |
| "logits/chosen": -0.19780273735523224, |
| "logits/rejected": -0.24296875298023224, |
| "logps/chosen": -525.0, |
| "logps/rejected": -510.79998779296875, |
| "loss": 0.6017, |
| "nll_loss": 0.946093738079071, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.9354492425918579, |
| "rewards/margins": 1.27734375, |
| "rewards/rejected": -0.34150391817092896, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.16806722689075632, |
| "grad_norm": 185.43708476964164, |
| "learning_rate": 4.6264855687606106e-07, |
| "logits/chosen": -0.27099609375, |
| "logits/rejected": -0.23505859076976776, |
| "logps/chosen": -484.3999938964844, |
| "logps/rejected": -489.20001220703125, |
| "loss": 0.487, |
| "nll_loss": 0.8511718511581421, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.3269897401332855, |
| "rewards/margins": 1.3175780773162842, |
| "rewards/rejected": -0.990429699420929, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.17570664629488159, |
| "grad_norm": 108.47585085006368, |
| "learning_rate": 4.5840407470288624e-07, |
| "logits/chosen": -0.2373046875, |
| "logits/rejected": -0.26665037870407104, |
| "logps/chosen": -519.2000122070312, |
| "logps/rejected": -494.6000061035156, |
| "loss": 0.3942, |
| "nll_loss": 0.8656250238418579, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.721728503704071, |
| "rewards/margins": 2.1226563453674316, |
| "rewards/rejected": -1.4031250476837158, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.18334606569900688, |
| "grad_norm": 167.38681415802225, |
| "learning_rate": 4.5415959252971136e-07, |
| "logits/chosen": -0.18267822265625, |
| "logits/rejected": -0.17292480170726776, |
| "logps/chosen": -525.0, |
| "logps/rejected": -501.6000061035156, |
| "loss": 0.5013, |
| "nll_loss": 0.892578125, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.698437511920929, |
| "rewards/margins": 1.597265601158142, |
| "rewards/rejected": -0.89697265625, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.19098548510313215, |
| "grad_norm": 101.64538233833237, |
| "learning_rate": 4.499151103565365e-07, |
| "logits/chosen": -0.15837402641773224, |
| "logits/rejected": -0.19194336235523224, |
| "logps/chosen": -677.4000244140625, |
| "logps/rejected": -586.4000244140625, |
| "loss": 0.4529, |
| "nll_loss": 0.887890636920929, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.6534668207168579, |
| "rewards/margins": 1.8076171875, |
| "rewards/rejected": -1.1537597179412842, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19862490450725745, |
| "grad_norm": 294.75439921434145, |
| "learning_rate": 4.456706281833616e-07, |
| "logits/chosen": -0.2868896424770355, |
| "logits/rejected": -0.22414550185203552, |
| "logps/chosen": -544.0, |
| "logps/rejected": -496.3999938964844, |
| "loss": 0.4019, |
| "nll_loss": 0.926562488079071, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.142968773841858, |
| "rewards/margins": 1.655859351158142, |
| "rewards/rejected": -0.5120483636856079, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.20626432391138275, |
| "grad_norm": 117.11210831501775, |
| "learning_rate": 4.4142614601018673e-07, |
| "logits/chosen": -0.19429931044578552, |
| "logits/rejected": -0.218994140625, |
| "logps/chosen": -575.2000122070312, |
| "logps/rejected": -544.2000122070312, |
| "loss": 0.5657, |
| "nll_loss": 0.871874988079071, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.008203148841858, |
| "rewards/margins": 1.3722655773162842, |
| "rewards/rejected": -0.3637451231479645, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "grad_norm": 163.8676544002689, |
| "learning_rate": 4.3718166383701186e-07, |
| "logits/chosen": -0.25664061307907104, |
| "logits/rejected": -0.23786620795726776, |
| "logps/chosen": -637.7999877929688, |
| "logps/rejected": -592.0, |
| "loss": 0.3987, |
| "nll_loss": 0.989453136920929, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.5733398199081421, |
| "rewards/margins": 1.9832031726837158, |
| "rewards/rejected": -1.4110839366912842, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2215431627196333, |
| "grad_norm": 264.70174523111024, |
| "learning_rate": 4.32937181663837e-07, |
| "logits/chosen": -0.16657714545726776, |
| "logits/rejected": -0.16425780951976776, |
| "logps/chosen": -623.0, |
| "logps/rejected": -573.7999877929688, |
| "loss": 0.5026, |
| "nll_loss": 0.8687499761581421, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.6055663824081421, |
| "rewards/margins": 1.7492187023162842, |
| "rewards/rejected": -1.142822265625, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.22918258212375858, |
| "grad_norm": 188.9470989355876, |
| "learning_rate": 4.286926994906621e-07, |
| "logits/chosen": -0.35651856660842896, |
| "logits/rejected": -0.402099609375, |
| "logps/chosen": -510.6000061035156, |
| "logps/rejected": -459.3999938964844, |
| "loss": 0.4784, |
| "nll_loss": 0.896484375, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.013574242591858, |
| "rewards/margins": 1.8142578601837158, |
| "rewards/rejected": -0.8011718988418579, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.23682200152788388, |
| "grad_norm": 225.67003396669838, |
| "learning_rate": 4.244482173174873e-07, |
| "logits/chosen": -0.1279296875, |
| "logits/rejected": -0.13811035454273224, |
| "logps/chosen": -545.0, |
| "logps/rejected": -516.7999877929688, |
| "loss": 0.4794, |
| "nll_loss": 0.8207031488418579, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.391210913658142, |
| "rewards/margins": 1.7058594226837158, |
| "rewards/rejected": -0.3139282166957855, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.24446142093200918, |
| "grad_norm": 105.64614366845814, |
| "learning_rate": 4.202037351443124e-07, |
| "logits/chosen": -0.16025391221046448, |
| "logits/rejected": -0.19588622450828552, |
| "logps/chosen": -463.79998779296875, |
| "logps/rejected": -460.6000061035156, |
| "loss": 0.2819, |
| "nll_loss": 0.8589843511581421, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 1.2197265625, |
| "rewards/margins": 2.479687452316284, |
| "rewards/rejected": -1.260498046875, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.25210084033613445, |
| "grad_norm": 287.90740376642924, |
| "learning_rate": 4.159592529711375e-07, |
| "logits/chosen": -0.2608642578125, |
| "logits/rejected": -0.25861817598342896, |
| "logps/chosen": -548.4000244140625, |
| "logps/rejected": -534.7999877929688, |
| "loss": 0.6499, |
| "nll_loss": 0.977734386920929, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.809765636920929, |
| "rewards/margins": 1.789453148841858, |
| "rewards/rejected": -0.979785144329071, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 184.4996723823412, |
| "learning_rate": 4.1171477079796265e-07, |
| "logits/chosen": -0.15278320014476776, |
| "logits/rejected": -0.20881347358226776, |
| "logps/chosen": -534.4000244140625, |
| "logps/rejected": -502.20001220703125, |
| "loss": 0.3265, |
| "nll_loss": 0.8968750238418579, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.8492431640625, |
| "rewards/margins": 2.390625, |
| "rewards/rejected": -1.5436522960662842, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.26737967914438504, |
| "grad_norm": 176.1001324023726, |
| "learning_rate": 4.074702886247878e-07, |
| "logits/chosen": -0.291748046875, |
| "logits/rejected": -0.30827635526657104, |
| "logps/chosen": -536.7999877929688, |
| "logps/rejected": -502.20001220703125, |
| "loss": 0.5069, |
| "nll_loss": 0.903124988079071, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.542651355266571, |
| "rewards/margins": 1.62109375, |
| "rewards/rejected": -1.079248070716858, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2750190985485103, |
| "grad_norm": 215.75426907278032, |
| "learning_rate": 4.0322580645161285e-07, |
| "logits/chosen": -0.19513702392578125, |
| "logits/rejected": -0.18853148818016052, |
| "logps/chosen": -659.4000244140625, |
| "logps/rejected": -631.2000122070312, |
| "loss": 0.4296, |
| "nll_loss": 0.93359375, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.0828125476837158, |
| "rewards/margins": 1.869531273841858, |
| "rewards/rejected": -0.787548840045929, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2826585179526356, |
| "grad_norm": 108.88771986132863, |
| "learning_rate": 3.98981324278438e-07, |
| "logits/chosen": -0.3078857362270355, |
| "logits/rejected": -0.29511719942092896, |
| "logps/chosen": -550.2000122070312, |
| "logps/rejected": -524.5999755859375, |
| "loss": 0.4382, |
| "nll_loss": 0.8999999761581421, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.9032226800918579, |
| "rewards/margins": 1.9148437976837158, |
| "rewards/rejected": -1.0095703601837158, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2902979373567609, |
| "grad_norm": 157.57914105986907, |
| "learning_rate": 3.9473684210526315e-07, |
| "logits/chosen": -0.24609375, |
| "logits/rejected": -0.2859863340854645, |
| "logps/chosen": -623.5999755859375, |
| "logps/rejected": -571.5999755859375, |
| "loss": 0.4434, |
| "nll_loss": 0.9203125238418579, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.6277344226837158, |
| "rewards/margins": 1.9421875476837158, |
| "rewards/rejected": -0.314453125, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2979373567608862, |
| "grad_norm": 95.48144608914365, |
| "learning_rate": 3.9049235993208827e-07, |
| "logits/chosen": -0.24777832627296448, |
| "logits/rejected": -0.23862305283546448, |
| "logps/chosen": -589.5999755859375, |
| "logps/rejected": -546.5999755859375, |
| "loss": 0.414, |
| "nll_loss": 0.923046886920929, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.652441382408142, |
| "rewards/margins": 2.092968702316284, |
| "rewards/rejected": -0.43925780057907104, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.30557677616501144, |
| "grad_norm": 132.87548084265904, |
| "learning_rate": 3.862478777589134e-07, |
| "logits/chosen": -0.21234130859375, |
| "logits/rejected": -0.22281494736671448, |
| "logps/chosen": -596.5999755859375, |
| "logps/rejected": -545.7999877929688, |
| "loss": 0.5241, |
| "nll_loss": 1.0109374523162842, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 1.0762450695037842, |
| "rewards/margins": 1.8078124523162842, |
| "rewards/rejected": -0.730297863483429, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.31321619556913677, |
| "grad_norm": 94.35786051595302, |
| "learning_rate": 3.820033955857385e-07, |
| "logits/chosen": -0.16411133110523224, |
| "logits/rejected": -0.12357177585363388, |
| "logps/chosen": -545.5999755859375, |
| "logps/rejected": -495.0, |
| "loss": 0.4153, |
| "nll_loss": 0.915234386920929, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.5988280773162842, |
| "rewards/margins": 2.166015625, |
| "rewards/rejected": -0.5667968988418579, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 136.65656778443866, |
| "learning_rate": 3.7775891341256364e-07, |
| "logits/chosen": -0.20586547255516052, |
| "logits/rejected": -0.18314209580421448, |
| "logps/chosen": -649.5999755859375, |
| "logps/rejected": -575.4000244140625, |
| "loss": 0.3882, |
| "nll_loss": 0.908984363079071, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.666894555091858, |
| "rewards/margins": 2.0859375, |
| "rewards/rejected": -0.41822510957717896, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3284950343773873, |
| "grad_norm": 245.401265321346, |
| "learning_rate": 3.735144312393888e-07, |
| "logits/chosen": -0.11831054836511612, |
| "logits/rejected": -0.17479248344898224, |
| "logps/chosen": -665.5999755859375, |
| "logps/rejected": -618.4000244140625, |
| "loss": 0.5723, |
| "nll_loss": 0.8828125, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.431249976158142, |
| "rewards/margins": 1.7414062023162842, |
| "rewards/rejected": -0.3086914122104645, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 78.84105453853267, |
| "learning_rate": 3.692699490662139e-07, |
| "logits/chosen": -0.07949218899011612, |
| "logits/rejected": -0.07717285305261612, |
| "logps/chosen": -481.79998779296875, |
| "logps/rejected": -444.20001220703125, |
| "loss": 0.3382, |
| "nll_loss": 0.89453125, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.306640625, |
| "rewards/margins": 2.328125, |
| "rewards/rejected": -1.0234375, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3437738731856379, |
| "grad_norm": 126.23202663934781, |
| "learning_rate": 3.65025466893039e-07, |
| "logits/chosen": -0.29511719942092896, |
| "logits/rejected": -0.2608398497104645, |
| "logps/chosen": -547.5999755859375, |
| "logps/rejected": -525.5999755859375, |
| "loss": 0.4856, |
| "nll_loss": 0.9339843988418579, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.1731445789337158, |
| "rewards/margins": 1.9373047351837158, |
| "rewards/rejected": -0.7630859613418579, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.35141329258976317, |
| "grad_norm": 76.23435884387047, |
| "learning_rate": 3.607809847198642e-07, |
| "logits/chosen": -0.3000244200229645, |
| "logits/rejected": -0.30839842557907104, |
| "logps/chosen": -572.4000244140625, |
| "logps/rejected": -551.7999877929688, |
| "loss": 0.3774, |
| "nll_loss": 0.9281250238418579, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 2.0835938453674316, |
| "rewards/margins": 2.276562452316284, |
| "rewards/rejected": -0.19318847358226776, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.35905271199388844, |
| "grad_norm": 114.53029164371587, |
| "learning_rate": 3.5653650254668926e-07, |
| "logits/chosen": -0.17070312798023224, |
| "logits/rejected": -0.16102905571460724, |
| "logps/chosen": -531.2000122070312, |
| "logps/rejected": -517.7999877929688, |
| "loss": 0.4004, |
| "nll_loss": 0.873046875, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.2610352039337158, |
| "rewards/margins": 1.8546874523162842, |
| "rewards/rejected": -0.590991199016571, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.36669213139801377, |
| "grad_norm": 243.54108183416537, |
| "learning_rate": 3.5229202037351443e-07, |
| "logits/chosen": -0.30506592988967896, |
| "logits/rejected": -0.24588623642921448, |
| "logps/chosen": -532.5999755859375, |
| "logps/rejected": -517.0, |
| "loss": 0.3923, |
| "nll_loss": 0.908203125, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.15625, |
| "rewards/margins": 2.299999952316284, |
| "rewards/rejected": -1.144677758216858, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.37433155080213903, |
| "grad_norm": 174.36984130018106, |
| "learning_rate": 3.4804753820033956e-07, |
| "logits/chosen": -0.29462891817092896, |
| "logits/rejected": -0.34052735567092896, |
| "logps/chosen": -650.7999877929688, |
| "logps/rejected": -604.5999755859375, |
| "loss": 0.3593, |
| "nll_loss": 0.997265636920929, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.580468773841858, |
| "rewards/margins": 2.7640624046325684, |
| "rewards/rejected": -1.182226538658142, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3819709702062643, |
| "grad_norm": 161.0508842577597, |
| "learning_rate": 3.438030560271647e-07, |
| "logits/chosen": -0.2794433534145355, |
| "logits/rejected": -0.26513671875, |
| "logps/chosen": -607.2000122070312, |
| "logps/rejected": -620.0, |
| "loss": 0.4101, |
| "nll_loss": 0.9351562261581421, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.4298827648162842, |
| "rewards/margins": 2.234375, |
| "rewards/rejected": -0.80517578125, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.38961038961038963, |
| "grad_norm": 110.76591319339768, |
| "learning_rate": 3.395585738539898e-07, |
| "logits/chosen": -0.16807861626148224, |
| "logits/rejected": -0.12810058891773224, |
| "logps/chosen": -636.5999755859375, |
| "logps/rejected": -607.4000244140625, |
| "loss": 0.3684, |
| "nll_loss": 0.936718761920929, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.9383300542831421, |
| "rewards/margins": 2.294921875, |
| "rewards/rejected": -1.353674292564392, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3972498090145149, |
| "grad_norm": 180.59465535461902, |
| "learning_rate": 3.3531409168081493e-07, |
| "logits/chosen": -0.18726806342601776, |
| "logits/rejected": -0.16033935546875, |
| "logps/chosen": -668.2000122070312, |
| "logps/rejected": -634.5999755859375, |
| "loss": 0.4376, |
| "nll_loss": 0.938671886920929, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.7078125476837158, |
| "rewards/margins": 2.2789063453674316, |
| "rewards/rejected": -0.5711914300918579, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.40488922841864017, |
| "grad_norm": 138.49457254902308, |
| "learning_rate": 3.3106960950764005e-07, |
| "logits/chosen": -0.2599121034145355, |
| "logits/rejected": -0.22050781548023224, |
| "logps/chosen": -661.5999755859375, |
| "logps/rejected": -634.5999755859375, |
| "loss": 0.4448, |
| "nll_loss": 1.0085937976837158, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.7669922113418579, |
| "rewards/margins": 1.933203101158142, |
| "rewards/rejected": -1.1654784679412842, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4125286478227655, |
| "grad_norm": 231.1850522757914, |
| "learning_rate": 3.268251273344652e-07, |
| "logits/chosen": -0.17841796576976776, |
| "logits/rejected": -0.18508300185203552, |
| "logps/chosen": -628.2000122070312, |
| "logps/rejected": -584.2000122070312, |
| "loss": 0.4491, |
| "nll_loss": 1.060546875, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.76904296875, |
| "rewards/margins": 2.246875047683716, |
| "rewards/rejected": -1.480224609375, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 160.7987831948394, |
| "learning_rate": 3.225806451612903e-07, |
| "logits/chosen": -0.14698180556297302, |
| "logits/rejected": -0.19716796278953552, |
| "logps/chosen": -602.2000122070312, |
| "logps/rejected": -539.7999877929688, |
| "loss": 0.3708, |
| "nll_loss": 0.871874988079071, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.7313476800918579, |
| "rewards/margins": 2.765625, |
| "rewards/rejected": -2.0337891578674316, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "grad_norm": 90.82442844767456, |
| "learning_rate": 3.183361629881154e-07, |
| "logits/chosen": -0.16887207329273224, |
| "logits/rejected": -0.19123534858226776, |
| "logps/chosen": -600.2000122070312, |
| "logps/rejected": -548.5999755859375, |
| "loss": 0.4537, |
| "nll_loss": 0.9097656011581421, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.8675781488418579, |
| "rewards/margins": 2.024218797683716, |
| "rewards/rejected": -1.1591796875, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.43544690603514136, |
| "grad_norm": 138.92459223223193, |
| "learning_rate": 3.140916808149406e-07, |
| "logits/chosen": -0.11478271335363388, |
| "logits/rejected": -0.1641845703125, |
| "logps/chosen": -657.2000122070312, |
| "logps/rejected": -604.2000122070312, |
| "loss": 0.5563, |
| "nll_loss": 0.9136718511581421, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 1.17626953125, |
| "rewards/margins": 1.947656273841858, |
| "rewards/rejected": -0.772656261920929, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4430863254392666, |
| "grad_norm": 123.1501820354294, |
| "learning_rate": 3.0984719864176567e-07, |
| "logits/chosen": -0.11948242038488388, |
| "logits/rejected": -0.10875244438648224, |
| "logps/chosen": -553.0, |
| "logps/rejected": -479.79998779296875, |
| "loss": 0.3869, |
| "nll_loss": 0.857421875, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.2839844226837158, |
| "rewards/margins": 2.4769530296325684, |
| "rewards/rejected": -1.1936523914337158, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4507257448433919, |
| "grad_norm": 127.32337855269951, |
| "learning_rate": 3.056027164685908e-07, |
| "logits/chosen": -0.2039794921875, |
| "logits/rejected": -0.17943115532398224, |
| "logps/chosen": -564.4000244140625, |
| "logps/rejected": -573.7999877929688, |
| "loss": 0.4879, |
| "nll_loss": 0.991015613079071, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.6973632574081421, |
| "rewards/margins": 1.755468726158142, |
| "rewards/rejected": -1.059179663658142, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.45836516424751717, |
| "grad_norm": 107.25927292937021, |
| "learning_rate": 3.0135823429541597e-07, |
| "logits/chosen": -0.18730469048023224, |
| "logits/rejected": -0.17886963486671448, |
| "logps/chosen": -597.2000122070312, |
| "logps/rejected": -592.0, |
| "loss": 0.4284, |
| "nll_loss": 1.019140601158142, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.826953113079071, |
| "rewards/margins": 2.2720704078674316, |
| "rewards/rejected": -1.450781226158142, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4660045836516425, |
| "grad_norm": 181.85392573395544, |
| "learning_rate": 2.9711375212224104e-07, |
| "logits/chosen": -0.10260009765625, |
| "logits/rejected": -0.1400146484375, |
| "logps/chosen": -596.7999877929688, |
| "logps/rejected": -581.5999755859375, |
| "loss": 0.4928, |
| "nll_loss": 0.938281238079071, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.825878918170929, |
| "rewards/margins": 1.986718773841858, |
| "rewards/rejected": -1.161718726158142, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.47364400305576776, |
| "grad_norm": 498.482891329986, |
| "learning_rate": 2.928692699490662e-07, |
| "logits/chosen": -0.32451170682907104, |
| "logits/rejected": -0.33247071504592896, |
| "logps/chosen": -636.5999755859375, |
| "logps/rejected": -602.7999877929688, |
| "loss": 0.4174, |
| "nll_loss": 1.0402343273162842, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.42646485567092896, |
| "rewards/margins": 2.5859375, |
| "rewards/rejected": -2.1615233421325684, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.48128342245989303, |
| "grad_norm": 90.59697238922614, |
| "learning_rate": 2.8862478777589134e-07, |
| "logits/chosen": -0.2081756591796875, |
| "logits/rejected": -0.24516601860523224, |
| "logps/chosen": -705.7999877929688, |
| "logps/rejected": -635.5999755859375, |
| "loss": 0.475, |
| "nll_loss": 0.9750000238418579, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.801025390625, |
| "rewards/margins": 2.24609375, |
| "rewards/rejected": -1.4406249523162842, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.48892284186401835, |
| "grad_norm": 70.07533244919708, |
| "learning_rate": 2.8438030560271646e-07, |
| "logits/chosen": -0.23149414360523224, |
| "logits/rejected": -0.24143067002296448, |
| "logps/chosen": -601.7999877929688, |
| "logps/rejected": -607.0, |
| "loss": 0.4966, |
| "nll_loss": 1.02734375, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.388671875, |
| "rewards/margins": 2.166015625, |
| "rewards/rejected": -1.7734375, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4965622612681436, |
| "grad_norm": 155.4435454773161, |
| "learning_rate": 2.801358234295416e-07, |
| "logits/chosen": -0.12587890028953552, |
| "logits/rejected": -0.12092284858226776, |
| "logps/chosen": -496.6000061035156, |
| "logps/rejected": -498.20001220703125, |
| "loss": 0.4247, |
| "nll_loss": 0.916015625, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.5431762933731079, |
| "rewards/margins": 2.1624999046325684, |
| "rewards/rejected": -1.619140625, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5042016806722689, |
| "grad_norm": 262.5809391278708, |
| "learning_rate": 2.758913412563667e-07, |
| "logits/chosen": -0.12770995497703552, |
| "logits/rejected": -0.14760741591453552, |
| "logps/chosen": -530.4000244140625, |
| "logps/rejected": -482.0, |
| "loss": 0.5208, |
| "nll_loss": 0.903515636920929, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.02490234375, |
| "rewards/margins": 1.9609375, |
| "rewards/rejected": -0.9371093511581421, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5118411000763942, |
| "grad_norm": 116.54142626086018, |
| "learning_rate": 2.7164685908319183e-07, |
| "logits/chosen": -0.09782715141773224, |
| "logits/rejected": -0.11018066108226776, |
| "logps/chosen": -588.2000122070312, |
| "logps/rejected": -562.0, |
| "loss": 0.4879, |
| "nll_loss": 0.98828125, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.236718773841858, |
| "rewards/margins": 2.2178711891174316, |
| "rewards/rejected": -0.982714831829071, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 147.62557960833126, |
| "learning_rate": 2.67402376910017e-07, |
| "logits/chosen": -0.2564453184604645, |
| "logits/rejected": -0.24064941704273224, |
| "logps/chosen": -534.0, |
| "logps/rejected": -507.79998779296875, |
| "loss": 0.3249, |
| "nll_loss": 0.9417968988418579, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.764941394329071, |
| "rewards/margins": 2.680859327316284, |
| "rewards/rejected": -1.916015625, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5271199388846448, |
| "grad_norm": 73.38717469252549, |
| "learning_rate": 2.631578947368421e-07, |
| "logits/chosen": -0.169677734375, |
| "logits/rejected": -0.21303710341453552, |
| "logps/chosen": -604.7999877929688, |
| "logps/rejected": -589.5999755859375, |
| "loss": 0.5577, |
| "nll_loss": 0.919921875, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.28046876192092896, |
| "rewards/margins": 1.9656250476837158, |
| "rewards/rejected": -1.6857421398162842, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "grad_norm": 125.30928463570962, |
| "learning_rate": 2.589134125636672e-07, |
| "logits/chosen": -0.24868163466453552, |
| "logits/rejected": -0.21337890625, |
| "logps/chosen": -669.7999877929688, |
| "logps/rejected": -607.0, |
| "loss": 0.5241, |
| "nll_loss": 0.987109363079071, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.698010265827179, |
| "rewards/margins": 2.317578077316284, |
| "rewards/rejected": -1.62060546875, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5423987776928954, |
| "grad_norm": 86.47824062520769, |
| "learning_rate": 2.546689303904924e-07, |
| "logits/chosen": -0.15998534858226776, |
| "logits/rejected": -0.13544921576976776, |
| "logps/chosen": -527.0, |
| "logps/rejected": -507.3999938964844, |
| "loss": 0.4433, |
| "nll_loss": 0.8832031488418579, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.833325207233429, |
| "rewards/margins": 1.917578101158142, |
| "rewards/rejected": -1.0823242664337158, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5500381970970206, |
| "grad_norm": 241.9764128861329, |
| "learning_rate": 2.5042444821731745e-07, |
| "logits/chosen": -0.23203125596046448, |
| "logits/rejected": -0.15019531548023224, |
| "logps/chosen": -623.2000122070312, |
| "logps/rejected": -519.4000244140625, |
| "loss": 0.4256, |
| "nll_loss": 0.934374988079071, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.8759765625, |
| "rewards/margins": 2.4292969703674316, |
| "rewards/rejected": -1.5539062023162842, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5576776165011459, |
| "grad_norm": 199.54510567975194, |
| "learning_rate": 2.4617996604414257e-07, |
| "logits/chosen": -0.20654296875, |
| "logits/rejected": -0.18228760361671448, |
| "logps/chosen": -569.5999755859375, |
| "logps/rejected": -539.2000122070312, |
| "loss": 0.44, |
| "nll_loss": 0.9097656011581421, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.22563476860523224, |
| "rewards/margins": 2.035937547683716, |
| "rewards/rejected": -1.8076171875, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5653170359052712, |
| "grad_norm": 57.705062448013685, |
| "learning_rate": 2.4193548387096775e-07, |
| "logits/chosen": -0.29963380098342896, |
| "logits/rejected": -0.2674804627895355, |
| "logps/chosen": -658.4000244140625, |
| "logps/rejected": -642.4000244140625, |
| "loss": 0.395, |
| "nll_loss": 0.998046875, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.02570800855755806, |
| "rewards/margins": 2.359375, |
| "rewards/rejected": -2.383593797683716, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5729564553093965, |
| "grad_norm": 90.1800130721227, |
| "learning_rate": 2.3769100169779285e-07, |
| "logits/chosen": -0.17238768935203552, |
| "logits/rejected": -0.21173095703125, |
| "logps/chosen": -690.4000244140625, |
| "logps/rejected": -671.0, |
| "loss": 0.4136, |
| "nll_loss": 0.9566406011581421, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.26005858182907104, |
| "rewards/margins": 2.177929639816284, |
| "rewards/rejected": -1.918359398841858, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5805958747135218, |
| "grad_norm": 128.16902765016945, |
| "learning_rate": 2.33446519524618e-07, |
| "logits/chosen": -0.18836669623851776, |
| "logits/rejected": -0.13487549126148224, |
| "logps/chosen": -570.0, |
| "logps/rejected": -552.4000244140625, |
| "loss": 0.4234, |
| "nll_loss": 0.950390636920929, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.518481433391571, |
| "rewards/margins": 2.1351561546325684, |
| "rewards/rejected": -1.615625023841858, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 179.91151487829103, |
| "learning_rate": 2.2920203735144312e-07, |
| "logits/chosen": -0.15025635063648224, |
| "logits/rejected": -0.20454101264476776, |
| "logps/chosen": -590.5999755859375, |
| "logps/rejected": -570.7999877929688, |
| "loss": 0.3452, |
| "nll_loss": 0.905078113079071, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.132714867591858, |
| "rewards/margins": 2.6820311546325684, |
| "rewards/rejected": -1.5529296398162842, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5958747135217723, |
| "grad_norm": 147.28111171420568, |
| "learning_rate": 2.2495755517826824e-07, |
| "logits/chosen": -0.2244003266096115, |
| "logits/rejected": -0.20163574814796448, |
| "logps/chosen": -580.0, |
| "logps/rejected": -517.0, |
| "loss": 0.4638, |
| "nll_loss": 0.9203125238418579, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.217919945716858, |
| "rewards/margins": 2.2210936546325684, |
| "rewards/rejected": -1.00244140625, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6035141329258976, |
| "grad_norm": 148.2346332858827, |
| "learning_rate": 2.2071307300509337e-07, |
| "logits/chosen": -0.2839111387729645, |
| "logits/rejected": -0.26567381620407104, |
| "logps/chosen": -589.5999755859375, |
| "logps/rejected": -557.0, |
| "loss": 0.3842, |
| "nll_loss": 0.866406261920929, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.83984375, |
| "rewards/margins": 2.340625047683716, |
| "rewards/rejected": -1.5037109851837158, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6111535523300229, |
| "grad_norm": 88.75284081371973, |
| "learning_rate": 2.164685908319185e-07, |
| "logits/chosen": -0.23496094346046448, |
| "logits/rejected": -0.19589844346046448, |
| "logps/chosen": -686.0, |
| "logps/rejected": -661.2000122070312, |
| "loss": 0.4166, |
| "nll_loss": 1.068359375, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.013671875, |
| "rewards/margins": 2.171875, |
| "rewards/rejected": -1.1590087413787842, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6187929717341482, |
| "grad_norm": 145.23948778484626, |
| "learning_rate": 2.1222410865874364e-07, |
| "logits/chosen": -0.20212402939796448, |
| "logits/rejected": -0.20252685248851776, |
| "logps/chosen": -539.0, |
| "logps/rejected": -484.6000061035156, |
| "loss": 0.3318, |
| "nll_loss": 0.8890625238418579, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.648242175579071, |
| "rewards/margins": 2.7249999046325684, |
| "rewards/rejected": -2.0816407203674316, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6264323911382735, |
| "grad_norm": 128.52828674770328, |
| "learning_rate": 2.0797962648556874e-07, |
| "logits/chosen": -0.11715392768383026, |
| "logits/rejected": -0.13743896782398224, |
| "logps/chosen": -547.0, |
| "logps/rejected": -538.2000122070312, |
| "loss": 0.4387, |
| "nll_loss": 0.813671886920929, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.023046851158142, |
| "rewards/margins": 2.3441405296325684, |
| "rewards/rejected": -1.322265625, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6340718105423988, |
| "grad_norm": 144.47861170444614, |
| "learning_rate": 2.037351443123939e-07, |
| "logits/chosen": -0.06186523288488388, |
| "logits/rejected": -0.10273437201976776, |
| "logps/chosen": -533.4000244140625, |
| "logps/rejected": -495.0, |
| "loss": 0.3348, |
| "nll_loss": 0.8785156011581421, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.628613293170929, |
| "rewards/margins": 2.4515624046325684, |
| "rewards/rejected": -1.8227050304412842, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 231.8284171525199, |
| "learning_rate": 1.99490662139219e-07, |
| "logits/chosen": -0.2966064512729645, |
| "logits/rejected": -0.3492675721645355, |
| "logps/chosen": -563.2000122070312, |
| "logps/rejected": -554.5999755859375, |
| "loss": 0.4784, |
| "nll_loss": 1.038671851158142, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.3266967833042145, |
| "rewards/margins": 2.3394532203674316, |
| "rewards/rejected": -2.016064405441284, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "grad_norm": 95.18920210483807, |
| "learning_rate": 1.9524617996604413e-07, |
| "logits/chosen": -0.2720703184604645, |
| "logits/rejected": -0.21528320014476776, |
| "logps/chosen": -639.4000244140625, |
| "logps/rejected": -599.4000244140625, |
| "loss": 0.4277, |
| "nll_loss": 0.913281261920929, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.0625, |
| "rewards/margins": 2.268261671066284, |
| "rewards/rejected": -1.204687476158142, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6569900687547746, |
| "grad_norm": 179.17446116841967, |
| "learning_rate": 1.9100169779286926e-07, |
| "logits/chosen": -0.28715819120407104, |
| "logits/rejected": -0.2583984434604645, |
| "logps/chosen": -548.5999755859375, |
| "logps/rejected": -489.3999938964844, |
| "loss": 0.4577, |
| "nll_loss": 0.846875011920929, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.1994140148162842, |
| "rewards/margins": 2.366015672683716, |
| "rewards/rejected": -1.167578101158142, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6646294881588999, |
| "grad_norm": 176.32245150389784, |
| "learning_rate": 1.867572156196944e-07, |
| "logits/chosen": -0.2516418397426605, |
| "logits/rejected": -0.24804076552391052, |
| "logps/chosen": -462.0, |
| "logps/rejected": -476.0, |
| "loss": 0.5009, |
| "nll_loss": 0.921875, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.588671863079071, |
| "rewards/margins": 2.198046922683716, |
| "rewards/rejected": -1.612036108970642, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 154.3222829630188, |
| "learning_rate": 1.825127334465195e-07, |
| "logits/chosen": -0.10764160007238388, |
| "logits/rejected": -0.10710449516773224, |
| "logps/chosen": -545.4000244140625, |
| "logps/rejected": -537.0, |
| "loss": 0.4212, |
| "nll_loss": 0.8804687261581421, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.8570312261581421, |
| "rewards/margins": 2.375781297683716, |
| "rewards/rejected": -1.517333984375, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6799083269671505, |
| "grad_norm": 90.0921653237205, |
| "learning_rate": 1.7826825127334463e-07, |
| "logits/chosen": -0.2685546875, |
| "logits/rejected": -0.24018554389476776, |
| "logps/chosen": -573.0, |
| "logps/rejected": -550.0, |
| "loss": 0.3369, |
| "nll_loss": 0.922656238079071, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.4517578184604645, |
| "rewards/margins": 2.914843797683716, |
| "rewards/rejected": -2.46484375, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6875477463712758, |
| "grad_norm": 90.60087330499096, |
| "learning_rate": 1.7402376910016978e-07, |
| "logits/chosen": -0.17197266221046448, |
| "logits/rejected": -0.16635742783546448, |
| "logps/chosen": -646.5999755859375, |
| "logps/rejected": -654.0, |
| "loss": 0.5631, |
| "nll_loss": 1.0390625, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.09462890774011612, |
| "rewards/margins": 2.5414061546325684, |
| "rewards/rejected": -2.447265625, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6951871657754011, |
| "grad_norm": 411.9292153674767, |
| "learning_rate": 1.697792869269949e-07, |
| "logits/chosen": -0.21950682997703552, |
| "logits/rejected": -0.14067383110523224, |
| "logps/chosen": -782.4000244140625, |
| "logps/rejected": -746.5999755859375, |
| "loss": 0.5727, |
| "nll_loss": 0.967968761920929, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.3875488340854645, |
| "rewards/margins": 1.941015601158142, |
| "rewards/rejected": -1.554296851158142, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7028265851795263, |
| "grad_norm": 96.59177093218652, |
| "learning_rate": 1.6553480475382003e-07, |
| "logits/chosen": -0.17082519829273224, |
| "logits/rejected": -0.24100342392921448, |
| "logps/chosen": -523.4000244140625, |
| "logps/rejected": -496.79998779296875, |
| "loss": 0.4115, |
| "nll_loss": 0.9515625238418579, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.02167968824505806, |
| "rewards/margins": 2.335156202316284, |
| "rewards/rejected": -2.3140625953674316, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7104660045836516, |
| "grad_norm": 114.00603134519599, |
| "learning_rate": 1.6129032258064515e-07, |
| "logits/chosen": -0.0623779296875, |
| "logits/rejected": -0.03818359225988388, |
| "logps/chosen": -604.7999877929688, |
| "logps/rejected": -563.4000244140625, |
| "loss": 0.374, |
| "nll_loss": 0.859375, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.599719226360321, |
| "rewards/margins": 2.612499952316284, |
| "rewards/rejected": -2.0126953125, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7181054239877769, |
| "grad_norm": 46.449877624622495, |
| "learning_rate": 1.570458404074703e-07, |
| "logits/chosen": -0.20942382514476776, |
| "logits/rejected": -0.23570556938648224, |
| "logps/chosen": -527.4000244140625, |
| "logps/rejected": -528.4000244140625, |
| "loss": 0.3851, |
| "nll_loss": 0.9398437738418579, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.19345703721046448, |
| "rewards/margins": 2.53125, |
| "rewards/rejected": -2.3375000953674316, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7257448433919023, |
| "grad_norm": 154.8870158247193, |
| "learning_rate": 1.528013582342954e-07, |
| "logits/chosen": -0.17775878310203552, |
| "logits/rejected": -0.15126952528953552, |
| "logps/chosen": -644.0, |
| "logps/rejected": -595.4000244140625, |
| "loss": 0.3593, |
| "nll_loss": 0.948046863079071, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.614453136920929, |
| "rewards/margins": 2.4710936546325684, |
| "rewards/rejected": -1.8585937023162842, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7333842627960275, |
| "grad_norm": 60.309689191140706, |
| "learning_rate": 1.4855687606112052e-07, |
| "logits/chosen": -0.20181885361671448, |
| "logits/rejected": -0.17143554985523224, |
| "logps/chosen": -614.2000122070312, |
| "logps/rejected": -502.20001220703125, |
| "loss": 0.2511, |
| "nll_loss": 0.9703124761581421, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.37763673067092896, |
| "rewards/margins": 2.9703125953674316, |
| "rewards/rejected": -2.592968702316284, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7410236822001528, |
| "grad_norm": 93.06951737576422, |
| "learning_rate": 1.4431239388794567e-07, |
| "logits/chosen": -0.11916504055261612, |
| "logits/rejected": -0.12851563096046448, |
| "logps/chosen": -522.0, |
| "logps/rejected": -478.20001220703125, |
| "loss": 0.3418, |
| "nll_loss": 0.948046863079071, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 0.33341675996780396, |
| "rewards/margins": 2.641406297683716, |
| "rewards/rejected": -2.311718702316284, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "grad_norm": 86.2530129397837, |
| "learning_rate": 1.400679117147708e-07, |
| "logits/chosen": -0.26170653104782104, |
| "logits/rejected": -0.27277833223342896, |
| "logps/chosen": -557.7999877929688, |
| "logps/rejected": -540.5999755859375, |
| "loss": 0.3527, |
| "nll_loss": 0.98046875, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.4949707090854645, |
| "rewards/margins": 2.564453125, |
| "rewards/rejected": -2.0708985328674316, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7563025210084033, |
| "grad_norm": 87.12906010791761, |
| "learning_rate": 1.3582342954159592e-07, |
| "logits/chosen": -0.26884764432907104, |
| "logits/rejected": -0.22690430283546448, |
| "logps/chosen": -634.4000244140625, |
| "logps/rejected": -615.5999755859375, |
| "loss": 0.3518, |
| "nll_loss": 1.00390625, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.05859375, |
| "rewards/margins": 3.135937452316284, |
| "rewards/rejected": -3.0796875953674316, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7639419404125286, |
| "grad_norm": 245.3625476453072, |
| "learning_rate": 1.3157894736842104e-07, |
| "logits/chosen": -0.20460815727710724, |
| "logits/rejected": -0.16091307997703552, |
| "logps/chosen": -667.5999755859375, |
| "logps/rejected": -630.4000244140625, |
| "loss": 0.5093, |
| "nll_loss": 0.916015625, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.29121094942092896, |
| "rewards/margins": 2.0160155296325684, |
| "rewards/rejected": -2.307812452316284, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.771581359816654, |
| "grad_norm": 124.28190362733702, |
| "learning_rate": 1.273344651952462e-07, |
| "logits/chosen": -0.11392822116613388, |
| "logits/rejected": -0.11372070014476776, |
| "logps/chosen": -574.0, |
| "logps/rejected": -589.5999755859375, |
| "loss": 0.2951, |
| "nll_loss": 0.9234374761581421, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.04301757737994194, |
| "rewards/margins": 2.9124999046325684, |
| "rewards/rejected": -2.962890625, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 100.0627026970796, |
| "learning_rate": 1.2308998302207129e-07, |
| "logits/chosen": -0.3117919862270355, |
| "logits/rejected": -0.27192384004592896, |
| "logps/chosen": -643.2000122070312, |
| "logps/rejected": -626.4000244140625, |
| "loss": 0.4462, |
| "nll_loss": 0.983203113079071, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.3858398497104645, |
| "rewards/margins": 2.7281250953674316, |
| "rewards/rejected": -2.3414063453674316, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7868601986249045, |
| "grad_norm": 99.33843951990231, |
| "learning_rate": 1.1884550084889642e-07, |
| "logits/chosen": -0.20637206733226776, |
| "logits/rejected": -0.17661133408546448, |
| "logps/chosen": -621.0, |
| "logps/rejected": -586.4000244140625, |
| "loss": 0.4063, |
| "nll_loss": 0.924609363079071, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.1715087890625, |
| "rewards/margins": 2.515625, |
| "rewards/rejected": -2.344531297683716, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7944996180290298, |
| "grad_norm": 173.70184429546075, |
| "learning_rate": 1.1460101867572156e-07, |
| "logits/chosen": -0.15251465141773224, |
| "logits/rejected": -0.12940673530101776, |
| "logps/chosen": -624.5999755859375, |
| "logps/rejected": -595.0, |
| "loss": 0.294, |
| "nll_loss": 0.928906261920929, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.10312499850988388, |
| "rewards/margins": 2.552734375, |
| "rewards/rejected": -2.452343702316284, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8021390374331551, |
| "grad_norm": 112.58127449933384, |
| "learning_rate": 1.1035653650254668e-07, |
| "logits/chosen": -0.202392578125, |
| "logits/rejected": -0.167236328125, |
| "logps/chosen": -592.7999877929688, |
| "logps/rejected": -562.0, |
| "loss": 0.4084, |
| "nll_loss": 0.884765625, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.49921876192092896, |
| "rewards/margins": 2.3228516578674316, |
| "rewards/rejected": -1.8244140148162842, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8097784568372803, |
| "grad_norm": 13.477126429230648, |
| "learning_rate": 1.0611205432937182e-07, |
| "logits/chosen": -0.06467285007238388, |
| "logits/rejected": -0.10791015625, |
| "logps/chosen": -637.5999755859375, |
| "logps/rejected": -594.4000244140625, |
| "loss": 0.4124, |
| "nll_loss": 0.9261718988418579, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.5888671875, |
| "rewards/margins": 2.6421875953674316, |
| "rewards/rejected": -2.053906202316284, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8174178762414056, |
| "grad_norm": 146.54916411561834, |
| "learning_rate": 1.0186757215619694e-07, |
| "logits/chosen": -0.02625732496380806, |
| "logits/rejected": -0.01333007775247097, |
| "logps/chosen": -607.2000122070312, |
| "logps/rejected": -569.0, |
| "loss": 0.3095, |
| "nll_loss": 0.8355468511581421, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.664257824420929, |
| "rewards/margins": 2.471874952316284, |
| "rewards/rejected": -1.8097655773162842, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.825057295645531, |
| "grad_norm": 27.03231972763934, |
| "learning_rate": 9.762308998302207e-08, |
| "logits/chosen": -0.2533203065395355, |
| "logits/rejected": -0.19873046875, |
| "logps/chosen": -530.2000122070312, |
| "logps/rejected": -508.79998779296875, |
| "loss": 0.363, |
| "nll_loss": 0.925000011920929, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.32301026582717896, |
| "rewards/margins": 2.681640625, |
| "rewards/rejected": -2.3609375953674316, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8326967150496563, |
| "grad_norm": 95.08593550485008, |
| "learning_rate": 9.33786078098472e-08, |
| "logits/chosen": -0.113616943359375, |
| "logits/rejected": -0.18590088188648224, |
| "logps/chosen": -522.0, |
| "logps/rejected": -463.0, |
| "loss": 0.3409, |
| "nll_loss": 0.901171863079071, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.597607433795929, |
| "rewards/margins": 2.7945313453674316, |
| "rewards/rejected": -2.1976561546325684, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 43.42987904751351, |
| "learning_rate": 8.913412563667231e-08, |
| "logits/chosen": -0.26826173067092896, |
| "logits/rejected": -0.2728515565395355, |
| "logps/chosen": -591.4000244140625, |
| "logps/rejected": -585.5999755859375, |
| "loss": 0.4074, |
| "nll_loss": 0.938281238079071, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.4159179627895355, |
| "rewards/margins": 2.444531202316284, |
| "rewards/rejected": -2.027539014816284, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8479755538579068, |
| "grad_norm": 130.2620800651245, |
| "learning_rate": 8.488964346349745e-08, |
| "logits/chosen": -0.11006774753332138, |
| "logits/rejected": -0.13276366889476776, |
| "logps/chosen": -505.20001220703125, |
| "logps/rejected": -493.0, |
| "loss": 0.3638, |
| "nll_loss": 0.8988281488418579, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.3427734375, |
| "rewards/margins": 2.474609375, |
| "rewards/rejected": -2.1343750953674316, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "grad_norm": 258.0177244688908, |
| "learning_rate": 8.064516129032257e-08, |
| "logits/chosen": -0.17624512314796448, |
| "logits/rejected": -0.15255126357078552, |
| "logps/chosen": -585.5999755859375, |
| "logps/rejected": -592.5999755859375, |
| "loss": 0.4054, |
| "nll_loss": 0.9449218511581421, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.28657227754592896, |
| "rewards/margins": 2.617968797683716, |
| "rewards/rejected": -2.334765672683716, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8632543926661573, |
| "grad_norm": 167.06471829769077, |
| "learning_rate": 7.64006791171477e-08, |
| "logits/chosen": -0.01499023474752903, |
| "logits/rejected": -0.0026123046409338713, |
| "logps/chosen": -526.7999877929688, |
| "logps/rejected": -518.7999877929688, |
| "loss": 0.394, |
| "nll_loss": 0.834765613079071, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.09824218600988388, |
| "rewards/margins": 2.207812547683716, |
| "rewards/rejected": -2.3070311546325684, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8708938120702827, |
| "grad_norm": 203.27949384009779, |
| "learning_rate": 7.215619694397283e-08, |
| "logits/chosen": -0.04530029371380806, |
| "logits/rejected": -0.010485840030014515, |
| "logps/chosen": -570.4000244140625, |
| "logps/rejected": -554.0, |
| "loss": 0.3578, |
| "nll_loss": 0.8656250238418579, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.8838866949081421, |
| "rewards/margins": 2.2896485328674316, |
| "rewards/rejected": -1.40283203125, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.878533231474408, |
| "grad_norm": 91.28683779041214, |
| "learning_rate": 6.791171477079796e-08, |
| "logits/chosen": -0.158447265625, |
| "logits/rejected": -0.15849609673023224, |
| "logps/chosen": -659.4000244140625, |
| "logps/rejected": -620.0, |
| "loss": 0.3552, |
| "nll_loss": 1.006250023841858, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.120996117591858, |
| "rewards/margins": 2.5785155296325684, |
| "rewards/rejected": -1.453710913658142, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8861726508785333, |
| "grad_norm": 165.0639361803876, |
| "learning_rate": 6.36672325976231e-08, |
| "logits/chosen": -0.17514649033546448, |
| "logits/rejected": -0.22218628227710724, |
| "logps/chosen": -545.5999755859375, |
| "logps/rejected": -525.5999755859375, |
| "loss": 0.3801, |
| "nll_loss": 0.984375, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.7798827886581421, |
| "rewards/margins": 2.7671875953674316, |
| "rewards/rejected": -1.98876953125, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8938120702826585, |
| "grad_norm": 70.36211868043284, |
| "learning_rate": 5.942275042444821e-08, |
| "logits/chosen": -0.07485046237707138, |
| "logits/rejected": -0.07871093600988388, |
| "logps/chosen": -517.7999877929688, |
| "logps/rejected": -501.20001220703125, |
| "loss": 0.4365, |
| "nll_loss": 0.889453113079071, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.19501952826976776, |
| "rewards/margins": 2.075390577316284, |
| "rewards/rejected": -1.877343773841858, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9014514896867838, |
| "grad_norm": 46.76470844821065, |
| "learning_rate": 5.517826825127334e-08, |
| "logits/chosen": -0.21860352158546448, |
| "logits/rejected": -0.21206054091453552, |
| "logps/chosen": -566.5999755859375, |
| "logps/rejected": -548.0, |
| "loss": 0.452, |
| "nll_loss": 0.876953125, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.6045898199081421, |
| "rewards/margins": 1.9511229991912842, |
| "rewards/rejected": -1.346093773841858, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 132.73200075615364, |
| "learning_rate": 5.093378607809847e-08, |
| "logits/chosen": -0.13608399033546448, |
| "logits/rejected": -0.16939696669578552, |
| "logps/chosen": -535.0, |
| "logps/rejected": -506.20001220703125, |
| "loss": 0.3551, |
| "nll_loss": 0.946093738079071, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.880932629108429, |
| "rewards/margins": 2.707812547683716, |
| "rewards/rejected": -1.8278319835662842, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9167303284950343, |
| "grad_norm": 141.55461267715054, |
| "learning_rate": 4.66893039049236e-08, |
| "logits/chosen": -0.20478515326976776, |
| "logits/rejected": -0.16940918564796448, |
| "logps/chosen": -531.7999877929688, |
| "logps/rejected": -499.6000061035156, |
| "loss": 0.3211, |
| "nll_loss": 0.9398437738418579, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.8980468511581421, |
| "rewards/margins": 2.780468702316284, |
| "rewards/rejected": -1.885156273841858, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9243697478991597, |
| "grad_norm": 188.7068399957901, |
| "learning_rate": 4.2444821731748725e-08, |
| "logits/chosen": -0.2447509765625, |
| "logits/rejected": -0.19533690810203552, |
| "logps/chosen": -581.2000122070312, |
| "logps/rejected": -552.2000122070312, |
| "loss": 0.3269, |
| "nll_loss": 0.961718738079071, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.3602050840854645, |
| "rewards/margins": 2.5765624046325684, |
| "rewards/rejected": -2.2173829078674316, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.932009167303285, |
| "grad_norm": 104.70717258392624, |
| "learning_rate": 3.820033955857385e-08, |
| "logits/chosen": -0.21748046576976776, |
| "logits/rejected": -0.20539550483226776, |
| "logps/chosen": -736.5999755859375, |
| "logps/rejected": -709.7999877929688, |
| "loss": 0.4171, |
| "nll_loss": 1.0066406726837158, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.4925781190395355, |
| "rewards/margins": 2.092578172683716, |
| "rewards/rejected": -1.601171851158142, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9396485867074102, |
| "grad_norm": 120.69266491201337, |
| "learning_rate": 3.395585738539898e-08, |
| "logits/chosen": -0.16705322265625, |
| "logits/rejected": -0.20624999701976776, |
| "logps/chosen": -546.4000244140625, |
| "logps/rejected": -513.5999755859375, |
| "loss": 0.5148, |
| "nll_loss": 0.892578125, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.23613281548023224, |
| "rewards/margins": 2.397656202316284, |
| "rewards/rejected": -2.1617188453674316, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9472880061115355, |
| "grad_norm": 89.80554106184103, |
| "learning_rate": 2.9711375212224106e-08, |
| "logits/chosen": -0.22031250596046448, |
| "logits/rejected": -0.26923829317092896, |
| "logps/chosen": -567.2000122070312, |
| "logps/rejected": -513.0, |
| "loss": 0.3652, |
| "nll_loss": 0.949999988079071, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.590991199016571, |
| "rewards/margins": 2.520312547683716, |
| "rewards/rejected": -1.9285156726837158, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9549274255156608, |
| "grad_norm": 68.44803041900859, |
| "learning_rate": 2.5466893039049236e-08, |
| "logits/chosen": -0.10751952975988388, |
| "logits/rejected": -0.10664062201976776, |
| "logps/chosen": -557.4000244140625, |
| "logps/rejected": -524.0, |
| "loss": 0.3961, |
| "nll_loss": 0.864062488079071, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.5328124761581421, |
| "rewards/margins": 2.385009765625, |
| "rewards/rejected": -1.853906273841858, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 77.86404690618204, |
| "learning_rate": 2.1222410865874363e-08, |
| "logits/chosen": -0.23062744736671448, |
| "logits/rejected": -0.19792480766773224, |
| "logps/chosen": -579.0, |
| "logps/rejected": -549.4000244140625, |
| "loss": 0.3363, |
| "nll_loss": 0.908984363079071, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.6390625238418579, |
| "rewards/margins": 2.806640625, |
| "rewards/rejected": -2.1689453125, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9702062643239114, |
| "grad_norm": 92.2508879537955, |
| "learning_rate": 1.697792869269949e-08, |
| "logits/chosen": -0.243896484375, |
| "logits/rejected": -0.2039794921875, |
| "logps/chosen": -596.0, |
| "logps/rejected": -552.0, |
| "loss": 0.442, |
| "nll_loss": 0.9007812738418579, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.3742431700229645, |
| "rewards/margins": 2.436718702316284, |
| "rewards/rejected": -2.05859375, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9778456837280367, |
| "grad_norm": 99.79262236865266, |
| "learning_rate": 1.2733446519524618e-08, |
| "logits/chosen": -0.23527832329273224, |
| "logits/rejected": -0.218994140625, |
| "logps/chosen": -552.2000122070312, |
| "logps/rejected": -540.0, |
| "loss": 0.4244, |
| "nll_loss": 0.8421875238418579, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.4585937559604645, |
| "rewards/margins": 2.216992139816284, |
| "rewards/rejected": -1.75732421875, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.985485103132162, |
| "grad_norm": 140.72925309996154, |
| "learning_rate": 8.488964346349745e-09, |
| "logits/chosen": -0.2117919921875, |
| "logits/rejected": -0.18214111030101776, |
| "logps/chosen": -638.5999755859375, |
| "logps/rejected": -619.7999877929688, |
| "loss": 0.4388, |
| "nll_loss": 0.943359375, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.4850097596645355, |
| "rewards/margins": 2.5679688453674316, |
| "rewards/rejected": -2.085156202316284, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9931245225362872, |
| "grad_norm": 49.408062951712566, |
| "learning_rate": 4.244482173174872e-09, |
| "logits/chosen": -0.09760741889476776, |
| "logits/rejected": -0.031402587890625, |
| "logps/chosen": -593.2000122070312, |
| "logps/rejected": -587.0, |
| "loss": 0.4112, |
| "nll_loss": 0.869140625, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.6004394292831421, |
| "rewards/margins": 2.287890672683716, |
| "rewards/rejected": -1.6857421398162842, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_logits/chosen": -0.10636549443006516, |
| "eval_logits/rejected": -0.1420053392648697, |
| "eval_logps/chosen": -509.3846130371094, |
| "eval_logps/rejected": -480.6153869628906, |
| "eval_loss": 0.3973870873451233, |
| "eval_nll_loss": 0.9269831776618958, |
| "eval_rewards/accuracies": 0.7211538553237915, |
| "eval_rewards/chosen": 0.2996356785297394, |
| "eval_rewards/margins": 2.412559986114502, |
| "eval_rewards/rejected": -2.112868070602417, |
| "eval_runtime": 8.7861, |
| "eval_samples_per_second": 11.382, |
| "eval_steps_per_second": 1.48, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1309, |
| "total_flos": 0.0, |
| "train_loss": 0.4408511233930828, |
| "train_runtime": 2418.9161, |
| "train_samples_per_second": 4.327, |
| "train_steps_per_second": 0.541 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1309, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|