Invalid JSON:
Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_metric": 21.83156394958496, | |
| "best_model_checkpoint": "./output/checkpoints/2024-05-27_09-04-31/checkpoint-100", | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 198, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.025252525252525252, | |
| "grad_norm": 26.353445053100586, | |
| "learning_rate": 4.000000000000001e-06, | |
| "logits/chosen": 0.14427797496318817, | |
| "logits/rejected": -0.5873457193374634, | |
| "logps/chosen": -0.901843249797821, | |
| "logps/rejected": -1.3607301712036133, | |
| "loss": 24.9998, | |
| "rewards/accuracies": 0.21250000596046448, | |
| "rewards/chosen": -1.578416777192615e-05, | |
| "rewards/margins": 2.430938138786587e-06, | |
| "rewards/rejected": -1.8215103409602307e-05, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.050505050505050504, | |
| "grad_norm": null, | |
| "learning_rate": 8.000000000000001e-06, | |
| "logits/chosen": -0.10329052060842514, | |
| "logits/rejected": -0.4683811664581299, | |
| "logps/chosen": -0.9063997268676758, | |
| "logps/rejected": -1.461859107017517, | |
| "loss": 24.9337, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.00036737616755999625, | |
| "rewards/margins": 0.0006723683327436447, | |
| "rewards/rejected": -0.0010397445876151323, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07575757575757576, | |
| "grad_norm": 13.749723434448242, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "logits/chosen": -0.2425023317337036, | |
| "logits/rejected": -0.6693668365478516, | |
| "logps/chosen": -0.8707982897758484, | |
| "logps/rejected": -1.1566194295883179, | |
| "loss": 24.9041, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.0014928742311894894, | |
| "rewards/margins": 0.0009821585845202208, | |
| "rewards/rejected": -0.00247503281570971, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.10101010101010101, | |
| "grad_norm": 25.53832244873047, | |
| "learning_rate": 1.8e-05, | |
| "logits/chosen": -0.46215763688087463, | |
| "logits/rejected": -0.9008939862251282, | |
| "logps/chosen": -0.959465503692627, | |
| "logps/rejected": -1.5446056127548218, | |
| "loss": 24.2631, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0067633287981152534, | |
| "rewards/margins": 0.007808461785316467, | |
| "rewards/rejected": -0.014571788720786572, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12626262626262627, | |
| "grad_norm": 45.06657791137695, | |
| "learning_rate": 1.9985985720017786e-05, | |
| "logits/chosen": -0.04087737202644348, | |
| "logits/rejected": -0.5188297033309937, | |
| "logps/chosen": -0.9965022802352905, | |
| "logps/rejected": -1.3733254671096802, | |
| "loss": 24.1692, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.014551234431564808, | |
| "rewards/margins": 0.009625318460166454, | |
| "rewards/rejected": -0.024176552891731262, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.15151515151515152, | |
| "grad_norm": 28.255924224853516, | |
| "learning_rate": 1.9900485105144544e-05, | |
| "logits/chosen": -0.14505064487457275, | |
| "logits/rejected": -0.5278365015983582, | |
| "logps/chosen": -1.0397828817367554, | |
| "logps/rejected": -1.44753897190094, | |
| "loss": 24.1349, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.018694132566452026, | |
| "rewards/margins": 0.01500606257468462, | |
| "rewards/rejected": -0.03370019793510437, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17676767676767677, | |
| "grad_norm": null, | |
| "learning_rate": 1.9776556239997146e-05, | |
| "logits/chosen": -0.4809038043022156, | |
| "logits/rejected": -0.9093053936958313, | |
| "logps/chosen": -1.3904650211334229, | |
| "logps/rejected": -2.406257390975952, | |
| "loss": 23.5774, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.04243111237883568, | |
| "rewards/margins": 0.052741266787052155, | |
| "rewards/rejected": -0.09517236799001694, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.20202020202020202, | |
| "grad_norm": 46.83095932006836, | |
| "learning_rate": 1.955324742088516e-05, | |
| "logits/chosen": -0.6266540288925171, | |
| "logits/rejected": -1.0290076732635498, | |
| "logps/chosen": -1.2514160871505737, | |
| "logps/rejected": -2.1771531105041504, | |
| "loss": 22.3291, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03719799965620041, | |
| "rewards/margins": 0.04375718533992767, | |
| "rewards/rejected": -0.08095519244670868, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 76.44580841064453, | |
| "learning_rate": 1.9255590665712214e-05, | |
| "logits/chosen": -0.6130943894386292, | |
| "logits/rejected": -1.143413782119751, | |
| "logps/chosen": -1.5433876514434814, | |
| "logps/rejected": -2.6532750129699707, | |
| "loss": 21.656, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.05903216451406479, | |
| "rewards/margins": 0.05665038153529167, | |
| "rewards/rejected": -0.11568254232406616, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.25252525252525254, | |
| "grad_norm": 78.35297393798828, | |
| "learning_rate": 1.8965472436868288e-05, | |
| "logits/chosen": -0.757357656955719, | |
| "logits/rejected": -1.0666834115982056, | |
| "logps/chosen": -1.3742765188217163, | |
| "logps/rejected": -3.0053694248199463, | |
| "loss": 22.6627, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.05383248254656792, | |
| "rewards/margins": 0.09586457908153534, | |
| "rewards/rejected": -0.14969706535339355, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 82.29180145263672, | |
| "learning_rate": 1.8540204424421264e-05, | |
| "logits/chosen": -0.8564749956130981, | |
| "logits/rejected": -1.3737789392471313, | |
| "logps/chosen": -1.733337163925171, | |
| "logps/rejected": -3.3698067665100098, | |
| "loss": 19.3611, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.08400858938694, | |
| "rewards/margins": 0.10341653972864151, | |
| "rewards/rejected": -0.18742512166500092, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.30303030303030304, | |
| "grad_norm": 128.23907470703125, | |
| "learning_rate": 1.804847246055326e-05, | |
| "logits/chosen": -0.9640189409255981, | |
| "logits/rejected": -1.1732914447784424, | |
| "logps/chosen": -2.538499593734741, | |
| "logps/rejected": -3.0090465545654297, | |
| "loss": 29.7881, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.14390432834625244, | |
| "rewards/margins": 0.02762184664607048, | |
| "rewards/rejected": -0.17152616381645203, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3282828282828283, | |
| "grad_norm": 49.566158294677734, | |
| "learning_rate": 1.7494103438361252e-05, | |
| "logits/chosen": -0.7158849239349365, | |
| "logits/rejected": -1.0623328685760498, | |
| "logps/chosen": -1.4396604299545288, | |
| "logps/rejected": -2.000624179840088, | |
| "loss": 21.9915, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.06708293408155441, | |
| "rewards/margins": 0.03939032554626465, | |
| "rewards/rejected": -0.10647325217723846, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.35353535353535354, | |
| "grad_norm": 76.89603424072266, | |
| "learning_rate": 1.6881411722458688e-05, | |
| "logits/chosen": -0.8769875764846802, | |
| "logits/rejected": -1.1103827953338623, | |
| "logps/chosen": -2.1615917682647705, | |
| "logps/rejected": -3.0439255237579346, | |
| "loss": 21.986, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.12244679778814316, | |
| "rewards/margins": 0.06274138391017914, | |
| "rewards/rejected": -0.1851881742477417, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3787878787878788, | |
| "grad_norm": 134.9673309326172, | |
| "learning_rate": 1.6215165572528598e-05, | |
| "logits/chosen": -1.343915581703186, | |
| "logits/rejected": -1.401227355003357, | |
| "logps/chosen": -2.2325069904327393, | |
| "logps/rejected": -3.132831573486328, | |
| "loss": 21.2106, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.13722343742847443, | |
| "rewards/margins": 0.06202084943652153, | |
| "rewards/rejected": -0.19924426078796387, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.40404040404040403, | |
| "grad_norm": 137.21859741210938, | |
| "learning_rate": 1.5500550034448415e-05, | |
| "logits/chosen": -1.3024094104766846, | |
| "logits/rejected": -1.5494719743728638, | |
| "logps/chosen": -2.4210548400878906, | |
| "logps/rejected": -3.3495230674743652, | |
| "loss": 23.1095, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.15112502872943878, | |
| "rewards/margins": 0.07425413280725479, | |
| "rewards/rejected": -0.22537918388843536, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4292929292929293, | |
| "grad_norm": 351.0603942871094, | |
| "learning_rate": 1.5050862598575474e-05, | |
| "logits/chosen": -1.311993956565857, | |
| "logits/rejected": -1.6289136409759521, | |
| "logps/chosen": -2.586198091506958, | |
| "logps/rejected": -5.161986827850342, | |
| "loss": 25.0728, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.16497337818145752, | |
| "rewards/margins": 0.17613837122917175, | |
| "rewards/rejected": -0.3411117494106293, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 376.21038818359375, | |
| "learning_rate": 1.4270564388663761e-05, | |
| "logits/chosen": -1.4695305824279785, | |
| "logits/rejected": -1.5699679851531982, | |
| "logps/chosen": -3.0274829864501953, | |
| "logps/rejected": -3.7816379070281982, | |
| "loss": 24.3757, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.20180432498455048, | |
| "rewards/margins": 0.06594176590442657, | |
| "rewards/rejected": -0.26774606108665466, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4797979797979798, | |
| "grad_norm": 83.94548034667969, | |
| "learning_rate": 1.3457030606163564e-05, | |
| "logits/chosen": -1.542257308959961, | |
| "logits/rejected": -1.640545129776001, | |
| "logps/chosen": -3.1931662559509277, | |
| "logps/rejected": -4.362542152404785, | |
| "loss": 21.7905, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.21774420142173767, | |
| "rewards/margins": 0.09037742763757706, | |
| "rewards/rejected": -0.30812162160873413, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5050505050505051, | |
| "grad_norm": 174.58786010742188, | |
| "learning_rate": 1.2616592559684408e-05, | |
| "logits/chosen": -1.5426051616668701, | |
| "logits/rejected": -1.7211687564849854, | |
| "logps/chosen": -2.798499345779419, | |
| "logps/rejected": -3.3964920043945312, | |
| "loss": 25.8166, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.17160701751708984, | |
| "rewards/margins": 0.05040215328335762, | |
| "rewards/rejected": -0.22200918197631836, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5050505050505051, | |
| "eval_logits/chosen": -1.919495940208435, | |
| "eval_logits/rejected": -2.218794584274292, | |
| "eval_logps/chosen": -2.5173401832580566, | |
| "eval_logps/rejected": -3.3597702980041504, | |
| "eval_loss": 21.83156394958496, | |
| "eval_rewards/accuracies": 0.6421874761581421, | |
| "eval_rewards/chosen": -0.15560917556285858, | |
| "eval_rewards/margins": 0.05931411311030388, | |
| "eval_rewards/rejected": -0.21492330729961395, | |
| "eval_runtime": 256.4168, | |
| "eval_samples_per_second": 2.496, | |
| "eval_steps_per_second": 0.156, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5303030303030303, | |
| "grad_norm": 94.69363403320312, | |
| "learning_rate": 1.1755790939673208e-05, | |
| "logits/chosen": -1.6892818212509155, | |
| "logits/rejected": -1.860984206199646, | |
| "logps/chosen": -2.6088526248931885, | |
| "logps/rejected": -3.893810272216797, | |
| "loss": 24.6292, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.1631762683391571, | |
| "rewards/margins": 0.0721951425075531, | |
| "rewards/rejected": -0.235371395945549, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 299.636962890625, | |
| "learning_rate": 1.088132491563602e-05, | |
| "logits/chosen": -1.6523587703704834, | |
| "logits/rejected": -1.648794412612915, | |
| "logps/chosen": -2.3819022178649902, | |
| "logps/rejected": -3.91084623336792, | |
| "loss": 24.6609, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.14854103326797485, | |
| "rewards/margins": 0.08199040591716766, | |
| "rewards/rejected": -0.23053142428398132, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5808080808080808, | |
| "grad_norm": 179.6541748046875, | |
| "learning_rate": 1e-05, | |
| "logits/chosen": -1.7479238510131836, | |
| "logits/rejected": -1.8762273788452148, | |
| "logps/chosen": -2.4850611686706543, | |
| "logps/rejected": -3.9139976501464844, | |
| "loss": 21.9825, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.1599404662847519, | |
| "rewards/margins": 0.0621558353304863, | |
| "rewards/rejected": -0.2220962941646576, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 416.4597473144531, | |
| "learning_rate": 9.118675084363986e-06, | |
| "logits/chosen": -1.6893389225006104, | |
| "logits/rejected": -1.9248136281967163, | |
| "logps/chosen": -2.431549549102783, | |
| "logps/rejected": -3.4075489044189453, | |
| "loss": 23.7008, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.1548345983028412, | |
| "rewards/margins": 0.03470990061759949, | |
| "rewards/rejected": -0.18954448401927948, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6313131313131313, | |
| "grad_norm": 106.20417022705078, | |
| "learning_rate": 8.244209060326794e-06, | |
| "logits/chosen": -1.6689144372940063, | |
| "logits/rejected": -1.932077407836914, | |
| "logps/chosen": -2.1763813495635986, | |
| "logps/rejected": -4.08168888092041, | |
| "loss": 20.9314, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.13834789395332336, | |
| "rewards/margins": 0.07367957383394241, | |
| "rewards/rejected": -0.21202746033668518, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6565656565656566, | |
| "grad_norm": 628.0269775390625, | |
| "learning_rate": 7.383407440315595e-06, | |
| "logits/chosen": -1.7707713842391968, | |
| "logits/rejected": -1.8211300373077393, | |
| "logps/chosen": -2.591797351837158, | |
| "logps/rejected": -4.223265647888184, | |
| "loss": 19.9625, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.1714746505022049, | |
| "rewards/margins": 0.10012316703796387, | |
| "rewards/rejected": -0.27159780263900757, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 166.4376220703125, | |
| "learning_rate": 6.542969393836436e-06, | |
| "logits/chosen": -1.6975538730621338, | |
| "logits/rejected": -1.7919883728027344, | |
| "logps/chosen": -2.655794858932495, | |
| "logps/rejected": -3.9739787578582764, | |
| "loss": 19.885, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.17945000529289246, | |
| "rewards/margins": 0.08590926975011826, | |
| "rewards/rejected": -0.2653592824935913, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7070707070707071, | |
| "grad_norm": 924.48388671875, | |
| "learning_rate": 5.729435611336239e-06, | |
| "logits/chosen": -1.6683040857315063, | |
| "logits/rejected": -1.8297067880630493, | |
| "logps/chosen": -3.389685869216919, | |
| "logps/rejected": -4.693975925445557, | |
| "loss": 21.4041, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.22584636509418488, | |
| "rewards/margins": 0.08828467130661011, | |
| "rewards/rejected": -0.3141310513019562, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7323232323232324, | |
| "grad_norm": 208.90626525878906, | |
| "learning_rate": 4.949137401424527e-06, | |
| "logits/chosen": -1.690625786781311, | |
| "logits/rejected": -1.8179527521133423, | |
| "logps/chosen": -3.1737165451049805, | |
| "logps/rejected": -4.919283866882324, | |
| "loss": 19.995, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.22326549887657166, | |
| "rewards/margins": 0.10102611780166626, | |
| "rewards/rejected": -0.3242916166782379, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7575757575757576, | |
| "grad_norm": 243.00192260742188, | |
| "learning_rate": 4.208147417604665e-06, | |
| "logits/chosen": -1.6386387348175049, | |
| "logits/rejected": -1.7950681447982788, | |
| "logps/chosen": -3.373720645904541, | |
| "logps/rejected": -4.483418941497803, | |
| "loss": 20.3863, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.24290914833545685, | |
| "rewards/margins": 0.07580031454563141, | |
| "rewards/rejected": -0.31870946288108826, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7828282828282829, | |
| "grad_norm": 205.0689697265625, | |
| "learning_rate": 3.51223239798274e-06, | |
| "logits/chosen": -1.7644588947296143, | |
| "logits/rejected": -1.792384147644043, | |
| "logps/chosen": -2.8454086780548096, | |
| "logps/rejected": -4.108365058898926, | |
| "loss": 22.1816, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.19682423770427704, | |
| "rewards/margins": 0.07537179440259933, | |
| "rewards/rejected": -0.2721960246562958, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 202.64425659179688, | |
| "learning_rate": 2.8668082857562006e-06, | |
| "logits/chosen": -1.7155227661132812, | |
| "logits/rejected": -1.7265026569366455, | |
| "logps/chosen": -3.2442708015441895, | |
| "logps/rejected": -5.168461799621582, | |
| "loss": 20.5007, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.22376994788646698, | |
| "rewards/margins": 0.11927111446857452, | |
| "rewards/rejected": -0.3430410623550415, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 146.06727600097656, | |
| "learning_rate": 2.2768980797561125e-06, | |
| "logits/chosen": -1.5448095798492432, | |
| "logits/rejected": -1.6818040609359741, | |
| "logps/chosen": -3.1757941246032715, | |
| "logps/rejected": -4.661167144775391, | |
| "loss": 23.3162, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.22800321877002716, | |
| "rewards/margins": 0.10849568992853165, | |
| "rewards/rejected": -0.336498886346817, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.8585858585858586, | |
| "grad_norm": 266.4602966308594, | |
| "learning_rate": 1.7470927430702277e-06, | |
| "logits/chosen": -1.77353036403656, | |
| "logits/rejected": -1.8091161251068115, | |
| "logps/chosen": -3.679595470428467, | |
| "logps/rejected": -5.641579627990723, | |
| "loss": 21.0313, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2708187699317932, | |
| "rewards/margins": 0.11891458183526993, | |
| "rewards/rejected": -0.38973334431648254, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8838383838383839, | |
| "grad_norm": 191.99391174316406, | |
| "learning_rate": 1.281515473974614e-06, | |
| "logits/chosen": -1.7262178659439087, | |
| "logits/rejected": -1.7621949911117554, | |
| "logps/chosen": -3.6915946006774902, | |
| "logps/rejected": -4.522196292877197, | |
| "loss": 23.1575, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.2737148106098175, | |
| "rewards/margins": 0.06611393392086029, | |
| "rewards/rejected": -0.339828759431839, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 445.5780334472656, | |
| "learning_rate": 8.837896172345827e-07, | |
| "logits/chosen": -1.7799314260482788, | |
| "logits/rejected": -1.758079171180725, | |
| "logps/chosen": -3.744454860687256, | |
| "logps/rejected": -5.533487319946289, | |
| "loss": 22.4579, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.26133713126182556, | |
| "rewards/margins": 0.10725338757038116, | |
| "rewards/rejected": -0.36859050393104553, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9343434343434344, | |
| "grad_norm": 351.77313232421875, | |
| "learning_rate": 5.570104655044428e-07, | |
| "logits/chosen": -1.8014914989471436, | |
| "logits/rejected": -1.8869857788085938, | |
| "logps/chosen": -3.1039249897003174, | |
| "logps/rejected": -4.702515602111816, | |
| "loss": 23.8499, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.22470612823963165, | |
| "rewards/margins": 0.12236537039279938, | |
| "rewards/rejected": -0.34707149863243103, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9595959595959596, | |
| "grad_norm": 203.9517059326172, | |
| "learning_rate": 3.0372117028111825e-07, | |
| "logits/chosen": -1.650368332862854, | |
| "logits/rejected": -1.7378900051116943, | |
| "logps/chosen": -3.5179672241210938, | |
| "logps/rejected": -4.001964092254639, | |
| "loss": 25.7021, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.26367172598838806, | |
| "rewards/margins": 0.021851424127817154, | |
| "rewards/rejected": -0.2855231761932373, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9848484848484849, | |
| "grad_norm": 358.923095703125, | |
| "learning_rate": 1.2589294988404887e-07, | |
| "logits/chosen": -1.6349338293075562, | |
| "logits/rejected": -1.7700506448745728, | |
| "logps/chosen": -3.084740161895752, | |
| "logps/rejected": -4.999522686004639, | |
| "loss": 18.5645, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.22231082618236542, | |
| "rewards/margins": 0.12103237211704254, | |
| "rewards/rejected": -0.34334319829940796, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 198, | |
| "total_flos": 0.0, | |
| "train_loss": 22.75462433786103, | |
| "train_runtime": 3251.7686, | |
| "train_samples_per_second": 0.973, | |
| "train_steps_per_second": 0.061 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 198, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |