| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.908256880733944, |
| "eval_steps": 500, |
| "global_step": 270, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 0.4546981155872345, |
| "learning_rate": 1.8518518518518519e-06, |
| "logits/chosen": 1.790213942527771, |
| "logits/rejected": 1.8330585956573486, |
| "logps/chosen": -75.35237121582031, |
| "logps/rejected": -77.22242736816406, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.4000000059604645, |
| "rewards/chosen": -0.006426649633795023, |
| "rewards/margins": -0.008417336270213127, |
| "rewards/rejected": 0.0019906857050955296, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 0.5062414407730103, |
| "learning_rate": 3.7037037037037037e-06, |
| "logits/chosen": 1.8213964700698853, |
| "logits/rejected": 1.7818235158920288, |
| "logps/chosen": -83.15866088867188, |
| "logps/rejected": -78.38325500488281, |
| "loss": 0.695, |
| "rewards/accuracies": 0.42500001192092896, |
| "rewards/chosen": -0.006863894872367382, |
| "rewards/margins": -0.010942240245640278, |
| "rewards/rejected": 0.0040783449076116085, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 0.5203150510787964, |
| "learning_rate": 4.998119881260576e-06, |
| "logits/chosen": 1.811631441116333, |
| "logits/rejected": 1.7203019857406616, |
| "logps/chosen": -96.63399505615234, |
| "logps/rejected": -83.23392486572266, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": -0.00417748000472784, |
| "rewards/margins": -0.006159568205475807, |
| "rewards/rejected": 0.0019820884335786104, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 0.5335714221000671, |
| "learning_rate": 4.964774158361991e-06, |
| "logits/chosen": 1.8185322284698486, |
| "logits/rejected": 1.7785847187042236, |
| "logps/chosen": -80.90029907226562, |
| "logps/rejected": -76.1764144897461, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008144860155880451, |
| "rewards/margins": -0.001973528414964676, |
| "rewards/rejected": -0.006171331740915775, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 0.5139996409416199, |
| "learning_rate": 4.8902889044347e-06, |
| "logits/chosen": 1.8735860586166382, |
| "logits/rejected": 1.886704683303833, |
| "logps/chosen": -87.01000213623047, |
| "logps/rejected": -89.1278305053711, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0030386822763830423, |
| "rewards/margins": 0.006321282591670752, |
| "rewards/rejected": -0.009359965100884438, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 0.596449077129364, |
| "learning_rate": 4.775907352415367e-06, |
| "logits/chosen": 1.7842556238174438, |
| "logits/rejected": 1.7772290706634521, |
| "logps/chosen": -89.68411254882812, |
| "logps/rejected": -85.80751037597656, |
| "loss": 0.693, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.002062492538243532, |
| "rewards/margins": -0.0020373582374304533, |
| "rewards/rejected": -2.5133975213975646e-05, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 0.605808436870575, |
| "learning_rate": 4.623538644118244e-06, |
| "logits/chosen": 1.7955410480499268, |
| "logits/rejected": 1.742892861366272, |
| "logps/chosen": -77.76892852783203, |
| "logps/rejected": -75.139892578125, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.005501260980963707, |
| "rewards/margins": 0.003699531313031912, |
| "rewards/rejected": -0.009200791828334332, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 0.6306387782096863, |
| "learning_rate": 4.435725964760331e-06, |
| "logits/chosen": 1.7294387817382812, |
| "logits/rejected": 1.7057327032089233, |
| "logps/chosen": -86.89958190917969, |
| "logps/rejected": -75.46858215332031, |
| "loss": 0.6903, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.00010954421304631978, |
| "rewards/margins": 0.00032598042162135243, |
| "rewards/rejected": -0.00021643625223077834, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.302752293577982, |
| "grad_norm": 0.5730260014533997, |
| "learning_rate": 4.215604094671835e-06, |
| "logits/chosen": 1.879805326461792, |
| "logits/rejected": 1.9939963817596436, |
| "logps/chosen": -68.91841125488281, |
| "logps/rejected": -82.97428894042969, |
| "loss": 0.6903, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": 0.0012840941781178117, |
| "rewards/margins": 0.004185027442872524, |
| "rewards/rejected": -0.0029009338468313217, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.669724770642202, |
| "grad_norm": 0.903571605682373, |
| "learning_rate": 3.966847086696045e-06, |
| "logits/chosen": 1.7714732885360718, |
| "logits/rejected": 1.7483131885528564, |
| "logps/chosen": -88.14640808105469, |
| "logps/rejected": -79.63529968261719, |
| "loss": 0.6869, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 2.6259198421030305e-05, |
| "rewards/margins": 0.016201479360461235, |
| "rewards/rejected": -0.016175217926502228, |
| "step": 100 |
| }, |
| { |
| "epoch": 4.036697247706422, |
| "grad_norm": 0.6428204774856567, |
| "learning_rate": 3.693606942594873e-06, |
| "logits/chosen": 1.8136510848999023, |
| "logits/rejected": 1.8026773929595947, |
| "logps/chosen": -74.69807434082031, |
| "logps/rejected": -77.08403778076172, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.002973981201648712, |
| "rewards/margins": 0.004478845279663801, |
| "rewards/rejected": -0.0015048638451844454, |
| "step": 110 |
| }, |
| { |
| "epoch": 4.4036697247706424, |
| "grad_norm": 0.7341277599334717, |
| "learning_rate": 3.400444312011776e-06, |
| "logits/chosen": 1.8813444375991821, |
| "logits/rejected": 1.8145755529403687, |
| "logps/chosen": -102.3713150024414, |
| "logps/rejected": -73.9950942993164, |
| "loss": 0.6852, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.0024112462997436523, |
| "rewards/margins": 0.021793870255351067, |
| "rewards/rejected": -0.019382625818252563, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.770642201834862, |
| "grad_norm": 0.7996196746826172, |
| "learning_rate": 3.092252370695298e-06, |
| "logits/chosen": 1.7861392498016357, |
| "logits/rejected": 1.8571494817733765, |
| "logps/chosen": -73.10860443115234, |
| "logps/rejected": -84.28839874267578, |
| "loss": 0.684, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.009105954319238663, |
| "rewards/margins": 0.0323755219578743, |
| "rewards/rejected": -0.023269567638635635, |
| "step": 130 |
| }, |
| { |
| "epoch": 5.137614678899083, |
| "grad_norm": 0.7055425047874451, |
| "learning_rate": 2.7741751485313295e-06, |
| "logits/chosen": 1.8377151489257812, |
| "logits/rejected": 1.8748899698257446, |
| "logps/chosen": -97.56179809570312, |
| "logps/rejected": -88.68690490722656, |
| "loss": 0.6823, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.010783041827380657, |
| "rewards/margins": 0.026087775826454163, |
| "rewards/rejected": -0.015304732136428356, |
| "step": 140 |
| }, |
| { |
| "epoch": 5.504587155963303, |
| "grad_norm": 0.6778654456138611, |
| "learning_rate": 2.4515216705704396e-06, |
| "logits/chosen": 1.7777469158172607, |
| "logits/rejected": 1.7777780294418335, |
| "logps/chosen": -76.43773651123047, |
| "logps/rejected": -74.53337860107422, |
| "loss": 0.6803, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.003852830035611987, |
| "rewards/margins": 0.02566695772111416, |
| "rewards/rejected": -0.029519790783524513, |
| "step": 150 |
| }, |
| { |
| "epoch": 5.871559633027523, |
| "grad_norm": 2.0170881748199463, |
| "learning_rate": 2.129677344121879e-06, |
| "logits/chosen": 1.8108351230621338, |
| "logits/rejected": 1.867889165878296, |
| "logps/chosen": -84.06121063232422, |
| "logps/rejected": -92.29908752441406, |
| "loss": 0.6796, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.01210473570972681, |
| "rewards/margins": 0.01314194779843092, |
| "rewards/rejected": -0.02524668350815773, |
| "step": 160 |
| }, |
| { |
| "epoch": 6.238532110091743, |
| "grad_norm": 0.7809003591537476, |
| "learning_rate": 1.8140140709517467e-06, |
| "logits/chosen": 1.8063510656356812, |
| "logits/rejected": 1.9083925485610962, |
| "logps/chosen": -74.83514404296875, |
| "logps/rejected": -90.15823364257812, |
| "loss": 0.6758, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.004577965941280127, |
| "rewards/margins": 0.03555908426642418, |
| "rewards/rejected": -0.040137048810720444, |
| "step": 170 |
| }, |
| { |
| "epoch": 6.605504587155964, |
| "grad_norm": 0.7568692564964294, |
| "learning_rate": 1.509800584902108e-06, |
| "logits/chosen": 1.7594693899154663, |
| "logits/rejected": 1.736595869064331, |
| "logps/chosen": -87.5718994140625, |
| "logps/rejected": -83.89161682128906, |
| "loss": 0.6752, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.0013473846483975649, |
| "rewards/margins": 0.04137669503688812, |
| "rewards/rejected": -0.040029313415288925, |
| "step": 180 |
| }, |
| { |
| "epoch": 6.972477064220183, |
| "grad_norm": 0.7889443635940552, |
| "learning_rate": 1.2221145114853172e-06, |
| "logits/chosen": 1.8161766529083252, |
| "logits/rejected": 1.7991691827774048, |
| "logps/chosen": -83.15028381347656, |
| "logps/rejected": -71.78063201904297, |
| "loss": 0.6716, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0056212423369288445, |
| "rewards/margins": 0.043176714330911636, |
| "rewards/rejected": -0.03755547106266022, |
| "step": 190 |
| }, |
| { |
| "epoch": 7.339449541284404, |
| "grad_norm": 0.7979565262794495, |
| "learning_rate": 9.557576172663577e-07, |
| "logits/chosen": 1.7513843774795532, |
| "logits/rejected": 1.7776830196380615, |
| "logps/chosen": -100.34294128417969, |
| "logps/rejected": -96.10903930664062, |
| "loss": 0.6697, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.0050580548122525215, |
| "rewards/margins": 0.05877142399549484, |
| "rewards/rejected": -0.06382948160171509, |
| "step": 200 |
| }, |
| { |
| "epoch": 7.706422018348624, |
| "grad_norm": 0.7959036231040955, |
| "learning_rate": 7.151756636052529e-07, |
| "logits/chosen": 1.7774207592010498, |
| "logits/rejected": 1.8367748260498047, |
| "logps/chosen": -78.96214294433594, |
| "logps/rejected": -76.54395294189453, |
| "loss": 0.6704, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.014961203560233116, |
| "rewards/margins": 0.03050311841070652, |
| "rewards/rejected": -0.045464321970939636, |
| "step": 210 |
| }, |
| { |
| "epoch": 8.073394495412844, |
| "grad_norm": 0.7231891751289368, |
| "learning_rate": 5.043842024802675e-07, |
| "logits/chosen": 1.7771985530853271, |
| "logits/rejected": 1.831194519996643, |
| "logps/chosen": -80.49971008300781, |
| "logps/rejected": -84.05366516113281, |
| "loss": 0.6689, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.006958714686334133, |
| "rewards/margins": 0.06582571566104889, |
| "rewards/rejected": -0.072784423828125, |
| "step": 220 |
| }, |
| { |
| "epoch": 8.440366972477065, |
| "grad_norm": 0.8330283761024475, |
| "learning_rate": 3.269015529333805e-07, |
| "logits/chosen": 1.84121572971344, |
| "logits/rejected": 1.7711197137832642, |
| "logps/chosen": -94.49369812011719, |
| "logps/rejected": -83.2547378540039, |
| "loss": 0.6696, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.0076360441744327545, |
| "rewards/margins": 0.05511891841888428, |
| "rewards/rejected": -0.06275496631860733, |
| "step": 230 |
| }, |
| { |
| "epoch": 8.807339449541285, |
| "grad_norm": 0.8446040749549866, |
| "learning_rate": 1.8569007682777417e-07, |
| "logits/chosen": 1.8937292098999023, |
| "logits/rejected": 1.9111621379852295, |
| "logps/chosen": -83.84481811523438, |
| "logps/rejected": -82.11663818359375, |
| "loss": 0.6662, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.010129066184163094, |
| "rewards/margins": 0.05674300342798233, |
| "rewards/rejected": -0.06687206774950027, |
| "step": 240 |
| }, |
| { |
| "epoch": 9.174311926605505, |
| "grad_norm": 0.7800756692886353, |
| "learning_rate": 8.310673408334496e-08, |
| "logits/chosen": 1.7548353672027588, |
| "logits/rejected": 1.8221992254257202, |
| "logps/chosen": -73.01091003417969, |
| "logps/rejected": -86.10346984863281, |
| "loss": 0.6699, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.01050621084868908, |
| "rewards/margins": 0.050604354590177536, |
| "rewards/rejected": -0.061110567301511765, |
| "step": 250 |
| }, |
| { |
| "epoch": 9.541284403669724, |
| "grad_norm": 0.7628592252731323, |
| "learning_rate": 2.0863742672497244e-08, |
| "logits/chosen": 1.7504405975341797, |
| "logits/rejected": 1.8729193210601807, |
| "logps/chosen": -69.52116394042969, |
| "logps/rejected": -101.10639953613281, |
| "loss": 0.6687, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.023021433502435684, |
| "rewards/margins": 0.032114267349243164, |
| "rewards/rejected": -0.05513570457696915, |
| "step": 260 |
| }, |
| { |
| "epoch": 9.908256880733944, |
| "grad_norm": 0.6787808537483215, |
| "learning_rate": 0.0, |
| "logits/chosen": 1.7953345775604248, |
| "logits/rejected": 1.8516321182250977, |
| "logps/chosen": -92.99394989013672, |
| "logps/rejected": -93.45514678955078, |
| "loss": 0.6675, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.0009590781410224736, |
| "rewards/margins": 0.06170158460736275, |
| "rewards/rejected": -0.06074250862002373, |
| "step": 270 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 270, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.208786937802916e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|