| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.967020426794477, |
| "eval_steps": 100, |
| "global_step": 6500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.5662100456621e-10, |
| "logits/chosen": -2.4946703910827637, |
| "logits/rejected": -2.335416316986084, |
| "logps/chosen": -85.90689086914062, |
| "logps/rejected": -62.35003662109375, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.5662100456621e-09, |
| "logits/chosen": -2.267062187194824, |
| "logits/rejected": -1.9090423583984375, |
| "logps/chosen": -88.730712890625, |
| "logps/rejected": -68.25267028808594, |
| "loss": 0.6976, |
| "rewards/accuracies": 0.5138888955116272, |
| "rewards/chosen": 0.0035505560226738453, |
| "rewards/margins": 0.019558124244213104, |
| "rewards/rejected": -0.016007568687200546, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.1324200913242e-09, |
| "logits/chosen": -2.2504024505615234, |
| "logits/rejected": -1.9175926446914673, |
| "logps/chosen": -94.94258880615234, |
| "logps/rejected": -70.55738067626953, |
| "loss": 0.6949, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.02249746397137642, |
| "rewards/margins": 0.018389523029327393, |
| "rewards/rejected": 0.004107940010726452, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.36986301369863e-08, |
| "logits/chosen": -2.2938480377197266, |
| "logits/rejected": -1.9231021404266357, |
| "logps/chosen": -89.45714569091797, |
| "logps/rejected": -70.13645935058594, |
| "loss": 0.6899, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": 0.025471080094575882, |
| "rewards/margins": 0.030790437012910843, |
| "rewards/rejected": -0.005319356918334961, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.82648401826484e-08, |
| "logits/chosen": -2.323638439178467, |
| "logits/rejected": -1.9527565240859985, |
| "logps/chosen": -92.12770080566406, |
| "logps/rejected": -68.82365417480469, |
| "loss": 0.6845, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.022075748071074486, |
| "rewards/margins": 0.03521919250488281, |
| "rewards/rejected": -0.013143444433808327, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.28310502283105e-08, |
| "logits/chosen": -2.302112340927124, |
| "logits/rejected": -1.9624712467193604, |
| "logps/chosen": -89.31111907958984, |
| "logps/rejected": -67.56095123291016, |
| "loss": 0.6716, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.02959294244647026, |
| "rewards/margins": 0.04926164075732231, |
| "rewards/rejected": -0.01966869831085205, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.73972602739726e-08, |
| "logits/chosen": -2.3690874576568604, |
| "logits/rejected": -2.003035068511963, |
| "logps/chosen": -94.92839050292969, |
| "logps/rejected": -72.64738464355469, |
| "loss": 0.6588, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.028924476355314255, |
| "rewards/margins": 0.0910693034529686, |
| "rewards/rejected": -0.06214482709765434, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 3.19634703196347e-08, |
| "logits/chosen": -2.251183032989502, |
| "logits/rejected": -1.8819090127944946, |
| "logps/chosen": -91.9073257446289, |
| "logps/rejected": -69.90731811523438, |
| "loss": 0.6357, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.06686282157897949, |
| "rewards/margins": 0.17646007239818573, |
| "rewards/rejected": -0.10959725081920624, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 3.65296803652968e-08, |
| "logits/chosen": -2.2135210037231445, |
| "logits/rejected": -1.8786967992782593, |
| "logps/chosen": -96.14964294433594, |
| "logps/rejected": -70.20441436767578, |
| "loss": 0.5988, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.11508840322494507, |
| "rewards/margins": 0.2152937352657318, |
| "rewards/rejected": -0.10020533949136734, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.10958904109589e-08, |
| "logits/chosen": -2.2189507484436035, |
| "logits/rejected": -1.9139974117279053, |
| "logps/chosen": -83.40045928955078, |
| "logps/rejected": -64.86124420166016, |
| "loss": 0.5451, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.12101010233163834, |
| "rewards/margins": 0.3227211534976959, |
| "rewards/rejected": -0.2017110288143158, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.5662100456621e-08, |
| "logits/chosen": -2.3013663291931152, |
| "logits/rejected": -1.9398345947265625, |
| "logps/chosen": -89.51341247558594, |
| "logps/rejected": -67.42799377441406, |
| "loss": 0.5166, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1514771282672882, |
| "rewards/margins": 0.42826494574546814, |
| "rewards/rejected": -0.27678781747817993, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_logits/chosen": -2.261293411254883, |
| "eval_logits/rejected": -1.9334125518798828, |
| "eval_logps/chosen": -88.47880554199219, |
| "eval_logps/rejected": -67.38658905029297, |
| "eval_loss": 0.5020039081573486, |
| "eval_rewards/accuracies": 0.9497206807136536, |
| "eval_rewards/chosen": 0.17589102685451508, |
| "eval_rewards/margins": 0.4537213146686554, |
| "eval_rewards/rejected": -0.2778303027153015, |
| "eval_runtime": 310.5221, |
| "eval_samples_per_second": 9.217, |
| "eval_steps_per_second": 0.576, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 5.02283105022831e-08, |
| "logits/chosen": -2.3438382148742676, |
| "logits/rejected": -1.9776771068572998, |
| "logps/chosen": -84.6239013671875, |
| "logps/rejected": -64.25120544433594, |
| "loss": 0.4884, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.23512157797813416, |
| "rewards/margins": 0.4826792776584625, |
| "rewards/rejected": -0.24755771458148956, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 5.47945205479452e-08, |
| "logits/chosen": -2.305763006210327, |
| "logits/rejected": -1.9242515563964844, |
| "logps/chosen": -93.78631591796875, |
| "logps/rejected": -71.03646087646484, |
| "loss": 0.452, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.25619563460350037, |
| "rewards/margins": 0.7088645100593567, |
| "rewards/rejected": -0.45266884565353394, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 5.93607305936073e-08, |
| "logits/chosen": -2.1994524002075195, |
| "logits/rejected": -1.8348830938339233, |
| "logps/chosen": -93.8993911743164, |
| "logps/rejected": -67.64842224121094, |
| "loss": 0.3697, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.37854477763175964, |
| "rewards/margins": 0.9585170745849609, |
| "rewards/rejected": -0.5799722671508789, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 6.39269406392694e-08, |
| "logits/chosen": -2.339900493621826, |
| "logits/rejected": -1.9880163669586182, |
| "logps/chosen": -91.68956756591797, |
| "logps/rejected": -70.57650756835938, |
| "loss": 0.3139, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4103256165981293, |
| "rewards/margins": 1.110944390296936, |
| "rewards/rejected": -0.7006188631057739, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 6.84931506849315e-08, |
| "logits/chosen": -2.3096134662628174, |
| "logits/rejected": -1.9507678747177124, |
| "logps/chosen": -88.14106750488281, |
| "logps/rejected": -69.87593841552734, |
| "loss": 0.2834, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5618699789047241, |
| "rewards/margins": 1.299116849899292, |
| "rewards/rejected": -0.7372468113899231, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.30593607305936e-08, |
| "logits/chosen": -2.2230865955352783, |
| "logits/rejected": -1.8755073547363281, |
| "logps/chosen": -93.29566955566406, |
| "logps/rejected": -71.75144958496094, |
| "loss": 0.2673, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.5255166292190552, |
| "rewards/margins": 1.4965600967407227, |
| "rewards/rejected": -0.9710434079170227, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.76255707762557e-08, |
| "logits/chosen": -2.1584527492523193, |
| "logits/rejected": -1.9142663478851318, |
| "logps/chosen": -84.84449768066406, |
| "logps/rejected": -71.8950424194336, |
| "loss": 0.2645, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.43970975279808044, |
| "rewards/margins": 1.3599587678909302, |
| "rewards/rejected": -0.9202489852905273, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 8.21917808219178e-08, |
| "logits/chosen": -2.317610740661621, |
| "logits/rejected": -1.8754488229751587, |
| "logps/chosen": -94.67909240722656, |
| "logps/rejected": -69.68304443359375, |
| "loss": 0.2408, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.7285944819450378, |
| "rewards/margins": 1.5651540756225586, |
| "rewards/rejected": -0.8365596532821655, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 8.67579908675799e-08, |
| "logits/chosen": -2.3835504055023193, |
| "logits/rejected": -2.005877733230591, |
| "logps/chosen": -89.22205352783203, |
| "logps/rejected": -72.03932189941406, |
| "loss": 0.2142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7577417492866516, |
| "rewards/margins": 1.8872630596160889, |
| "rewards/rejected": -1.1295212507247925, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.1324200913242e-08, |
| "logits/chosen": -2.2697577476501465, |
| "logits/rejected": -1.892371416091919, |
| "logps/chosen": -89.17048645019531, |
| "logps/rejected": -69.54945373535156, |
| "loss": 0.1947, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7742541432380676, |
| "rewards/margins": 1.9032939672470093, |
| "rewards/rejected": -1.1290397644042969, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_logits/chosen": -2.2516348361968994, |
| "eval_logits/rejected": -1.9177496433258057, |
| "eval_logps/chosen": -87.51004028320312, |
| "eval_logps/rejected": -69.3072738647461, |
| "eval_loss": 0.19914411008358002, |
| "eval_rewards/accuracies": 0.9748603105545044, |
| "eval_rewards/chosen": 0.6602736115455627, |
| "eval_rewards/margins": 1.898452877998352, |
| "eval_rewards/rejected": -1.2381792068481445, |
| "eval_runtime": 222.9167, |
| "eval_samples_per_second": 12.839, |
| "eval_steps_per_second": 0.803, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.58904109589041e-08, |
| "logits/chosen": -2.25944185256958, |
| "logits/rejected": -1.9700183868408203, |
| "logps/chosen": -84.00562286376953, |
| "logps/rejected": -68.82413482666016, |
| "loss": 0.1838, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.6912348866462708, |
| "rewards/margins": 1.9548532962799072, |
| "rewards/rejected": -1.2636187076568604, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.004566210045662e-07, |
| "logits/chosen": -2.2063567638397217, |
| "logits/rejected": -1.9247395992279053, |
| "logps/chosen": -83.2778091430664, |
| "logps/rejected": -68.1301498413086, |
| "loss": 0.1654, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.7392381429672241, |
| "rewards/margins": 2.103783130645752, |
| "rewards/rejected": -1.3645451068878174, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.050228310502283e-07, |
| "logits/chosen": -2.196837902069092, |
| "logits/rejected": -1.8360687494277954, |
| "logps/chosen": -90.70558166503906, |
| "logps/rejected": -71.97917175292969, |
| "loss": 0.138, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.6917696595191956, |
| "rewards/margins": 2.567314386367798, |
| "rewards/rejected": -1.875544786453247, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.095890410958904e-07, |
| "logits/chosen": -2.269440174102783, |
| "logits/rejected": -1.8552277088165283, |
| "logps/chosen": -91.484130859375, |
| "logps/rejected": -67.85257720947266, |
| "loss": 0.1148, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.0015159845352173, |
| "rewards/margins": 2.8821816444396973, |
| "rewards/rejected": -1.8806654214859009, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.141552511415525e-07, |
| "logits/chosen": -2.2505125999450684, |
| "logits/rejected": -1.8530277013778687, |
| "logps/chosen": -91.98687744140625, |
| "logps/rejected": -76.97599792480469, |
| "loss": 0.0979, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.9848436117172241, |
| "rewards/margins": 3.093383550643921, |
| "rewards/rejected": -2.1085400581359863, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.187214611872146e-07, |
| "logits/chosen": -2.3471381664276123, |
| "logits/rejected": -1.9955825805664062, |
| "logps/chosen": -89.78094482421875, |
| "logps/rejected": -75.41387939453125, |
| "loss": 0.0891, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.1348358392715454, |
| "rewards/margins": 3.3899853229522705, |
| "rewards/rejected": -2.2551493644714355, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.232876712328767e-07, |
| "logits/chosen": -2.2680814266204834, |
| "logits/rejected": -1.8932344913482666, |
| "logps/chosen": -90.12066650390625, |
| "logps/rejected": -74.62397766113281, |
| "loss": 0.0802, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1657724380493164, |
| "rewards/margins": 3.604767322540283, |
| "rewards/rejected": -2.438995122909546, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.278538812785388e-07, |
| "logits/chosen": -2.276060104370117, |
| "logits/rejected": -1.9517319202423096, |
| "logps/chosen": -91.58979797363281, |
| "logps/rejected": -80.03755187988281, |
| "loss": 0.067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0883013010025024, |
| "rewards/margins": 4.193854808807373, |
| "rewards/rejected": -3.1055538654327393, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.324200913242009e-07, |
| "logits/chosen": -2.288156509399414, |
| "logits/rejected": -1.8913819789886475, |
| "logps/chosen": -92.2828598022461, |
| "logps/rejected": -77.56204986572266, |
| "loss": 0.0674, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2855546474456787, |
| "rewards/margins": 3.8462796211242676, |
| "rewards/rejected": -2.560725212097168, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.36986301369863e-07, |
| "logits/chosen": -2.221187114715576, |
| "logits/rejected": -1.8750922679901123, |
| "logps/chosen": -84.05517578125, |
| "logps/rejected": -73.2014389038086, |
| "loss": 0.0662, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3618335723876953, |
| "rewards/margins": 4.3457865715026855, |
| "rewards/rejected": -2.9839529991149902, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_logits/chosen": -2.245551586151123, |
| "eval_logits/rejected": -1.9041495323181152, |
| "eval_logps/chosen": -86.4066162109375, |
| "eval_logps/rejected": -73.35151672363281, |
| "eval_loss": 0.06390678137540817, |
| "eval_rewards/accuracies": 0.9804469347000122, |
| "eval_rewards/chosen": 1.2119877338409424, |
| "eval_rewards/margins": 4.47228479385376, |
| "eval_rewards/rejected": -3.260296106338501, |
| "eval_runtime": 197.1717, |
| "eval_samples_per_second": 14.515, |
| "eval_steps_per_second": 0.908, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.415525114155251e-07, |
| "logits/chosen": -2.24013090133667, |
| "logits/rejected": -1.8544371128082275, |
| "logps/chosen": -88.1822280883789, |
| "logps/rejected": -72.43907928466797, |
| "loss": 0.0602, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3825366497039795, |
| "rewards/margins": 4.868518829345703, |
| "rewards/rejected": -3.4859824180603027, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.461187214611872e-07, |
| "logits/chosen": -2.2278833389282227, |
| "logits/rejected": -1.8756189346313477, |
| "logps/chosen": -93.53022003173828, |
| "logps/rejected": -76.30734252929688, |
| "loss": 0.0517, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2675020694732666, |
| "rewards/margins": 4.621975898742676, |
| "rewards/rejected": -3.35447359085083, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.506849315068493e-07, |
| "logits/chosen": -2.2458999156951904, |
| "logits/rejected": -1.9483330249786377, |
| "logps/chosen": -84.49108123779297, |
| "logps/rejected": -74.85401153564453, |
| "loss": 0.0488, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4710012674331665, |
| "rewards/margins": 5.068787574768066, |
| "rewards/rejected": -3.5977866649627686, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.552511415525114e-07, |
| "logits/chosen": -2.2143704891204834, |
| "logits/rejected": -1.8886018991470337, |
| "logps/chosen": -85.12834167480469, |
| "logps/rejected": -73.5596694946289, |
| "loss": 0.0535, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3029371500015259, |
| "rewards/margins": 5.119471073150635, |
| "rewards/rejected": -3.8165335655212402, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.598173515981735e-07, |
| "logits/chosen": -2.2415523529052734, |
| "logits/rejected": -1.9033533334732056, |
| "logps/chosen": -81.90745544433594, |
| "logps/rejected": -78.7509536743164, |
| "loss": 0.0511, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.247246503829956, |
| "rewards/margins": 5.400745391845703, |
| "rewards/rejected": -4.153499126434326, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.643835616438356e-07, |
| "logits/chosen": -2.198545455932617, |
| "logits/rejected": -1.8948042392730713, |
| "logps/chosen": -85.43161010742188, |
| "logps/rejected": -76.58003997802734, |
| "loss": 0.0389, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3219624757766724, |
| "rewards/margins": 5.153651237487793, |
| "rewards/rejected": -3.8316891193389893, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.689497716894977e-07, |
| "logits/chosen": -2.288311004638672, |
| "logits/rejected": -1.879122018814087, |
| "logps/chosen": -88.61750793457031, |
| "logps/rejected": -73.41519927978516, |
| "loss": 0.0448, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.605739951133728, |
| "rewards/margins": 5.448731899261475, |
| "rewards/rejected": -3.8429923057556152, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.735159817351598e-07, |
| "logits/chosen": -2.2476701736450195, |
| "logits/rejected": -1.837656021118164, |
| "logps/chosen": -88.90941619873047, |
| "logps/rejected": -76.27139282226562, |
| "loss": 0.027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.609424352645874, |
| "rewards/margins": 6.268812656402588, |
| "rewards/rejected": -4.659388065338135, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.780821917808219e-07, |
| "logits/chosen": -2.256505012512207, |
| "logits/rejected": -1.9018818140029907, |
| "logps/chosen": -87.40512084960938, |
| "logps/rejected": -78.49671936035156, |
| "loss": 0.0365, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3038495779037476, |
| "rewards/margins": 5.88823938369751, |
| "rewards/rejected": -4.584390163421631, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.82648401826484e-07, |
| "logits/chosen": -2.2472777366638184, |
| "logits/rejected": -1.9034316539764404, |
| "logps/chosen": -87.68411254882812, |
| "logps/rejected": -83.73296356201172, |
| "loss": 0.0284, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.4865331649780273, |
| "rewards/margins": 6.618298530578613, |
| "rewards/rejected": -5.131765842437744, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_logits/chosen": -2.2571330070495605, |
| "eval_logits/rejected": -1.9162304401397705, |
| "eval_logps/chosen": -86.22880554199219, |
| "eval_logps/rejected": -77.06842803955078, |
| "eval_loss": 0.034718479961156845, |
| "eval_rewards/accuracies": 0.9832402467727661, |
| "eval_rewards/chosen": 1.300887942314148, |
| "eval_rewards/margins": 6.419642925262451, |
| "eval_rewards/rejected": -5.118754863739014, |
| "eval_runtime": 283.8557, |
| "eval_samples_per_second": 10.083, |
| "eval_steps_per_second": 0.631, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.872146118721461e-07, |
| "logits/chosen": -2.230461597442627, |
| "logits/rejected": -1.7934761047363281, |
| "logps/chosen": -95.64036560058594, |
| "logps/rejected": -79.05517578125, |
| "loss": 0.028, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 1.7179588079452515, |
| "rewards/margins": 6.512901306152344, |
| "rewards/rejected": -4.7949419021606445, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.917808219178082e-07, |
| "logits/chosen": -2.286062240600586, |
| "logits/rejected": -1.9140151739120483, |
| "logps/chosen": -85.09687042236328, |
| "logps/rejected": -79.01274871826172, |
| "loss": 0.0303, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2032204866409302, |
| "rewards/margins": 6.1523003578186035, |
| "rewards/rejected": -4.949079990386963, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.963470319634703e-07, |
| "logits/chosen": -2.296107053756714, |
| "logits/rejected": -2.0091071128845215, |
| "logps/chosen": -82.93006896972656, |
| "logps/rejected": -80.10784912109375, |
| "loss": 0.0268, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 1.1454616785049438, |
| "rewards/margins": 6.973064422607422, |
| "rewards/rejected": -5.827603340148926, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 2.009132420091324e-07, |
| "logits/chosen": -2.2207303047180176, |
| "logits/rejected": -1.8763904571533203, |
| "logps/chosen": -82.83280181884766, |
| "logps/rejected": -79.92660522460938, |
| "loss": 0.0291, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.1346409320831299, |
| "rewards/margins": 7.1732892990112305, |
| "rewards/rejected": -6.03864860534668, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 2.054794520547945e-07, |
| "logits/chosen": -2.164816379547119, |
| "logits/rejected": -1.7991693019866943, |
| "logps/chosen": -91.07575988769531, |
| "logps/rejected": -82.17328643798828, |
| "loss": 0.0295, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.567754864692688, |
| "rewards/margins": 8.110410690307617, |
| "rewards/rejected": -6.542654991149902, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 2.100456621004566e-07, |
| "logits/chosen": -2.278972864151001, |
| "logits/rejected": -1.9182837009429932, |
| "logps/chosen": -91.42366790771484, |
| "logps/rejected": -88.95767974853516, |
| "loss": 0.0314, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.1288248300552368, |
| "rewards/margins": 7.684876441955566, |
| "rewards/rejected": -6.556051731109619, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 2.146118721461187e-07, |
| "logits/chosen": -2.3190386295318604, |
| "logits/rejected": -1.961912751197815, |
| "logps/chosen": -86.6361312866211, |
| "logps/rejected": -81.09564971923828, |
| "loss": 0.0202, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5480232238769531, |
| "rewards/margins": 7.930706024169922, |
| "rewards/rejected": -6.382682800292969, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2.191780821917808e-07, |
| "logits/chosen": -2.3331542015075684, |
| "logits/rejected": -1.9054124355316162, |
| "logps/chosen": -91.41481018066406, |
| "logps/rejected": -83.68817138671875, |
| "loss": 0.0218, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3129695653915405, |
| "rewards/margins": 8.34068775177002, |
| "rewards/rejected": -7.027717590332031, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2.237442922374429e-07, |
| "logits/chosen": -2.2767910957336426, |
| "logits/rejected": -1.91777765750885, |
| "logps/chosen": -90.29415130615234, |
| "logps/rejected": -84.94145965576172, |
| "loss": 0.017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.661865234375, |
| "rewards/margins": 8.428221702575684, |
| "rewards/rejected": -6.766357421875, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 2.28310502283105e-07, |
| "logits/chosen": -2.2446134090423584, |
| "logits/rejected": -1.8638511896133423, |
| "logps/chosen": -88.12340545654297, |
| "logps/rejected": -83.9640884399414, |
| "loss": 0.0164, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3303194046020508, |
| "rewards/margins": 8.892141342163086, |
| "rewards/rejected": -7.561821937561035, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_logits/chosen": -2.269374132156372, |
| "eval_logits/rejected": -1.9262473583221436, |
| "eval_logps/chosen": -86.5768814086914, |
| "eval_logps/rejected": -82.61319732666016, |
| "eval_loss": 0.018967095762491226, |
| "eval_rewards/accuracies": 0.9860334992408752, |
| "eval_rewards/chosen": 1.126853346824646, |
| "eval_rewards/margins": 9.017987251281738, |
| "eval_rewards/rejected": -7.891134262084961, |
| "eval_runtime": 180.5741, |
| "eval_samples_per_second": 15.849, |
| "eval_steps_per_second": 0.991, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 2.328767123287671e-07, |
| "logits/chosen": -2.2802085876464844, |
| "logits/rejected": -1.9185457229614258, |
| "logps/chosen": -81.77207946777344, |
| "logps/rejected": -80.97681427001953, |
| "loss": 0.0197, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.417055368423462, |
| "rewards/margins": 9.611557960510254, |
| "rewards/rejected": -8.194501876831055, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2.374429223744292e-07, |
| "logits/chosen": -2.312295436859131, |
| "logits/rejected": -1.9153810739517212, |
| "logps/chosen": -91.66407775878906, |
| "logps/rejected": -87.04930877685547, |
| "loss": 0.0133, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.3187003135681152, |
| "rewards/margins": 9.435572624206543, |
| "rewards/rejected": -8.11687183380127, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2.420091324200913e-07, |
| "logits/chosen": -2.347360134124756, |
| "logits/rejected": -1.9737800359725952, |
| "logps/chosen": -92.10830688476562, |
| "logps/rejected": -89.24776458740234, |
| "loss": 0.016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5000791549682617, |
| "rewards/margins": 10.65346622467041, |
| "rewards/rejected": -9.153387069702148, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 2.465753424657534e-07, |
| "logits/chosen": -2.2161240577697754, |
| "logits/rejected": -1.819411039352417, |
| "logps/chosen": -89.9305648803711, |
| "logps/rejected": -87.90519714355469, |
| "loss": 0.0117, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.882406234741211, |
| "rewards/margins": 10.416455268859863, |
| "rewards/rejected": -8.534049034118652, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 2.511415525114155e-07, |
| "logits/chosen": -2.3072893619537354, |
| "logits/rejected": -1.928308129310608, |
| "logps/chosen": -88.01408386230469, |
| "logps/rejected": -83.06895446777344, |
| "loss": 0.0133, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1510813236236572, |
| "rewards/margins": 10.02571964263916, |
| "rewards/rejected": -8.87463665008545, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2.557077625570776e-07, |
| "logits/chosen": -2.2718453407287598, |
| "logits/rejected": -1.8856366872787476, |
| "logps/chosen": -90.56663513183594, |
| "logps/rejected": -86.86662292480469, |
| "loss": 0.0137, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.4503662586212158, |
| "rewards/margins": 11.014634132385254, |
| "rewards/rejected": -9.5642671585083, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2.602739726027397e-07, |
| "logits/chosen": -2.2816548347473145, |
| "logits/rejected": -1.8435026407241821, |
| "logps/chosen": -94.76255798339844, |
| "logps/rejected": -88.23798370361328, |
| "loss": 0.0171, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4174392223358154, |
| "rewards/margins": 10.936891555786133, |
| "rewards/rejected": -9.519450187683105, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2.648401826484018e-07, |
| "logits/chosen": -2.298529863357544, |
| "logits/rejected": -1.8752195835113525, |
| "logps/chosen": -88.09405517578125, |
| "logps/rejected": -88.1880874633789, |
| "loss": 0.0106, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.359735131263733, |
| "rewards/margins": 11.649232864379883, |
| "rewards/rejected": -10.289498329162598, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2.694063926940639e-07, |
| "logits/chosen": -2.2168471813201904, |
| "logits/rejected": -1.908395528793335, |
| "logps/chosen": -89.10639953613281, |
| "logps/rejected": -90.54367065429688, |
| "loss": 0.0091, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3336710929870605, |
| "rewards/margins": 12.000402450561523, |
| "rewards/rejected": -10.666732788085938, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2.73972602739726e-07, |
| "logits/chosen": -2.3078534603118896, |
| "logits/rejected": -1.9278669357299805, |
| "logps/chosen": -85.4991683959961, |
| "logps/rejected": -89.22947692871094, |
| "loss": 0.0115, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5495243072509766, |
| "rewards/margins": 12.214988708496094, |
| "rewards/rejected": -10.665464401245117, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.27, |
| "eval_logits/chosen": -2.258920431137085, |
| "eval_logits/rejected": -1.9018107652664185, |
| "eval_logps/chosen": -86.94231414794922, |
| "eval_logps/rejected": -88.5583267211914, |
| "eval_loss": 0.01211754884570837, |
| "eval_rewards/accuracies": 0.9860334992408752, |
| "eval_rewards/chosen": 0.9441364407539368, |
| "eval_rewards/margins": 11.807838439941406, |
| "eval_rewards/rejected": -10.863702774047852, |
| "eval_runtime": 220.3515, |
| "eval_samples_per_second": 12.988, |
| "eval_steps_per_second": 0.812, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.785388127853881e-07, |
| "logits/chosen": -2.3034229278564453, |
| "logits/rejected": -1.9104959964752197, |
| "logps/chosen": -89.28177642822266, |
| "logps/rejected": -89.79473114013672, |
| "loss": 0.0112, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2887121438980103, |
| "rewards/margins": 11.897204399108887, |
| "rewards/rejected": -10.608492851257324, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.831050228310502e-07, |
| "logits/chosen": -2.3329930305480957, |
| "logits/rejected": -1.9547516107559204, |
| "logps/chosen": -87.00670623779297, |
| "logps/rejected": -91.48796081542969, |
| "loss": 0.0116, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2360261678695679, |
| "rewards/margins": 12.600028991699219, |
| "rewards/rejected": -11.364001274108887, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.876712328767123e-07, |
| "logits/chosen": -2.298924446105957, |
| "logits/rejected": -1.964695930480957, |
| "logps/chosen": -91.54945373535156, |
| "logps/rejected": -96.8219985961914, |
| "loss": 0.0097, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5358260869979858, |
| "rewards/margins": 13.666036605834961, |
| "rewards/rejected": -12.130212783813477, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.922374429223744e-07, |
| "logits/chosen": -2.3074567317962646, |
| "logits/rejected": -1.9056323766708374, |
| "logps/chosen": -89.87752532958984, |
| "logps/rejected": -90.3302230834961, |
| "loss": 0.0129, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.6936411261558533, |
| "rewards/margins": 11.324541091918945, |
| "rewards/rejected": -10.630899429321289, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2.968036529680365e-07, |
| "logits/chosen": -2.310959815979004, |
| "logits/rejected": -1.8844079971313477, |
| "logps/chosen": -89.85198974609375, |
| "logps/rejected": -88.82755279541016, |
| "loss": 0.0041, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.92197585105896, |
| "rewards/margins": 13.281471252441406, |
| "rewards/rejected": -11.359495162963867, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2.998477929984779e-07, |
| "logits/chosen": -2.326324462890625, |
| "logits/rejected": -1.9201898574829102, |
| "logps/chosen": -86.74824523925781, |
| "logps/rejected": -93.16512298583984, |
| "loss": 0.0083, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1350452899932861, |
| "rewards/margins": 13.680140495300293, |
| "rewards/rejected": -12.545095443725586, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.993404363267377e-07, |
| "logits/chosen": -2.307976484298706, |
| "logits/rejected": -1.9049756526947021, |
| "logps/chosen": -91.18403625488281, |
| "logps/rejected": -93.76152801513672, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3413939476013184, |
| "rewards/margins": 14.000112533569336, |
| "rewards/rejected": -12.658717155456543, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.9883307965499743e-07, |
| "logits/chosen": -2.2899010181427, |
| "logits/rejected": -1.9124386310577393, |
| "logps/chosen": -91.78596496582031, |
| "logps/rejected": -97.47313690185547, |
| "loss": 0.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.643972635269165, |
| "rewards/margins": 14.497393608093262, |
| "rewards/rejected": -12.853422164916992, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.983257229832572e-07, |
| "logits/chosen": -2.278542995452881, |
| "logits/rejected": -1.8497650623321533, |
| "logps/chosen": -89.2038803100586, |
| "logps/rejected": -89.34355926513672, |
| "loss": 0.0084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.009284496307373, |
| "rewards/margins": 14.792158126831055, |
| "rewards/rejected": -12.782875061035156, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.9781836631151696e-07, |
| "logits/chosen": -2.255171060562134, |
| "logits/rejected": -1.9044491052627563, |
| "logps/chosen": -89.71915435791016, |
| "logps/rejected": -97.62041473388672, |
| "loss": 0.006, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.1625474691390991, |
| "rewards/margins": 13.750628471374512, |
| "rewards/rejected": -12.588080406188965, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_logits/chosen": -2.2662999629974365, |
| "eval_logits/rejected": -1.9091564416885376, |
| "eval_logps/chosen": -86.36480712890625, |
| "eval_logps/rejected": -93.07726287841797, |
| "eval_loss": 0.00890163704752922, |
| "eval_rewards/accuracies": 0.9888268113136292, |
| "eval_rewards/chosen": 1.2328906059265137, |
| "eval_rewards/margins": 14.356060981750488, |
| "eval_rewards/rejected": -13.123170852661133, |
| "eval_runtime": 201.1096, |
| "eval_samples_per_second": 14.231, |
| "eval_steps_per_second": 0.89, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.9731100963977676e-07, |
| "logits/chosen": -2.2573180198669434, |
| "logits/rejected": -1.8857762813568115, |
| "logps/chosen": -86.557861328125, |
| "logps/rejected": -94.78699493408203, |
| "loss": 0.0053, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.315459132194519, |
| "rewards/margins": 14.209829330444336, |
| "rewards/rejected": -12.894371032714844, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.968036529680365e-07, |
| "logits/chosen": -2.325437068939209, |
| "logits/rejected": -1.954026460647583, |
| "logps/chosen": -85.6775131225586, |
| "logps/rejected": -95.70713806152344, |
| "loss": 0.0078, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7716138362884521, |
| "rewards/margins": 15.646145820617676, |
| "rewards/rejected": -13.874531745910645, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.962962962962963e-07, |
| "logits/chosen": -2.3093421459198, |
| "logits/rejected": -1.8865350484848022, |
| "logps/chosen": -86.63150787353516, |
| "logps/rejected": -93.17677307128906, |
| "loss": 0.0059, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0186963081359863, |
| "rewards/margins": 15.363825798034668, |
| "rewards/rejected": -13.345129013061523, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.9578893962455603e-07, |
| "logits/chosen": -2.2457454204559326, |
| "logits/rejected": -1.8727748394012451, |
| "logps/chosen": -84.77641296386719, |
| "logps/rejected": -90.2676010131836, |
| "loss": 0.0081, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4204827547073364, |
| "rewards/margins": 14.20788288116455, |
| "rewards/rejected": -12.787399291992188, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.952815829528158e-07, |
| "logits/chosen": -2.308354616165161, |
| "logits/rejected": -1.9623510837554932, |
| "logps/chosen": -83.89095306396484, |
| "logps/rejected": -93.6553955078125, |
| "loss": 0.0102, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9429199695587158, |
| "rewards/margins": 14.841961860656738, |
| "rewards/rejected": -12.899042129516602, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2.9477422628107556e-07, |
| "logits/chosen": -2.3661086559295654, |
| "logits/rejected": -1.9924736022949219, |
| "logps/chosen": -87.33646392822266, |
| "logps/rejected": -95.00645446777344, |
| "loss": 0.0145, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.3574576377868652, |
| "rewards/margins": 14.781530380249023, |
| "rewards/rejected": -12.424072265625, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2.9426686960933536e-07, |
| "logits/chosen": -2.2825839519500732, |
| "logits/rejected": -1.8536045551300049, |
| "logps/chosen": -93.97390747070312, |
| "logps/rejected": -94.27307891845703, |
| "loss": 0.008, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.272422194480896, |
| "rewards/margins": 14.426435470581055, |
| "rewards/rejected": -13.154012680053711, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.937595129375951e-07, |
| "logits/chosen": -2.228724956512451, |
| "logits/rejected": -1.8590246438980103, |
| "logps/chosen": -86.09639739990234, |
| "logps/rejected": -92.32320404052734, |
| "loss": 0.0058, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.6447618007659912, |
| "rewards/margins": 14.473971366882324, |
| "rewards/rejected": -12.82921028137207, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.932521562658549e-07, |
| "logits/chosen": -2.298750400543213, |
| "logits/rejected": -1.9722936153411865, |
| "logps/chosen": -88.01515197753906, |
| "logps/rejected": -96.683349609375, |
| "loss": 0.0066, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.2021758556365967, |
| "rewards/margins": 15.400471687316895, |
| "rewards/rejected": -13.198295593261719, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.9274479959411463e-07, |
| "logits/chosen": -2.2984695434570312, |
| "logits/rejected": -1.9286425113677979, |
| "logps/chosen": -84.20467376708984, |
| "logps/rejected": -96.1876449584961, |
| "loss": 0.0068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.15755558013916, |
| "rewards/margins": 15.103589057922363, |
| "rewards/rejected": -12.946032524108887, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_logits/chosen": -2.2589728832244873, |
| "eval_logits/rejected": -1.9111113548278809, |
| "eval_logps/chosen": -85.02589416503906, |
| "eval_logps/rejected": -92.66677856445312, |
| "eval_loss": 0.00817781314253807, |
| "eval_rewards/accuracies": 0.9888268113136292, |
| "eval_rewards/chosen": 1.902347207069397, |
| "eval_rewards/margins": 14.820280075073242, |
| "eval_rewards/rejected": -12.917930603027344, |
| "eval_runtime": 235.1892, |
| "eval_samples_per_second": 12.169, |
| "eval_steps_per_second": 0.761, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.922374429223744e-07, |
| "logits/chosen": -2.246121883392334, |
| "logits/rejected": -1.878603219985962, |
| "logps/chosen": -84.80223083496094, |
| "logps/rejected": -96.83042907714844, |
| "loss": 0.0154, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4696574211120605, |
| "rewards/margins": 15.41651439666748, |
| "rewards/rejected": -12.946856498718262, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.9173008625063416e-07, |
| "logits/chosen": -2.2706353664398193, |
| "logits/rejected": -1.885000467300415, |
| "logps/chosen": -88.46263122558594, |
| "logps/rejected": -95.60710144042969, |
| "loss": 0.0061, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5691077709198, |
| "rewards/margins": 16.42782211303711, |
| "rewards/rejected": -13.858716011047363, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.9122272957889396e-07, |
| "logits/chosen": -2.2218751907348633, |
| "logits/rejected": -1.8037551641464233, |
| "logps/chosen": -89.38809204101562, |
| "logps/rejected": -99.0447006225586, |
| "loss": 0.0107, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.3626868724823, |
| "rewards/margins": 15.406193733215332, |
| "rewards/rejected": -13.04350757598877, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.907153729071537e-07, |
| "logits/chosen": -2.2497596740722656, |
| "logits/rejected": -1.7971522808074951, |
| "logps/chosen": -91.6142807006836, |
| "logps/rejected": -95.75736236572266, |
| "loss": 0.0077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9224580526351929, |
| "rewards/margins": 16.020370483398438, |
| "rewards/rejected": -14.09791088104248, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.902080162354135e-07, |
| "logits/chosen": -2.2165656089782715, |
| "logits/rejected": -1.902951955795288, |
| "logps/chosen": -79.86293029785156, |
| "logps/rejected": -97.59088134765625, |
| "loss": 0.0073, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5969226360321045, |
| "rewards/margins": 15.788568496704102, |
| "rewards/rejected": -14.191644668579102, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.8970065956367323e-07, |
| "logits/chosen": -2.272202968597412, |
| "logits/rejected": -1.8873332738876343, |
| "logps/chosen": -87.25418853759766, |
| "logps/rejected": -99.46211242675781, |
| "loss": 0.0108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7722752094268799, |
| "rewards/margins": 16.73154640197754, |
| "rewards/rejected": -14.959269523620605, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2.89193302891933e-07, |
| "logits/chosen": -2.255979061126709, |
| "logits/rejected": -1.9493907690048218, |
| "logps/chosen": -86.21080017089844, |
| "logps/rejected": -106.2105941772461, |
| "loss": 0.008, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.9701576232910156, |
| "rewards/margins": 18.300521850585938, |
| "rewards/rejected": -17.330366134643555, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2.8868594622019276e-07, |
| "logits/chosen": -2.2402877807617188, |
| "logits/rejected": -1.801593542098999, |
| "logps/chosen": -91.86135864257812, |
| "logps/rejected": -99.11498260498047, |
| "loss": 0.0049, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.0486648082733154, |
| "rewards/margins": 17.628559112548828, |
| "rewards/rejected": -15.579893112182617, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 2.8817858954845256e-07, |
| "logits/chosen": -2.2116286754608154, |
| "logits/rejected": -1.8907750844955444, |
| "logps/chosen": -86.34685516357422, |
| "logps/rejected": -105.34354400634766, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.269397735595703, |
| "rewards/margins": 18.360477447509766, |
| "rewards/rejected": -16.091083526611328, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 2.876712328767123e-07, |
| "logits/chosen": -2.2881391048431396, |
| "logits/rejected": -1.9034688472747803, |
| "logps/chosen": -85.5262222290039, |
| "logps/rejected": -103.30674743652344, |
| "loss": 0.0047, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0433995723724365, |
| "rewards/margins": 19.068531036376953, |
| "rewards/rejected": -17.025129318237305, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_logits/chosen": -2.2466113567352295, |
| "eval_logits/rejected": -1.893018364906311, |
| "eval_logps/chosen": -85.62364959716797, |
| "eval_logps/rejected": -100.12027740478516, |
| "eval_loss": 0.007255914621055126, |
| "eval_rewards/accuracies": 0.9888268113136292, |
| "eval_rewards/chosen": 1.6034660339355469, |
| "eval_rewards/margins": 18.248144149780273, |
| "eval_rewards/rejected": -16.644678115844727, |
| "eval_runtime": 217.5149, |
| "eval_samples_per_second": 13.158, |
| "eval_steps_per_second": 0.823, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 2.871638762049721e-07, |
| "logits/chosen": -2.2870774269104004, |
| "logits/rejected": -1.9350963830947876, |
| "logps/chosen": -92.18787384033203, |
| "logps/rejected": -107.0401611328125, |
| "loss": 0.0056, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6550416946411133, |
| "rewards/margins": 18.991727828979492, |
| "rewards/rejected": -17.336687088012695, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 2.8665651953323183e-07, |
| "logits/chosen": -2.281367063522339, |
| "logits/rejected": -1.8545904159545898, |
| "logps/chosen": -85.82614135742188, |
| "logps/rejected": -100.07356262207031, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7310631275177, |
| "rewards/margins": 19.34614372253418, |
| "rewards/rejected": -16.615079879760742, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 2.861491628614916e-07, |
| "logits/chosen": -2.313906192779541, |
| "logits/rejected": -1.910033941268921, |
| "logps/chosen": -85.05974578857422, |
| "logps/rejected": -101.42959594726562, |
| "loss": 0.0071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.475045919418335, |
| "rewards/margins": 17.93846893310547, |
| "rewards/rejected": -16.463422775268555, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2.8564180618975136e-07, |
| "logits/chosen": -2.234340190887451, |
| "logits/rejected": -1.8709052801132202, |
| "logps/chosen": -90.11567687988281, |
| "logps/rejected": -107.35555267333984, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.179823398590088, |
| "rewards/margins": 19.71477699279785, |
| "rewards/rejected": -17.53495216369629, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2.8513444951801116e-07, |
| "logits/chosen": -2.2462267875671387, |
| "logits/rejected": -1.8993927240371704, |
| "logps/chosen": -86.99429321289062, |
| "logps/rejected": -108.23291015625, |
| "loss": 0.0068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4878250360488892, |
| "rewards/margins": 20.112104415893555, |
| "rewards/rejected": -18.624279022216797, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 2.846270928462709e-07, |
| "logits/chosen": -2.196100950241089, |
| "logits/rejected": -1.8292429447174072, |
| "logps/chosen": -89.53839874267578, |
| "logps/rejected": -103.94474029541016, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.670432686805725, |
| "rewards/margins": 18.502605438232422, |
| "rewards/rejected": -16.832172393798828, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 2.841197361745307e-07, |
| "logits/chosen": -2.222852945327759, |
| "logits/rejected": -1.827455759048462, |
| "logps/chosen": -87.63113403320312, |
| "logps/rejected": -101.9697265625, |
| "loss": 0.0086, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.7835509777069092, |
| "rewards/margins": 19.11504364013672, |
| "rewards/rejected": -17.331493377685547, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 2.8361237950279043e-07, |
| "logits/chosen": -2.178133726119995, |
| "logits/rejected": -1.8010485172271729, |
| "logps/chosen": -87.01272583007812, |
| "logps/rejected": -104.30177307128906, |
| "loss": 0.0123, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2377567291259766, |
| "rewards/margins": 18.971233367919922, |
| "rewards/rejected": -16.733478546142578, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 2.831050228310502e-07, |
| "logits/chosen": -2.2163853645324707, |
| "logits/rejected": -1.8224786520004272, |
| "logps/chosen": -85.6602554321289, |
| "logps/rejected": -103.28083801269531, |
| "loss": 0.006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.684990882873535, |
| "rewards/margins": 18.973764419555664, |
| "rewards/rejected": -16.288774490356445, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 2.8259766615930996e-07, |
| "logits/chosen": -2.2510056495666504, |
| "logits/rejected": -1.7727775573730469, |
| "logps/chosen": -87.45413970947266, |
| "logps/rejected": -97.86138916015625, |
| "loss": 0.0053, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.894382953643799, |
| "rewards/margins": 19.050537109375, |
| "rewards/rejected": -16.15615463256836, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_logits/chosen": -2.2001523971557617, |
| "eval_logits/rejected": -1.8469951152801514, |
| "eval_logps/chosen": -84.38460540771484, |
| "eval_logps/rejected": -99.33098602294922, |
| "eval_loss": 0.0070889778435230255, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.222993850708008, |
| "eval_rewards/margins": 18.473024368286133, |
| "eval_rewards/rejected": -16.250030517578125, |
| "eval_runtime": 165.1867, |
| "eval_samples_per_second": 17.326, |
| "eval_steps_per_second": 1.084, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 2.8209030948756976e-07, |
| "logits/chosen": -2.2496349811553955, |
| "logits/rejected": -1.8998152017593384, |
| "logps/chosen": -84.53297424316406, |
| "logps/rejected": -103.37425231933594, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.839362621307373, |
| "rewards/margins": 18.76103401184082, |
| "rewards/rejected": -15.921670913696289, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 2.815829528158295e-07, |
| "logits/chosen": -2.176231861114502, |
| "logits/rejected": -1.815509557723999, |
| "logps/chosen": -83.98689270019531, |
| "logps/rejected": -102.61014556884766, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5853066444396973, |
| "rewards/margins": 19.177194595336914, |
| "rewards/rejected": -16.591888427734375, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 2.810755961440893e-07, |
| "logits/chosen": -2.2483859062194824, |
| "logits/rejected": -1.837993860244751, |
| "logps/chosen": -85.09949493408203, |
| "logps/rejected": -105.0072021484375, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.028395175933838, |
| "rewards/margins": 19.074552536010742, |
| "rewards/rejected": -17.046157836914062, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 2.8056823947234903e-07, |
| "logits/chosen": -2.1420371532440186, |
| "logits/rejected": -1.7370542287826538, |
| "logps/chosen": -87.30947875976562, |
| "logps/rejected": -97.69217681884766, |
| "loss": 0.0078, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.3234381675720215, |
| "rewards/margins": 18.487682342529297, |
| "rewards/rejected": -15.16424560546875, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 2.800608828006088e-07, |
| "logits/chosen": -2.2500481605529785, |
| "logits/rejected": -1.8463836908340454, |
| "logps/chosen": -90.0582275390625, |
| "logps/rejected": -103.7062759399414, |
| "loss": 0.0053, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3003077507019043, |
| "rewards/margins": 19.799549102783203, |
| "rewards/rejected": -17.49924087524414, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 2.7955352612886856e-07, |
| "logits/chosen": -2.208354949951172, |
| "logits/rejected": -1.8978168964385986, |
| "logps/chosen": -77.57411193847656, |
| "logps/rejected": -103.87590026855469, |
| "loss": 0.007, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.4448658227920532, |
| "rewards/margins": 20.587467193603516, |
| "rewards/rejected": -19.142602920532227, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 2.7904616945712836e-07, |
| "logits/chosen": -2.1659793853759766, |
| "logits/rejected": -1.7798793315887451, |
| "logps/chosen": -83.9134521484375, |
| "logps/rejected": -107.64317321777344, |
| "loss": 0.0048, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1447739601135254, |
| "rewards/margins": 21.296142578125, |
| "rewards/rejected": -19.151369094848633, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 2.785388127853881e-07, |
| "logits/chosen": -2.2320501804351807, |
| "logits/rejected": -1.8187439441680908, |
| "logps/chosen": -88.16758728027344, |
| "logps/rejected": -105.4022445678711, |
| "loss": 0.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6898410320281982, |
| "rewards/margins": 21.423625946044922, |
| "rewards/rejected": -18.733787536621094, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2.780314561136479e-07, |
| "logits/chosen": -2.150700092315674, |
| "logits/rejected": -1.8009631633758545, |
| "logps/chosen": -88.3268051147461, |
| "logps/rejected": -107.76560974121094, |
| "loss": 0.0057, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.4143996238708496, |
| "rewards/margins": 20.30838394165039, |
| "rewards/rejected": -18.893983840942383, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2.7752409944190763e-07, |
| "logits/chosen": -2.2560830116271973, |
| "logits/rejected": -1.8415091037750244, |
| "logps/chosen": -88.06275939941406, |
| "logps/rejected": -104.66233825683594, |
| "loss": 0.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9098188877105713, |
| "rewards/margins": 20.39904022216797, |
| "rewards/rejected": -18.489221572875977, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_logits/chosen": -2.208942413330078, |
| "eval_logits/rejected": -1.8563501834869385, |
| "eval_logps/chosen": -84.56623840332031, |
| "eval_logps/rejected": -100.85079956054688, |
| "eval_loss": 0.007256262004375458, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.132176637649536, |
| "eval_rewards/margins": 19.14211654663086, |
| "eval_rewards/rejected": -17.00994110107422, |
| "eval_runtime": 179.5596, |
| "eval_samples_per_second": 15.939, |
| "eval_steps_per_second": 0.997, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 2.770167427701674e-07, |
| "logits/chosen": -2.148176431655884, |
| "logits/rejected": -1.811261534690857, |
| "logps/chosen": -90.28803253173828, |
| "logps/rejected": -105.98939514160156, |
| "loss": 0.0037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.97528874874115, |
| "rewards/margins": 18.622446060180664, |
| "rewards/rejected": -16.647159576416016, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 2.7650938609842716e-07, |
| "logits/chosen": -2.226710796356201, |
| "logits/rejected": -1.8027299642562866, |
| "logps/chosen": -88.32550048828125, |
| "logps/rejected": -98.92311096191406, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.893244504928589, |
| "rewards/margins": 20.122953414916992, |
| "rewards/rejected": -17.22970962524414, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2.7600202942668696e-07, |
| "logits/chosen": -2.259479284286499, |
| "logits/rejected": -1.845926284790039, |
| "logps/chosen": -90.93453216552734, |
| "logps/rejected": -101.05252075195312, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7073707580566406, |
| "rewards/margins": 19.56781578063965, |
| "rewards/rejected": -16.860445022583008, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2.754946727549467e-07, |
| "logits/chosen": -2.253920793533325, |
| "logits/rejected": -1.9071108102798462, |
| "logps/chosen": -89.0732650756836, |
| "logps/rejected": -106.84078216552734, |
| "loss": 0.0072, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.451443672180176, |
| "rewards/margins": 19.57352638244629, |
| "rewards/rejected": -17.122081756591797, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2.749873160832065e-07, |
| "logits/chosen": -2.217794418334961, |
| "logits/rejected": -1.8264172077178955, |
| "logps/chosen": -84.98257446289062, |
| "logps/rejected": -103.6314926147461, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.75927734375, |
| "rewards/margins": 20.883607864379883, |
| "rewards/rejected": -18.12432861328125, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 2.7447995941146623e-07, |
| "logits/chosen": -2.23350191116333, |
| "logits/rejected": -1.8721933364868164, |
| "logps/chosen": -88.2120361328125, |
| "logps/rejected": -107.16935729980469, |
| "loss": 0.0196, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 2.1735239028930664, |
| "rewards/margins": 19.927579879760742, |
| "rewards/rejected": -17.75405502319336, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 2.73972602739726e-07, |
| "logits/chosen": -2.191521167755127, |
| "logits/rejected": -1.8549985885620117, |
| "logps/chosen": -88.76947784423828, |
| "logps/rejected": -109.21342468261719, |
| "loss": 0.0061, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.1197288036346436, |
| "rewards/margins": 21.133193969726562, |
| "rewards/rejected": -19.013463973999023, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 2.7346524606798576e-07, |
| "logits/chosen": -2.223162889480591, |
| "logits/rejected": -1.8733783960342407, |
| "logps/chosen": -88.88787841796875, |
| "logps/rejected": -108.9013671875, |
| "loss": 0.0062, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.40854811668396, |
| "rewards/margins": 22.095478057861328, |
| "rewards/rejected": -19.686931610107422, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 2.7295788939624556e-07, |
| "logits/chosen": -2.197874069213867, |
| "logits/rejected": -1.7851364612579346, |
| "logps/chosen": -85.47915649414062, |
| "logps/rejected": -101.67639923095703, |
| "loss": 0.0049, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8164796829223633, |
| "rewards/margins": 20.382543563842773, |
| "rewards/rejected": -17.566064834594727, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 2.724505327245053e-07, |
| "logits/chosen": -2.2712106704711914, |
| "logits/rejected": -1.9540023803710938, |
| "logps/chosen": -88.18670654296875, |
| "logps/rejected": -107.9418716430664, |
| "loss": 0.0243, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.7379881143569946, |
| "rewards/margins": 19.269012451171875, |
| "rewards/rejected": -17.531024932861328, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_logits/chosen": -2.1799449920654297, |
| "eval_logits/rejected": -1.832864761352539, |
| "eval_logps/chosen": -83.93651580810547, |
| "eval_logps/rejected": -97.79573822021484, |
| "eval_loss": 0.007186249829828739, |
| "eval_rewards/accuracies": 0.9888268113136292, |
| "eval_rewards/chosen": 2.4470374584198, |
| "eval_rewards/margins": 17.92945098876953, |
| "eval_rewards/rejected": -15.482412338256836, |
| "eval_runtime": 229.0536, |
| "eval_samples_per_second": 12.495, |
| "eval_steps_per_second": 0.781, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 2.719431760527651e-07, |
| "logits/chosen": -2.1936728954315186, |
| "logits/rejected": -1.8306325674057007, |
| "logps/chosen": -84.01075744628906, |
| "logps/rejected": -96.85215759277344, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3379154205322266, |
| "rewards/margins": 16.724027633666992, |
| "rewards/rejected": -14.38611125946045, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 2.7143581938102483e-07, |
| "logits/chosen": -2.2218477725982666, |
| "logits/rejected": -1.848719835281372, |
| "logps/chosen": -81.31999969482422, |
| "logps/rejected": -99.01549530029297, |
| "loss": 0.0051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9159164428710938, |
| "rewards/margins": 18.738767623901367, |
| "rewards/rejected": -15.822851181030273, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 2.709284627092846e-07, |
| "logits/chosen": -2.1815645694732666, |
| "logits/rejected": -1.8792476654052734, |
| "logps/chosen": -81.18501281738281, |
| "logps/rejected": -99.95542907714844, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6159422397613525, |
| "rewards/margins": 18.44463348388672, |
| "rewards/rejected": -15.828694343566895, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 2.7042110603754436e-07, |
| "logits/chosen": -2.199174642562866, |
| "logits/rejected": -1.7883743047714233, |
| "logps/chosen": -86.32429504394531, |
| "logps/rejected": -99.67549133300781, |
| "loss": 0.0066, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.1772170066833496, |
| "rewards/margins": 19.528255462646484, |
| "rewards/rejected": -16.35103988647461, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 2.6991374936580416e-07, |
| "logits/chosen": -2.1487929821014404, |
| "logits/rejected": -1.789044737815857, |
| "logps/chosen": -85.71726989746094, |
| "logps/rejected": -104.7240219116211, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.469191312789917, |
| "rewards/margins": 19.757835388183594, |
| "rewards/rejected": -17.28864288330078, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2.694063926940639e-07, |
| "logits/chosen": -2.1753883361816406, |
| "logits/rejected": -1.798710823059082, |
| "logps/chosen": -87.18423461914062, |
| "logps/rejected": -101.5936279296875, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.384445905685425, |
| "rewards/margins": 19.821514129638672, |
| "rewards/rejected": -17.43706512451172, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2.688990360223237e-07, |
| "logits/chosen": -2.170189619064331, |
| "logits/rejected": -1.8727480173110962, |
| "logps/chosen": -84.32581329345703, |
| "logps/rejected": -109.3947982788086, |
| "loss": 0.008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.941753625869751, |
| "rewards/margins": 20.492143630981445, |
| "rewards/rejected": -18.55038833618164, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2.6839167935058343e-07, |
| "logits/chosen": -2.334543466567993, |
| "logits/rejected": -1.9388000965118408, |
| "logps/chosen": -83.55552673339844, |
| "logps/rejected": -104.6715316772461, |
| "loss": 0.0091, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.3667588233947754, |
| "rewards/margins": 21.2448787689209, |
| "rewards/rejected": -18.87812042236328, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 2.678843226788432e-07, |
| "logits/chosen": -2.1919655799865723, |
| "logits/rejected": -1.857973337173462, |
| "logps/chosen": -89.53315734863281, |
| "logps/rejected": -108.6443099975586, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.031522274017334, |
| "rewards/margins": 21.71250343322754, |
| "rewards/rejected": -19.680980682373047, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 2.6737696600710296e-07, |
| "logits/chosen": -2.2731220722198486, |
| "logits/rejected": -1.894622802734375, |
| "logps/chosen": -80.3070297241211, |
| "logps/rejected": -106.09849548339844, |
| "loss": 0.0053, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.022993564605713, |
| "rewards/margins": 22.860183715820312, |
| "rewards/rejected": -19.837190628051758, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_logits/chosen": -2.197847366333008, |
| "eval_logits/rejected": -1.8479573726654053, |
| "eval_logps/chosen": -85.01256561279297, |
| "eval_logps/rejected": -106.13009643554688, |
| "eval_loss": 0.007005217019468546, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 1.9090105295181274, |
| "eval_rewards/margins": 21.558591842651367, |
| "eval_rewards/rejected": -19.64958381652832, |
| "eval_runtime": 244.0952, |
| "eval_samples_per_second": 11.725, |
| "eval_steps_per_second": 0.733, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 2.6686960933536276e-07, |
| "logits/chosen": -2.257427215576172, |
| "logits/rejected": -1.9010261297225952, |
| "logps/chosen": -85.78707885742188, |
| "logps/rejected": -110.6099853515625, |
| "loss": 0.0068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4400665760040283, |
| "rewards/margins": 22.056560516357422, |
| "rewards/rejected": -19.616491317749023, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 2.663622526636225e-07, |
| "logits/chosen": -2.1522250175476074, |
| "logits/rejected": -1.8013957738876343, |
| "logps/chosen": -85.6783447265625, |
| "logps/rejected": -111.1842269897461, |
| "loss": 0.0088, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.6612367630004883, |
| "rewards/margins": 22.052217483520508, |
| "rewards/rejected": -20.390979766845703, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 2.658548959918823e-07, |
| "logits/chosen": -2.207953691482544, |
| "logits/rejected": -1.8331537246704102, |
| "logps/chosen": -89.45965576171875, |
| "logps/rejected": -109.67543029785156, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7361319065093994, |
| "rewards/margins": 22.78788185119629, |
| "rewards/rejected": -20.051748275756836, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 2.6534753932014203e-07, |
| "logits/chosen": -2.213942050933838, |
| "logits/rejected": -1.8521515130996704, |
| "logps/chosen": -85.9124526977539, |
| "logps/rejected": -104.89479064941406, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.157468557357788, |
| "rewards/margins": 21.968364715576172, |
| "rewards/rejected": -19.810897827148438, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 2.648401826484018e-07, |
| "logits/chosen": -2.2531790733337402, |
| "logits/rejected": -1.883050560951233, |
| "logps/chosen": -83.91947937011719, |
| "logps/rejected": -117.13653564453125, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.926668405532837, |
| "rewards/margins": 24.423267364501953, |
| "rewards/rejected": -22.496599197387695, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 2.6433282597666156e-07, |
| "logits/chosen": -2.1308257579803467, |
| "logits/rejected": -1.7693058252334595, |
| "logps/chosen": -89.39836120605469, |
| "logps/rejected": -115.6990737915039, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7298908233642578, |
| "rewards/margins": 23.361858367919922, |
| "rewards/rejected": -21.631967544555664, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.6382546930492135e-07, |
| "logits/chosen": -2.2151737213134766, |
| "logits/rejected": -1.8560287952423096, |
| "logps/chosen": -87.30859375, |
| "logps/rejected": -111.68087005615234, |
| "loss": 0.0048, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5496299266815186, |
| "rewards/margins": 22.752437591552734, |
| "rewards/rejected": -21.202808380126953, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.633181126331811e-07, |
| "logits/chosen": -2.2534372806549072, |
| "logits/rejected": -1.9297653436660767, |
| "logps/chosen": -83.9007568359375, |
| "logps/rejected": -111.0853500366211, |
| "loss": 0.0061, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8405357599258423, |
| "rewards/margins": 22.654870986938477, |
| "rewards/rejected": -20.814334869384766, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.628107559614409e-07, |
| "logits/chosen": -2.269160747528076, |
| "logits/rejected": -1.8955166339874268, |
| "logps/chosen": -85.63101196289062, |
| "logps/rejected": -112.61944580078125, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2250313758850098, |
| "rewards/margins": 22.996959686279297, |
| "rewards/rejected": -20.771930694580078, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.6230339928970063e-07, |
| "logits/chosen": -2.2950210571289062, |
| "logits/rejected": -1.9462471008300781, |
| "logps/chosen": -88.02154541015625, |
| "logps/rejected": -110.0986557006836, |
| "loss": 0.0051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7983818054199219, |
| "rewards/margins": 22.378314971923828, |
| "rewards/rejected": -20.57993507385254, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_logits/chosen": -2.209514856338501, |
| "eval_logits/rejected": -1.8633878231048584, |
| "eval_logps/chosen": -85.35096740722656, |
| "eval_logps/rejected": -108.35541534423828, |
| "eval_loss": 0.007250170689076185, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.7398098707199097, |
| "eval_rewards/margins": 22.50205421447754, |
| "eval_rewards/rejected": -20.762245178222656, |
| "eval_runtime": 186.4436, |
| "eval_samples_per_second": 15.35, |
| "eval_steps_per_second": 0.96, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.617960426179604e-07, |
| "logits/chosen": -2.224945306777954, |
| "logits/rejected": -1.8185851573944092, |
| "logps/chosen": -91.3012466430664, |
| "logps/rejected": -112.8142318725586, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3502469062805176, |
| "rewards/margins": 24.02829933166504, |
| "rewards/rejected": -21.678050994873047, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.6128868594622016e-07, |
| "logits/chosen": -2.163133144378662, |
| "logits/rejected": -1.816178560256958, |
| "logps/chosen": -85.406494140625, |
| "logps/rejected": -105.5617904663086, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.699741005897522, |
| "rewards/margins": 21.171184539794922, |
| "rewards/rejected": -19.47144317626953, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.6078132927447995e-07, |
| "logits/chosen": -2.240720748901367, |
| "logits/rejected": -1.8999313116073608, |
| "logps/chosen": -86.69328308105469, |
| "logps/rejected": -114.80928802490234, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3369460105895996, |
| "rewards/margins": 24.97016143798828, |
| "rewards/rejected": -22.633216857910156, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.602739726027397e-07, |
| "logits/chosen": -2.2251267433166504, |
| "logits/rejected": -1.8492538928985596, |
| "logps/chosen": -85.41273498535156, |
| "logps/rejected": -113.72379302978516, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7388935089111328, |
| "rewards/margins": 24.013940811157227, |
| "rewards/rejected": -22.27504539489746, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.597666159309995e-07, |
| "logits/chosen": -2.196199655532837, |
| "logits/rejected": -1.8432044982910156, |
| "logps/chosen": -84.84178924560547, |
| "logps/rejected": -113.02108001708984, |
| "loss": 0.0143, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.910146713256836, |
| "rewards/margins": 22.436695098876953, |
| "rewards/rejected": -20.52655029296875, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5925925925925923e-07, |
| "logits/chosen": -2.2136662006378174, |
| "logits/rejected": -1.8691514730453491, |
| "logps/chosen": -87.48178100585938, |
| "logps/rejected": -113.52275085449219, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.627357840538025, |
| "rewards/margins": 21.706161499023438, |
| "rewards/rejected": -20.07880210876465, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.58751902587519e-07, |
| "logits/chosen": -2.1794159412384033, |
| "logits/rejected": -1.8281217813491821, |
| "logps/chosen": -91.26468658447266, |
| "logps/rejected": -111.00882720947266, |
| "loss": 0.0065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3432388305664062, |
| "rewards/margins": 22.417926788330078, |
| "rewards/rejected": -20.074687957763672, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5824454591577876e-07, |
| "logits/chosen": -2.2314205169677734, |
| "logits/rejected": -1.8225589990615845, |
| "logps/chosen": -88.60221862792969, |
| "logps/rejected": -109.89363098144531, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.573441982269287, |
| "rewards/margins": 24.366378784179688, |
| "rewards/rejected": -21.79293441772461, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5773718924403855e-07, |
| "logits/chosen": -2.1291966438293457, |
| "logits/rejected": -1.784570336341858, |
| "logps/chosen": -87.0352554321289, |
| "logps/rejected": -114.33548736572266, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9369707107543945, |
| "rewards/margins": 24.22989845275879, |
| "rewards/rejected": -22.292926788330078, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.572298325722983e-07, |
| "logits/chosen": -2.148015260696411, |
| "logits/rejected": -1.8161808252334595, |
| "logps/chosen": -87.71135711669922, |
| "logps/rejected": -112.76509094238281, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6630229949951172, |
| "rewards/margins": 23.89804458618164, |
| "rewards/rejected": -22.235023498535156, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_logits/chosen": -2.1937787532806396, |
| "eval_logits/rejected": -1.8467158079147339, |
| "eval_logps/chosen": -85.40876007080078, |
| "eval_logps/rejected": -109.89164733886719, |
| "eval_loss": 0.007178114727139473, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.7109133005142212, |
| "eval_rewards/margins": 23.241273880004883, |
| "eval_rewards/rejected": -21.530363082885742, |
| "eval_runtime": 183.2701, |
| "eval_samples_per_second": 15.616, |
| "eval_steps_per_second": 0.977, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.567224759005581e-07, |
| "logits/chosen": -2.22023344039917, |
| "logits/rejected": -1.8317985534667969, |
| "logps/chosen": -81.38858795166016, |
| "logps/rejected": -102.5715560913086, |
| "loss": 0.0068, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5601766109466553, |
| "rewards/margins": 20.727497100830078, |
| "rewards/rejected": -19.167320251464844, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.5621511922881783e-07, |
| "logits/chosen": -2.194122791290283, |
| "logits/rejected": -1.7103767395019531, |
| "logps/chosen": -86.33879089355469, |
| "logps/rejected": -105.571533203125, |
| "loss": 0.003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7159111499786377, |
| "rewards/margins": 23.524215698242188, |
| "rewards/rejected": -19.808303833007812, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.557077625570776e-07, |
| "logits/chosen": -2.1951489448547363, |
| "logits/rejected": -1.881566047668457, |
| "logps/chosen": -86.02349853515625, |
| "logps/rejected": -113.3117446899414, |
| "loss": 0.0061, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.7853310108184814, |
| "rewards/margins": 21.609209060668945, |
| "rewards/rejected": -19.823875427246094, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.5520040588533736e-07, |
| "logits/chosen": -2.233779191970825, |
| "logits/rejected": -1.821406602859497, |
| "logps/chosen": -89.24073028564453, |
| "logps/rejected": -110.7870101928711, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8013709783554077, |
| "rewards/margins": 23.02773666381836, |
| "rewards/rejected": -21.226367950439453, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.5469304921359715e-07, |
| "logits/chosen": -2.2160837650299072, |
| "logits/rejected": -1.8794729709625244, |
| "logps/chosen": -85.04659271240234, |
| "logps/rejected": -111.51092529296875, |
| "loss": 0.0047, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.755566358566284, |
| "rewards/margins": 22.784481048583984, |
| "rewards/rejected": -20.02891731262207, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.541856925418569e-07, |
| "logits/chosen": -2.1777358055114746, |
| "logits/rejected": -1.827588438987732, |
| "logps/chosen": -83.27925109863281, |
| "logps/rejected": -109.00111389160156, |
| "loss": 0.0064, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.335489511489868, |
| "rewards/margins": 22.536785125732422, |
| "rewards/rejected": -20.201297760009766, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.536783358701167e-07, |
| "logits/chosen": -2.227769374847412, |
| "logits/rejected": -1.8616615533828735, |
| "logps/chosen": -83.50282287597656, |
| "logps/rejected": -108.1055908203125, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6894235610961914, |
| "rewards/margins": 23.30007553100586, |
| "rewards/rejected": -20.610652923583984, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.5317097919837643e-07, |
| "logits/chosen": -2.2765088081359863, |
| "logits/rejected": -1.8756946325302124, |
| "logps/chosen": -86.42229461669922, |
| "logps/rejected": -113.13868713378906, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7841994762420654, |
| "rewards/margins": 23.93669319152832, |
| "rewards/rejected": -22.152494430541992, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.526636225266362e-07, |
| "logits/chosen": -2.2187094688415527, |
| "logits/rejected": -1.8117910623550415, |
| "logps/chosen": -90.1899185180664, |
| "logps/rejected": -115.62690734863281, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5328062176704407, |
| "rewards/margins": 23.20620346069336, |
| "rewards/rejected": -22.673397064208984, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.5215626585489596e-07, |
| "logits/chosen": -2.2092316150665283, |
| "logits/rejected": -1.886833906173706, |
| "logps/chosen": -83.89598083496094, |
| "logps/rejected": -113.24908447265625, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.40551048517227173, |
| "rewards/margins": 22.66997528076172, |
| "rewards/rejected": -22.264461517333984, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_logits/chosen": -2.203066110610962, |
| "eval_logits/rejected": -1.853004813194275, |
| "eval_logps/chosen": -87.39132690429688, |
| "eval_logps/rejected": -111.6284408569336, |
| "eval_loss": 0.006391549948602915, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.7196283340454102, |
| "eval_rewards/margins": 23.118391036987305, |
| "eval_rewards/rejected": -22.398759841918945, |
| "eval_runtime": 187.7477, |
| "eval_samples_per_second": 15.244, |
| "eval_steps_per_second": 0.953, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.5164890918315575e-07, |
| "logits/chosen": -2.2956223487854004, |
| "logits/rejected": -1.944941759109497, |
| "logps/chosen": -89.06498718261719, |
| "logps/rejected": -114.08534240722656, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5906304121017456, |
| "rewards/margins": 23.316919326782227, |
| "rewards/rejected": -22.726289749145508, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.511415525114155e-07, |
| "logits/chosen": -2.2368171215057373, |
| "logits/rejected": -1.9505856037139893, |
| "logps/chosen": -89.01737213134766, |
| "logps/rejected": -112.30778503417969, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.34434378147125244, |
| "rewards/margins": 21.64006996154785, |
| "rewards/rejected": -21.295726776123047, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.506341958396753e-07, |
| "logits/chosen": -2.1280384063720703, |
| "logits/rejected": -1.7620325088500977, |
| "logps/chosen": -89.35337829589844, |
| "logps/rejected": -113.1550521850586, |
| "loss": 0.0077, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.42981046438217163, |
| "rewards/margins": 23.936683654785156, |
| "rewards/rejected": -23.506874084472656, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.5012683916793503e-07, |
| "logits/chosen": -2.142151355743408, |
| "logits/rejected": -1.7131553888320923, |
| "logps/chosen": -92.11378479003906, |
| "logps/rejected": -114.9017562866211, |
| "loss": 0.003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4623570442199707, |
| "rewards/margins": 24.9403133392334, |
| "rewards/rejected": -22.47795867919922, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.496194824961948e-07, |
| "logits/chosen": -2.2092690467834473, |
| "logits/rejected": -1.8049709796905518, |
| "logps/chosen": -87.24295806884766, |
| "logps/rejected": -112.48702239990234, |
| "loss": 0.0028, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.603916645050049, |
| "rewards/margins": 24.936174392700195, |
| "rewards/rejected": -22.332256317138672, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.4911212582445456e-07, |
| "logits/chosen": -2.184044599533081, |
| "logits/rejected": -1.7883659601211548, |
| "logps/chosen": -84.82064056396484, |
| "logps/rejected": -112.11551666259766, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.598346710205078, |
| "rewards/margins": 24.60263442993164, |
| "rewards/rejected": -22.004289627075195, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.4860476915271435e-07, |
| "logits/chosen": -2.2834110260009766, |
| "logits/rejected": -1.8969223499298096, |
| "logps/chosen": -84.3614273071289, |
| "logps/rejected": -114.11153411865234, |
| "loss": 0.0108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1697537899017334, |
| "rewards/margins": 25.301849365234375, |
| "rewards/rejected": -23.132095336914062, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.480974124809741e-07, |
| "logits/chosen": -2.228867769241333, |
| "logits/rejected": -1.776439905166626, |
| "logps/chosen": -92.27311706542969, |
| "logps/rejected": -115.7362289428711, |
| "loss": 0.0031, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5189244747161865, |
| "rewards/margins": 23.18109703063965, |
| "rewards/rejected": -20.662174224853516, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.475900558092339e-07, |
| "logits/chosen": -2.2656030654907227, |
| "logits/rejected": -1.8951988220214844, |
| "logps/chosen": -87.36007690429688, |
| "logps/rejected": -105.27301025390625, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5274977684020996, |
| "rewards/margins": 22.20186424255371, |
| "rewards/rejected": -19.674365997314453, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4708269913749363e-07, |
| "logits/chosen": -2.2209548950195312, |
| "logits/rejected": -1.8660656213760376, |
| "logps/chosen": -84.46993255615234, |
| "logps/rejected": -111.65545654296875, |
| "loss": 0.0034, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.7445751428604126, |
| "rewards/margins": 22.078369140625, |
| "rewards/rejected": -20.333797454833984, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_logits/chosen": -2.1876187324523926, |
| "eval_logits/rejected": -1.8381489515304565, |
| "eval_logps/chosen": -85.6910171508789, |
| "eval_logps/rejected": -108.63225555419922, |
| "eval_loss": 0.0056876870803534985, |
| "eval_rewards/accuracies": 0.9972066879272461, |
| "eval_rewards/chosen": 1.5697858333587646, |
| "eval_rewards/margins": 22.470449447631836, |
| "eval_rewards/rejected": -20.900663375854492, |
| "eval_runtime": 262.3348, |
| "eval_samples_per_second": 10.91, |
| "eval_steps_per_second": 0.682, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.465753424657534e-07, |
| "logits/chosen": -2.1472651958465576, |
| "logits/rejected": -1.737198829650879, |
| "logps/chosen": -85.45567321777344, |
| "logps/rejected": -110.42171478271484, |
| "loss": 0.0016, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.159968614578247, |
| "rewards/margins": 23.5798397064209, |
| "rewards/rejected": -21.419872283935547, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.4606798579401316e-07, |
| "logits/chosen": -2.1188554763793945, |
| "logits/rejected": -1.82101571559906, |
| "logps/chosen": -85.29725646972656, |
| "logps/rejected": -111.76287841796875, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8330132961273193, |
| "rewards/margins": 21.782207489013672, |
| "rewards/rejected": -19.949195861816406, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.4556062912227295e-07, |
| "logits/chosen": -2.1919398307800293, |
| "logits/rejected": -1.777989149093628, |
| "logps/chosen": -89.14543151855469, |
| "logps/rejected": -109.812744140625, |
| "loss": 0.004, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5910837650299072, |
| "rewards/margins": 22.191823959350586, |
| "rewards/rejected": -19.60074234008789, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.450532724505327e-07, |
| "logits/chosen": -2.2214889526367188, |
| "logits/rejected": -1.8154666423797607, |
| "logps/chosen": -84.61921691894531, |
| "logps/rejected": -109.73350524902344, |
| "loss": 0.0047, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.7251133918762207, |
| "rewards/margins": 23.308391571044922, |
| "rewards/rejected": -20.58327865600586, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.445459157787925e-07, |
| "logits/chosen": -2.266470432281494, |
| "logits/rejected": -1.869768500328064, |
| "logps/chosen": -90.2508316040039, |
| "logps/rejected": -113.48309326171875, |
| "loss": 0.0054, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.311676502227783, |
| "rewards/margins": 23.228078842163086, |
| "rewards/rejected": -20.916400909423828, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.4403855910705223e-07, |
| "logits/chosen": -2.214456081390381, |
| "logits/rejected": -1.8925609588623047, |
| "logps/chosen": -82.62592315673828, |
| "logps/rejected": -108.47261047363281, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.60259211063385, |
| "rewards/margins": 21.090885162353516, |
| "rewards/rejected": -19.488292694091797, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.43531202435312e-07, |
| "logits/chosen": -2.18231463432312, |
| "logits/rejected": -1.7262542247772217, |
| "logps/chosen": -94.32820892333984, |
| "logps/rejected": -114.97127532958984, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.17089581489563, |
| "rewards/margins": 24.209266662597656, |
| "rewards/rejected": -22.038372039794922, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.4302384576357176e-07, |
| "logits/chosen": -2.221064329147339, |
| "logits/rejected": -1.8696062564849854, |
| "logps/chosen": -90.65652465820312, |
| "logps/rejected": -123.33599853515625, |
| "loss": 0.0012, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8242883682250977, |
| "rewards/margins": 25.92755126953125, |
| "rewards/rejected": -24.103261947631836, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.4251648909183155e-07, |
| "logits/chosen": -2.2197823524475098, |
| "logits/rejected": -1.8690903186798096, |
| "logps/chosen": -87.0594482421875, |
| "logps/rejected": -113.86370849609375, |
| "loss": 0.0062, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.1697230339050293, |
| "rewards/margins": 24.501766204833984, |
| "rewards/rejected": -21.332040786743164, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.420091324200913e-07, |
| "logits/chosen": -2.231548309326172, |
| "logits/rejected": -1.9493480920791626, |
| "logps/chosen": -86.16682434082031, |
| "logps/rejected": -116.98976135253906, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1170125007629395, |
| "rewards/margins": 23.978233337402344, |
| "rewards/rejected": -22.861225128173828, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.82, |
| "eval_logits/chosen": -2.1833367347717285, |
| "eval_logits/rejected": -1.8318486213684082, |
| "eval_logps/chosen": -85.54547119140625, |
| "eval_logps/rejected": -111.36595153808594, |
| "eval_loss": 0.005670672748237848, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 1.6425559520721436, |
| "eval_rewards/margins": 23.910072326660156, |
| "eval_rewards/rejected": -22.267513275146484, |
| "eval_runtime": 194.2178, |
| "eval_samples_per_second": 14.736, |
| "eval_steps_per_second": 0.922, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.415017757483511e-07, |
| "logits/chosen": -2.2005953788757324, |
| "logits/rejected": -1.7761281728744507, |
| "logps/chosen": -94.50230407714844, |
| "logps/rejected": -119.65816497802734, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.642756700515747, |
| "rewards/margins": 27.622684478759766, |
| "rewards/rejected": -24.979928970336914, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.409944190766108e-07, |
| "logits/chosen": -2.2361111640930176, |
| "logits/rejected": -1.850996732711792, |
| "logps/chosen": -85.87068176269531, |
| "logps/rejected": -119.11030578613281, |
| "loss": 0.0047, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5628911852836609, |
| "rewards/margins": 25.55295181274414, |
| "rewards/rejected": -24.99005699157715, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.404870624048706e-07, |
| "logits/chosen": -2.179274559020996, |
| "logits/rejected": -1.809653878211975, |
| "logps/chosen": -83.79485321044922, |
| "logps/rejected": -107.97098541259766, |
| "loss": 0.0061, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3543555736541748, |
| "rewards/margins": 23.6602840423584, |
| "rewards/rejected": -22.305927276611328, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.3997970573313036e-07, |
| "logits/chosen": -2.2063956260681152, |
| "logits/rejected": -1.8312809467315674, |
| "logps/chosen": -82.93327331542969, |
| "logps/rejected": -108.3280029296875, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.580742835998535, |
| "rewards/margins": 24.954925537109375, |
| "rewards/rejected": -22.374181747436523, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.3947234906139015e-07, |
| "logits/chosen": -2.2583651542663574, |
| "logits/rejected": -1.8435817956924438, |
| "logps/chosen": -87.89311981201172, |
| "logps/rejected": -114.3637466430664, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6345696449279785, |
| "rewards/margins": 26.21219825744629, |
| "rewards/rejected": -23.577627182006836, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.389649923896499e-07, |
| "logits/chosen": -2.1602165699005127, |
| "logits/rejected": -1.8217239379882812, |
| "logps/chosen": -86.65802001953125, |
| "logps/rejected": -116.1138916015625, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0264811515808105, |
| "rewards/margins": 24.80233383178711, |
| "rewards/rejected": -21.77585220336914, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.384576357179097e-07, |
| "logits/chosen": -2.1380181312561035, |
| "logits/rejected": -1.7056655883789062, |
| "logps/chosen": -90.73863220214844, |
| "logps/rejected": -117.328125, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.132702350616455, |
| "rewards/margins": 26.685169219970703, |
| "rewards/rejected": -23.552465438842773, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2.3795027904616943e-07, |
| "logits/chosen": -2.2756571769714355, |
| "logits/rejected": -1.9350929260253906, |
| "logps/chosen": -90.4540786743164, |
| "logps/rejected": -118.94252014160156, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2916797399520874, |
| "rewards/margins": 23.93198013305664, |
| "rewards/rejected": -22.640300750732422, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2.374429223744292e-07, |
| "logits/chosen": -2.2293753623962402, |
| "logits/rejected": -1.8584178686141968, |
| "logps/chosen": -83.38288879394531, |
| "logps/rejected": -109.228759765625, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3209164142608643, |
| "rewards/margins": 24.548112869262695, |
| "rewards/rejected": -22.227197647094727, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2.3693556570268896e-07, |
| "logits/chosen": -2.28892183303833, |
| "logits/rejected": -1.9423027038574219, |
| "logps/chosen": -86.03301239013672, |
| "logps/rejected": -118.62422180175781, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0244522094726562, |
| "rewards/margins": 25.199527740478516, |
| "rewards/rejected": -23.17507553100586, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_logits/chosen": -2.2072012424468994, |
| "eval_logits/rejected": -1.8581523895263672, |
| "eval_logps/chosen": -85.27046203613281, |
| "eval_logps/rejected": -113.4885025024414, |
| "eval_loss": 0.006078703328967094, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.780059814453125, |
| "eval_rewards/margins": 25.108850479125977, |
| "eval_rewards/rejected": -23.32879066467285, |
| "eval_runtime": 195.9406, |
| "eval_samples_per_second": 14.606, |
| "eval_steps_per_second": 0.914, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2.3642820903094873e-07, |
| "logits/chosen": -2.218658208847046, |
| "logits/rejected": -1.8969409465789795, |
| "logps/chosen": -82.16979217529297, |
| "logps/rejected": -116.5958023071289, |
| "loss": 0.0043, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.3920204639434814, |
| "rewards/margins": 26.6087646484375, |
| "rewards/rejected": -24.21674156188965, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.359208523592085e-07, |
| "logits/chosen": -2.2358078956604004, |
| "logits/rejected": -1.8991063833236694, |
| "logps/chosen": -85.44503021240234, |
| "logps/rejected": -115.95997619628906, |
| "loss": 0.0071, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.7518513202667236, |
| "rewards/margins": 25.580867767333984, |
| "rewards/rejected": -23.829017639160156, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.3541349568746826e-07, |
| "logits/chosen": -2.23246169090271, |
| "logits/rejected": -1.8101627826690674, |
| "logps/chosen": -88.6487045288086, |
| "logps/rejected": -115.31546783447266, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0985748767852783, |
| "rewards/margins": 25.528507232666016, |
| "rewards/rejected": -23.429927825927734, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.3490613901572803e-07, |
| "logits/chosen": -2.1937997341156006, |
| "logits/rejected": -1.899291753768921, |
| "logps/chosen": -82.33692169189453, |
| "logps/rejected": -116.04981994628906, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9832813739776611, |
| "rewards/margins": 25.707172393798828, |
| "rewards/rejected": -23.72389030456543, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.343987823439878e-07, |
| "logits/chosen": -2.19626522064209, |
| "logits/rejected": -1.8097482919692993, |
| "logps/chosen": -81.79765319824219, |
| "logps/rejected": -115.43753814697266, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6907103061676025, |
| "rewards/margins": 26.663219451904297, |
| "rewards/rejected": -23.972511291503906, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.3389142567224756e-07, |
| "logits/chosen": -2.2777841091156006, |
| "logits/rejected": -1.932579755783081, |
| "logps/chosen": -93.16586303710938, |
| "logps/rejected": -125.8462905883789, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5280853509902954, |
| "rewards/margins": 27.09100914001465, |
| "rewards/rejected": -25.562923431396484, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.3338406900050733e-07, |
| "logits/chosen": -2.2580790519714355, |
| "logits/rejected": -1.8621156215667725, |
| "logps/chosen": -85.24269104003906, |
| "logps/rejected": -114.29624938964844, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0481958389282227, |
| "rewards/margins": 28.0736141204834, |
| "rewards/rejected": -25.025419235229492, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.328767123287671e-07, |
| "logits/chosen": -2.237919569015503, |
| "logits/rejected": -1.9384901523590088, |
| "logps/chosen": -85.32179260253906, |
| "logps/rejected": -126.17274475097656, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1654307842254639, |
| "rewards/margins": 29.019460678100586, |
| "rewards/rejected": -27.854028701782227, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.3236935565702686e-07, |
| "logits/chosen": -2.2402701377868652, |
| "logits/rejected": -1.9076135158538818, |
| "logps/chosen": -84.37796783447266, |
| "logps/rejected": -119.68265533447266, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9258636236190796, |
| "rewards/margins": 26.976810455322266, |
| "rewards/rejected": -25.050945281982422, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.3186199898528663e-07, |
| "logits/chosen": -2.2411704063415527, |
| "logits/rejected": -1.8752963542938232, |
| "logps/chosen": -85.16765594482422, |
| "logps/rejected": -116.25230407714844, |
| "loss": 0.0074, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9921166896820068, |
| "rewards/margins": 26.783111572265625, |
| "rewards/rejected": -24.790996551513672, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_logits/chosen": -2.1969516277313232, |
| "eval_logits/rejected": -1.849937081336975, |
| "eval_logps/chosen": -86.06404113769531, |
| "eval_logps/rejected": -117.32756042480469, |
| "eval_loss": 0.006207953207194805, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.3832741975784302, |
| "eval_rewards/margins": 26.631595611572266, |
| "eval_rewards/rejected": -25.24832534790039, |
| "eval_runtime": 285.1562, |
| "eval_samples_per_second": 10.037, |
| "eval_steps_per_second": 0.628, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2.313546423135464e-07, |
| "logits/chosen": -2.1540346145629883, |
| "logits/rejected": -1.7665355205535889, |
| "logps/chosen": -90.4669189453125, |
| "logps/rejected": -113.56608581542969, |
| "loss": 0.0323, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6254875659942627, |
| "rewards/margins": 24.596233367919922, |
| "rewards/rejected": -21.970745086669922, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2.3084728564180616e-07, |
| "logits/chosen": -2.095485210418701, |
| "logits/rejected": -1.8191229104995728, |
| "logps/chosen": -82.52064514160156, |
| "logps/rejected": -114.91255187988281, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7258262634277344, |
| "rewards/margins": 23.092130661010742, |
| "rewards/rejected": -20.366304397583008, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.3033992897006593e-07, |
| "logits/chosen": -2.269178867340088, |
| "logits/rejected": -1.8472541570663452, |
| "logps/chosen": -85.8851318359375, |
| "logps/rejected": -109.74867248535156, |
| "loss": 0.0059, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9320855140686035, |
| "rewards/margins": 24.42289161682129, |
| "rewards/rejected": -21.49080467224121, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.298325722983257e-07, |
| "logits/chosen": -2.1464996337890625, |
| "logits/rejected": -1.7602676153182983, |
| "logps/chosen": -87.7806625366211, |
| "logps/rejected": -114.57737731933594, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2257797718048096, |
| "rewards/margins": 24.386262893676758, |
| "rewards/rejected": -22.160480499267578, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.2932521562658546e-07, |
| "logits/chosen": -2.1523966789245605, |
| "logits/rejected": -1.8064305782318115, |
| "logps/chosen": -86.0357666015625, |
| "logps/rejected": -112.4377212524414, |
| "loss": 0.01, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2664999961853027, |
| "rewards/margins": 24.120498657226562, |
| "rewards/rejected": -21.853994369506836, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.2881785895484523e-07, |
| "logits/chosen": -2.209660291671753, |
| "logits/rejected": -1.8102290630340576, |
| "logps/chosen": -87.49055480957031, |
| "logps/rejected": -110.42215728759766, |
| "loss": 0.0084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9767696857452393, |
| "rewards/margins": 23.30925941467285, |
| "rewards/rejected": -20.33249282836914, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.28310502283105e-07, |
| "logits/chosen": -2.1463191509246826, |
| "logits/rejected": -1.8173093795776367, |
| "logps/chosen": -80.68872833251953, |
| "logps/rejected": -112.73249816894531, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.864393711090088, |
| "rewards/margins": 23.488155364990234, |
| "rewards/rejected": -20.623760223388672, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.2780314561136476e-07, |
| "logits/chosen": -2.2494287490844727, |
| "logits/rejected": -1.8683185577392578, |
| "logps/chosen": -90.07569885253906, |
| "logps/rejected": -109.84877014160156, |
| "loss": 0.0027, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.9630918502807617, |
| "rewards/margins": 24.1021728515625, |
| "rewards/rejected": -21.139080047607422, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.2729578893962453e-07, |
| "logits/chosen": -2.2044570446014404, |
| "logits/rejected": -1.8658645153045654, |
| "logps/chosen": -87.5743179321289, |
| "logps/rejected": -112.40694427490234, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.718559741973877, |
| "rewards/margins": 23.71183967590332, |
| "rewards/rejected": -20.9932804107666, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.267884322678843e-07, |
| "logits/chosen": -2.25122332572937, |
| "logits/rejected": -1.8896070718765259, |
| "logps/chosen": -85.34817504882812, |
| "logps/rejected": -111.40423583984375, |
| "loss": 0.0052, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9988149404525757, |
| "rewards/margins": 24.812755584716797, |
| "rewards/rejected": -22.813940048217773, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_logits/chosen": -2.1680314540863037, |
| "eval_logits/rejected": -1.8186105489730835, |
| "eval_logps/chosen": -85.12017822265625, |
| "eval_logps/rejected": -110.73912048339844, |
| "eval_loss": 0.005591261200606823, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 1.8552027940750122, |
| "eval_rewards/margins": 23.80930519104004, |
| "eval_rewards/rejected": -21.954099655151367, |
| "eval_runtime": 178.0339, |
| "eval_samples_per_second": 16.076, |
| "eval_steps_per_second": 1.005, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.2628107559614406e-07, |
| "logits/chosen": -2.182180643081665, |
| "logits/rejected": -1.887372612953186, |
| "logps/chosen": -84.8567123413086, |
| "logps/rejected": -115.8074722290039, |
| "loss": 0.0061, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.302744150161743, |
| "rewards/margins": 24.386987686157227, |
| "rewards/rejected": -22.084239959716797, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.2577371892440383e-07, |
| "logits/chosen": -2.1381964683532715, |
| "logits/rejected": -1.7454639673233032, |
| "logps/chosen": -89.93260192871094, |
| "logps/rejected": -116.71270751953125, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.476393938064575, |
| "rewards/margins": 26.676769256591797, |
| "rewards/rejected": -24.200376510620117, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.252663622526636e-07, |
| "logits/chosen": -2.1717324256896973, |
| "logits/rejected": -1.7591243982315063, |
| "logps/chosen": -92.45228576660156, |
| "logps/rejected": -119.2745361328125, |
| "loss": 0.0051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9234495162963867, |
| "rewards/margins": 26.036029815673828, |
| "rewards/rejected": -24.112579345703125, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.2475900558092336e-07, |
| "logits/chosen": -2.2273201942443848, |
| "logits/rejected": -1.8839191198349, |
| "logps/chosen": -82.78079223632812, |
| "logps/rejected": -117.9035415649414, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3843512535095215, |
| "rewards/margins": 26.67987632751465, |
| "rewards/rejected": -24.2955265045166, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.2425164890918313e-07, |
| "logits/chosen": -2.1497018337249756, |
| "logits/rejected": -1.7351865768432617, |
| "logps/chosen": -91.68416595458984, |
| "logps/rejected": -117.03997802734375, |
| "loss": 0.004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7206225395202637, |
| "rewards/margins": 25.490280151367188, |
| "rewards/rejected": -22.7696590423584, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.237442922374429e-07, |
| "logits/chosen": -2.105646848678589, |
| "logits/rejected": -1.7680670022964478, |
| "logps/chosen": -83.17054748535156, |
| "logps/rejected": -110.3686294555664, |
| "loss": 0.003, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5340194702148438, |
| "rewards/margins": 23.920883178710938, |
| "rewards/rejected": -22.386865615844727, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.2323693556570266e-07, |
| "logits/chosen": -2.165457248687744, |
| "logits/rejected": -1.7284084558486938, |
| "logps/chosen": -87.86439514160156, |
| "logps/rejected": -118.21602630615234, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8017003536224365, |
| "rewards/margins": 25.351261138916016, |
| "rewards/rejected": -22.549560546875, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.2272957889396242e-07, |
| "logits/chosen": -2.1325504779815674, |
| "logits/rejected": -1.746701955795288, |
| "logps/chosen": -86.5936279296875, |
| "logps/rejected": -111.45501708984375, |
| "loss": 0.0061, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3284072875976562, |
| "rewards/margins": 23.549602508544922, |
| "rewards/rejected": -21.221195220947266, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.222222222222222e-07, |
| "logits/chosen": -2.1646571159362793, |
| "logits/rejected": -1.8008124828338623, |
| "logps/chosen": -91.38953399658203, |
| "logps/rejected": -111.65739440917969, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5468361377716064, |
| "rewards/margins": 23.903175354003906, |
| "rewards/rejected": -21.356340408325195, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.2171486555048196e-07, |
| "logits/chosen": -2.1863300800323486, |
| "logits/rejected": -1.8556190729141235, |
| "logps/chosen": -88.28529357910156, |
| "logps/rejected": -117.9306869506836, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.728421926498413, |
| "rewards/margins": 25.30516242980957, |
| "rewards/rejected": -22.57674217224121, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_logits/chosen": -2.1533539295196533, |
| "eval_logits/rejected": -1.8051024675369263, |
| "eval_logps/chosen": -85.23373413085938, |
| "eval_logps/rejected": -109.83948516845703, |
| "eval_loss": 0.005602886434644461, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 1.7984265089035034, |
| "eval_rewards/margins": 23.30270767211914, |
| "eval_rewards/rejected": -21.50428009033203, |
| "eval_runtime": 199.8494, |
| "eval_samples_per_second": 14.321, |
| "eval_steps_per_second": 0.896, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 2.2120750887874172e-07, |
| "logits/chosen": -2.184231996536255, |
| "logits/rejected": -1.8242343664169312, |
| "logps/chosen": -87.5803451538086, |
| "logps/rejected": -116.75703430175781, |
| "loss": 0.0047, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.086021900177002, |
| "rewards/margins": 24.399898529052734, |
| "rewards/rejected": -21.31387710571289, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 2.207001522070015e-07, |
| "logits/chosen": -2.141301393508911, |
| "logits/rejected": -1.7759917974472046, |
| "logps/chosen": -91.86133575439453, |
| "logps/rejected": -116.03465270996094, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5139381885528564, |
| "rewards/margins": 24.87776756286621, |
| "rewards/rejected": -22.363828659057617, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 2.2019279553526126e-07, |
| "logits/chosen": -2.147770643234253, |
| "logits/rejected": -1.6709057092666626, |
| "logps/chosen": -95.53349304199219, |
| "logps/rejected": -112.99432373046875, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.351959705352783, |
| "rewards/margins": 23.853574752807617, |
| "rewards/rejected": -21.501617431640625, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 2.1968543886352102e-07, |
| "logits/chosen": -2.23237943649292, |
| "logits/rejected": -1.9411084651947021, |
| "logps/chosen": -82.67511749267578, |
| "logps/rejected": -113.44953918457031, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2467036247253418, |
| "rewards/margins": 23.66016960144043, |
| "rewards/rejected": -22.413467407226562, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2.191780821917808e-07, |
| "logits/chosen": -2.1674644947052, |
| "logits/rejected": -1.8577144145965576, |
| "logps/chosen": -84.78733825683594, |
| "logps/rejected": -115.30723571777344, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1725082397460938, |
| "rewards/margins": 24.452693939208984, |
| "rewards/rejected": -22.280183792114258, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2.1867072552004056e-07, |
| "logits/chosen": -2.288512706756592, |
| "logits/rejected": -1.899009108543396, |
| "logps/chosen": -87.27893829345703, |
| "logps/rejected": -116.2738265991211, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.6636362075805664, |
| "rewards/margins": 25.80401039123535, |
| "rewards/rejected": -23.1403751373291, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 2.1816336884830032e-07, |
| "logits/chosen": -2.1499955654144287, |
| "logits/rejected": -1.848623514175415, |
| "logps/chosen": -81.82324981689453, |
| "logps/rejected": -115.3056411743164, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2145495414733887, |
| "rewards/margins": 25.514049530029297, |
| "rewards/rejected": -23.299501419067383, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 2.176560121765601e-07, |
| "logits/chosen": -2.2483465671539307, |
| "logits/rejected": -1.9416240453720093, |
| "logps/chosen": -87.2221908569336, |
| "logps/rejected": -114.78794860839844, |
| "loss": 0.0068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0889229774475098, |
| "rewards/margins": 24.115562438964844, |
| "rewards/rejected": -22.026639938354492, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2.1714865550481986e-07, |
| "logits/chosen": -2.1781678199768066, |
| "logits/rejected": -1.788074254989624, |
| "logps/chosen": -85.21916961669922, |
| "logps/rejected": -115.5194091796875, |
| "loss": 0.0056, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.176715850830078, |
| "rewards/margins": 25.536880493164062, |
| "rewards/rejected": -23.360164642333984, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2.1664129883307962e-07, |
| "logits/chosen": -2.1438374519348145, |
| "logits/rejected": -1.81368887424469, |
| "logps/chosen": -84.81913757324219, |
| "logps/rejected": -114.127685546875, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9444429874420166, |
| "rewards/margins": 24.719940185546875, |
| "rewards/rejected": -22.775497436523438, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.05, |
| "eval_logits/chosen": -2.1615262031555176, |
| "eval_logits/rejected": -1.812113881111145, |
| "eval_logps/chosen": -85.03972625732422, |
| "eval_logps/rejected": -110.49690246582031, |
| "eval_loss": 0.005681305192410946, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.8954312801361084, |
| "eval_rewards/margins": 23.728422164916992, |
| "eval_rewards/rejected": -21.832990646362305, |
| "eval_runtime": 213.6661, |
| "eval_samples_per_second": 13.395, |
| "eval_steps_per_second": 0.838, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2.161339421613394e-07, |
| "logits/chosen": -2.1927437782287598, |
| "logits/rejected": -1.8735787868499756, |
| "logps/chosen": -85.47551727294922, |
| "logps/rejected": -118.7635269165039, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.551397681236267, |
| "rewards/margins": 23.236629486083984, |
| "rewards/rejected": -21.685232162475586, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 2.1562658548959916e-07, |
| "logits/chosen": -2.259660482406616, |
| "logits/rejected": -1.8416106700897217, |
| "logps/chosen": -88.98851013183594, |
| "logps/rejected": -113.52168273925781, |
| "loss": 0.0056, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8511302471160889, |
| "rewards/margins": 26.2247257232666, |
| "rewards/rejected": -24.37359619140625, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 2.1511922881785892e-07, |
| "logits/chosen": -2.151369571685791, |
| "logits/rejected": -1.7967971563339233, |
| "logps/chosen": -86.92037200927734, |
| "logps/rejected": -110.92555236816406, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9498761892318726, |
| "rewards/margins": 22.68193244934082, |
| "rewards/rejected": -20.7320556640625, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 2.146118721461187e-07, |
| "logits/chosen": -2.1439459323883057, |
| "logits/rejected": -1.8120663166046143, |
| "logps/chosen": -85.99787902832031, |
| "logps/rejected": -114.73786926269531, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.070859432220459, |
| "rewards/margins": 24.2840518951416, |
| "rewards/rejected": -21.213193893432617, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 2.1410451547437846e-07, |
| "logits/chosen": -2.2162632942199707, |
| "logits/rejected": -1.8168309926986694, |
| "logps/chosen": -86.9716796875, |
| "logps/rejected": -110.72579193115234, |
| "loss": 0.0048, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.628699541091919, |
| "rewards/margins": 23.844585418701172, |
| "rewards/rejected": -21.215885162353516, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2.1359715880263822e-07, |
| "logits/chosen": -2.174852132797241, |
| "logits/rejected": -1.8075027465820312, |
| "logps/chosen": -83.8674087524414, |
| "logps/rejected": -112.02839660644531, |
| "loss": 0.0035, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.375577926635742, |
| "rewards/margins": 24.027584075927734, |
| "rewards/rejected": -21.65200424194336, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2.13089802130898e-07, |
| "logits/chosen": -2.167109251022339, |
| "logits/rejected": -1.8096405267715454, |
| "logps/chosen": -84.7580337524414, |
| "logps/rejected": -117.7773208618164, |
| "loss": 0.0031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1797847747802734, |
| "rewards/margins": 25.387409210205078, |
| "rewards/rejected": -23.207622528076172, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 2.1258244545915776e-07, |
| "logits/chosen": -2.141667127609253, |
| "logits/rejected": -1.7658653259277344, |
| "logps/chosen": -83.34464263916016, |
| "logps/rejected": -108.3573989868164, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8713493347167969, |
| "rewards/margins": 23.396373748779297, |
| "rewards/rejected": -21.525026321411133, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 2.1207508878741752e-07, |
| "logits/chosen": -2.1764111518859863, |
| "logits/rejected": -1.8196337223052979, |
| "logps/chosen": -89.48072814941406, |
| "logps/rejected": -112.5357437133789, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0725154876708984, |
| "rewards/margins": 23.079078674316406, |
| "rewards/rejected": -22.006563186645508, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2.115677321156773e-07, |
| "logits/chosen": -2.1829419136047363, |
| "logits/rejected": -1.830394983291626, |
| "logps/chosen": -85.89895629882812, |
| "logps/rejected": -116.04026794433594, |
| "loss": 0.0056, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8246675729751587, |
| "rewards/margins": 24.702465057373047, |
| "rewards/rejected": -22.877796173095703, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_logits/chosen": -2.16304874420166, |
| "eval_logits/rejected": -1.8152433633804321, |
| "eval_logps/chosen": -86.18814086914062, |
| "eval_logps/rejected": -112.2934341430664, |
| "eval_loss": 0.005279215984046459, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 1.3212203979492188, |
| "eval_rewards/margins": 24.052478790283203, |
| "eval_rewards/rejected": -22.731260299682617, |
| "eval_runtime": 219.653, |
| "eval_samples_per_second": 13.03, |
| "eval_steps_per_second": 0.815, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2.1106037544393706e-07, |
| "logits/chosen": -2.186826705932617, |
| "logits/rejected": -1.8132612705230713, |
| "logps/chosen": -87.9347152709961, |
| "logps/rejected": -116.61759185791016, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.319627285003662, |
| "rewards/margins": 25.145320892333984, |
| "rewards/rejected": -22.82569122314453, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2.1055301877219682e-07, |
| "logits/chosen": -2.2042155265808105, |
| "logits/rejected": -1.7962379455566406, |
| "logps/chosen": -89.61426544189453, |
| "logps/rejected": -119.98515319824219, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1328256130218506, |
| "rewards/margins": 25.84686279296875, |
| "rewards/rejected": -23.714035034179688, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 2.100456621004566e-07, |
| "logits/chosen": -2.12410831451416, |
| "logits/rejected": -1.81709885597229, |
| "logps/chosen": -83.92530822753906, |
| "logps/rejected": -115.90692138671875, |
| "loss": 0.004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.454441785812378, |
| "rewards/margins": 25.90082359313965, |
| "rewards/rejected": -23.44638442993164, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 2.0953830542871636e-07, |
| "logits/chosen": -2.24179744720459, |
| "logits/rejected": -1.9268741607666016, |
| "logps/chosen": -83.57841491699219, |
| "logps/rejected": -115.2148666381836, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5743858814239502, |
| "rewards/margins": 24.534379959106445, |
| "rewards/rejected": -22.95999526977539, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 2.0903094875697612e-07, |
| "logits/chosen": -2.1769909858703613, |
| "logits/rejected": -1.7283875942230225, |
| "logps/chosen": -94.74465942382812, |
| "logps/rejected": -120.26570129394531, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9194726943969727, |
| "rewards/margins": 25.821313858032227, |
| "rewards/rejected": -23.901838302612305, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 2.085235920852359e-07, |
| "logits/chosen": -2.1689000129699707, |
| "logits/rejected": -1.8665683269500732, |
| "logps/chosen": -85.41389465332031, |
| "logps/rejected": -120.8945083618164, |
| "loss": 0.0037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.879907250404358, |
| "rewards/margins": 26.468700408935547, |
| "rewards/rejected": -24.588794708251953, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2.0801623541349566e-07, |
| "logits/chosen": -2.243333101272583, |
| "logits/rejected": -1.8705765008926392, |
| "logps/chosen": -81.45762634277344, |
| "logps/rejected": -111.65384674072266, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.552882432937622, |
| "rewards/margins": 25.76004981994629, |
| "rewards/rejected": -23.207164764404297, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2.0750887874175542e-07, |
| "logits/chosen": -2.171480655670166, |
| "logits/rejected": -1.8349215984344482, |
| "logps/chosen": -86.48908996582031, |
| "logps/rejected": -120.96435546875, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6669495105743408, |
| "rewards/margins": 25.430204391479492, |
| "rewards/rejected": -23.763256072998047, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 2.070015220700152e-07, |
| "logits/chosen": -2.1459603309631348, |
| "logits/rejected": -1.7423560619354248, |
| "logps/chosen": -87.96595764160156, |
| "logps/rejected": -120.45890045166016, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2795779705047607, |
| "rewards/margins": 26.35489273071289, |
| "rewards/rejected": -24.0753116607666, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 2.0649416539827496e-07, |
| "logits/chosen": -2.1519787311553955, |
| "logits/rejected": -1.7902311086654663, |
| "logps/chosen": -85.25035095214844, |
| "logps/rejected": -112.2745590209961, |
| "loss": 0.009, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.7297900915145874, |
| "rewards/margins": 22.654048919677734, |
| "rewards/rejected": -20.92425537109375, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_logits/chosen": -2.1591501235961914, |
| "eval_logits/rejected": -1.8124133348464966, |
| "eval_logps/chosen": -84.112548828125, |
| "eval_logps/rejected": -110.20504760742188, |
| "eval_loss": 0.005225938744843006, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 2.3590192794799805, |
| "eval_rewards/margins": 24.04608726501465, |
| "eval_rewards/rejected": -21.68706703186035, |
| "eval_runtime": 233.6775, |
| "eval_samples_per_second": 12.248, |
| "eval_steps_per_second": 0.766, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2.0598680872653472e-07, |
| "logits/chosen": -2.1743123531341553, |
| "logits/rejected": -1.7647409439086914, |
| "logps/chosen": -90.07218933105469, |
| "logps/rejected": -115.3643798828125, |
| "loss": 0.0037, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.752506732940674, |
| "rewards/margins": 24.845409393310547, |
| "rewards/rejected": -22.092905044555664, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2.054794520547945e-07, |
| "logits/chosen": -2.0834057331085205, |
| "logits/rejected": -1.7100751399993896, |
| "logps/chosen": -85.79044342041016, |
| "logps/rejected": -113.79996490478516, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7096903324127197, |
| "rewards/margins": 25.91143798828125, |
| "rewards/rejected": -23.20174789428711, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2.0497209538305426e-07, |
| "logits/chosen": -2.2217605113983154, |
| "logits/rejected": -1.871063470840454, |
| "logps/chosen": -87.78010559082031, |
| "logps/rejected": -115.4132080078125, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.101217746734619, |
| "rewards/margins": 25.03754234313965, |
| "rewards/rejected": -22.93632698059082, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 2.0446473871131402e-07, |
| "logits/chosen": -2.188868761062622, |
| "logits/rejected": -1.7949635982513428, |
| "logps/chosen": -90.8096923828125, |
| "logps/rejected": -122.7888412475586, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.599236488342285, |
| "rewards/margins": 25.132827758789062, |
| "rewards/rejected": -22.533588409423828, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 2.039573820395738e-07, |
| "logits/chosen": -2.1523804664611816, |
| "logits/rejected": -1.8026365041732788, |
| "logps/chosen": -79.64842224121094, |
| "logps/rejected": -115.60630798339844, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1584062576293945, |
| "rewards/margins": 25.834789276123047, |
| "rewards/rejected": -23.67638397216797, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 2.0345002536783356e-07, |
| "logits/chosen": -2.191920757293701, |
| "logits/rejected": -1.7900508642196655, |
| "logps/chosen": -81.1897964477539, |
| "logps/rejected": -110.9433822631836, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1984918117523193, |
| "rewards/margins": 25.696517944335938, |
| "rewards/rejected": -22.498027801513672, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 2.0294266869609332e-07, |
| "logits/chosen": -2.240447998046875, |
| "logits/rejected": -1.842795729637146, |
| "logps/chosen": -89.96078491210938, |
| "logps/rejected": -115.45732116699219, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5964362621307373, |
| "rewards/margins": 26.703838348388672, |
| "rewards/rejected": -24.107402801513672, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 2.024353120243531e-07, |
| "logits/chosen": -2.155785083770752, |
| "logits/rejected": -1.7801926136016846, |
| "logps/chosen": -79.60318756103516, |
| "logps/rejected": -111.69859313964844, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1407876014709473, |
| "rewards/margins": 24.803207397460938, |
| "rewards/rejected": -21.66242027282715, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 2.0192795535261286e-07, |
| "logits/chosen": -2.186657428741455, |
| "logits/rejected": -1.8247960805892944, |
| "logps/chosen": -83.01274108886719, |
| "logps/rejected": -115.82133483886719, |
| "loss": 0.0055, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.050610065460205, |
| "rewards/margins": 26.237497329711914, |
| "rewards/rejected": -23.1868839263916, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 2.0142059868087262e-07, |
| "logits/chosen": -2.184058666229248, |
| "logits/rejected": -1.8208153247833252, |
| "logps/chosen": -87.23689270019531, |
| "logps/rejected": -112.4271240234375, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2483184337615967, |
| "rewards/margins": 25.097911834716797, |
| "rewards/rejected": -21.849592208862305, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_logits/chosen": -2.1579010486602783, |
| "eval_logits/rejected": -1.8120908737182617, |
| "eval_logps/chosen": -83.75383758544922, |
| "eval_logps/rejected": -112.24433898925781, |
| "eval_loss": 0.005189881194382906, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.5383784770965576, |
| "eval_rewards/margins": 25.245080947875977, |
| "eval_rewards/rejected": -22.706703186035156, |
| "eval_runtime": 299.5228, |
| "eval_samples_per_second": 9.555, |
| "eval_steps_per_second": 0.598, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 2.009132420091324e-07, |
| "logits/chosen": -2.0997519493103027, |
| "logits/rejected": -1.7868757247924805, |
| "logps/chosen": -81.20166015625, |
| "logps/rejected": -114.8741455078125, |
| "loss": 0.0065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8495497703552246, |
| "rewards/margins": 26.784374237060547, |
| "rewards/rejected": -23.934823989868164, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2.0040588533739216e-07, |
| "logits/chosen": -2.1891965866088867, |
| "logits/rejected": -1.8037872314453125, |
| "logps/chosen": -86.70457458496094, |
| "logps/rejected": -116.2646255493164, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.284919261932373, |
| "rewards/margins": 26.434612274169922, |
| "rewards/rejected": -24.149694442749023, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.9989852866565192e-07, |
| "logits/chosen": -2.1447997093200684, |
| "logits/rejected": -1.8104356527328491, |
| "logps/chosen": -85.48689270019531, |
| "logps/rejected": -120.234375, |
| "loss": 0.0048, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.3036551475524902, |
| "rewards/margins": 25.647720336914062, |
| "rewards/rejected": -24.344066619873047, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.993911719939117e-07, |
| "logits/chosen": -2.1070830821990967, |
| "logits/rejected": -1.7089662551879883, |
| "logps/chosen": -88.43952178955078, |
| "logps/rejected": -112.84476470947266, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.554791212081909, |
| "rewards/margins": 25.463603973388672, |
| "rewards/rejected": -22.9088134765625, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.9888381532217146e-07, |
| "logits/chosen": -2.1640028953552246, |
| "logits/rejected": -1.803934097290039, |
| "logps/chosen": -87.4181900024414, |
| "logps/rejected": -119.70159912109375, |
| "loss": 0.0037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3634817600250244, |
| "rewards/margins": 25.766544342041016, |
| "rewards/rejected": -24.403064727783203, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.9837645865043122e-07, |
| "logits/chosen": -2.202446460723877, |
| "logits/rejected": -1.8282486200332642, |
| "logps/chosen": -87.19379425048828, |
| "logps/rejected": -118.42472839355469, |
| "loss": 0.0024, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8503637313842773, |
| "rewards/margins": 26.071752548217773, |
| "rewards/rejected": -24.22138786315918, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.97869101978691e-07, |
| "logits/chosen": -2.149972915649414, |
| "logits/rejected": -1.789910912513733, |
| "logps/chosen": -87.65351867675781, |
| "logps/rejected": -117.45466613769531, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0328145027160645, |
| "rewards/margins": 25.302623748779297, |
| "rewards/rejected": -23.269811630249023, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.9736174530695076e-07, |
| "logits/chosen": -2.2164740562438965, |
| "logits/rejected": -1.8319326639175415, |
| "logps/chosen": -87.3599853515625, |
| "logps/rejected": -120.3600845336914, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.874768853187561, |
| "rewards/margins": 27.10894775390625, |
| "rewards/rejected": -25.234180450439453, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.9685438863521052e-07, |
| "logits/chosen": -2.1731603145599365, |
| "logits/rejected": -1.781818151473999, |
| "logps/chosen": -86.36930847167969, |
| "logps/rejected": -117.47142028808594, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4858036041259766, |
| "rewards/margins": 26.315841674804688, |
| "rewards/rejected": -23.83003807067871, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.963470319634703e-07, |
| "logits/chosen": -2.2734062671661377, |
| "logits/rejected": -1.908062219619751, |
| "logps/chosen": -84.78250122070312, |
| "logps/rejected": -117.51930236816406, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3912415504455566, |
| "rewards/margins": 27.765466690063477, |
| "rewards/rejected": -25.37422752380371, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_logits/chosen": -2.178615093231201, |
| "eval_logits/rejected": -1.8322229385375977, |
| "eval_logps/chosen": -86.60983276367188, |
| "eval_logps/rejected": -116.65242767333984, |
| "eval_loss": 0.005201002117246389, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.1103774309158325, |
| "eval_rewards/margins": 26.021133422851562, |
| "eval_rewards/rejected": -24.910757064819336, |
| "eval_runtime": 204.9916, |
| "eval_samples_per_second": 13.962, |
| "eval_steps_per_second": 0.873, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.9583967529173006e-07, |
| "logits/chosen": -2.1784110069274902, |
| "logits/rejected": -1.8004591464996338, |
| "logps/chosen": -90.504638671875, |
| "logps/rejected": -120.81787109375, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6989917755126953, |
| "rewards/margins": 27.467670440673828, |
| "rewards/rejected": -25.7686767578125, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.9533231861998982e-07, |
| "logits/chosen": -2.2853636741638184, |
| "logits/rejected": -1.9385350942611694, |
| "logps/chosen": -88.9593734741211, |
| "logps/rejected": -126.08199310302734, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.512266993522644, |
| "rewards/margins": 27.1735782623291, |
| "rewards/rejected": -25.661312103271484, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.948249619482496e-07, |
| "logits/chosen": -2.0956151485443115, |
| "logits/rejected": -1.7385085821151733, |
| "logps/chosen": -88.5088882446289, |
| "logps/rejected": -119.0416488647461, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1305296421051025, |
| "rewards/margins": 26.505001068115234, |
| "rewards/rejected": -25.374475479125977, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.9431760527650936e-07, |
| "logits/chosen": -2.2182435989379883, |
| "logits/rejected": -1.8027465343475342, |
| "logps/chosen": -87.2740707397461, |
| "logps/rejected": -126.13343811035156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.500243663787842, |
| "rewards/margins": 29.380783081054688, |
| "rewards/rejected": -26.880542755126953, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.9381024860476912e-07, |
| "logits/chosen": -2.2416915893554688, |
| "logits/rejected": -1.851488471031189, |
| "logps/chosen": -92.11241149902344, |
| "logps/rejected": -120.4324722290039, |
| "loss": 0.0055, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0522854328155518, |
| "rewards/margins": 27.805065155029297, |
| "rewards/rejected": -25.75278091430664, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.933028919330289e-07, |
| "logits/chosen": -2.1997532844543457, |
| "logits/rejected": -1.8702919483184814, |
| "logps/chosen": -87.5173568725586, |
| "logps/rejected": -121.913818359375, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8851993680000305, |
| "rewards/margins": 27.617206573486328, |
| "rewards/rejected": -26.732006072998047, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.9279553526128866e-07, |
| "logits/chosen": -2.212709426879883, |
| "logits/rejected": -1.8357082605361938, |
| "logps/chosen": -85.00953674316406, |
| "logps/rejected": -122.2784652709961, |
| "loss": 0.0069, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2315926551818848, |
| "rewards/margins": 29.97836685180664, |
| "rewards/rejected": -27.746774673461914, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.9228817858954842e-07, |
| "logits/chosen": -2.2300631999969482, |
| "logits/rejected": -1.828784704208374, |
| "logps/chosen": -84.7209243774414, |
| "logps/rejected": -122.51011657714844, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3681228160858154, |
| "rewards/margins": 29.574649810791016, |
| "rewards/rejected": -26.206527709960938, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.917808219178082e-07, |
| "logits/chosen": -2.255375385284424, |
| "logits/rejected": -1.8779224157333374, |
| "logps/chosen": -85.09639739990234, |
| "logps/rejected": -115.9112548828125, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2712059020996094, |
| "rewards/margins": 27.021814346313477, |
| "rewards/rejected": -23.750606536865234, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.9127346524606796e-07, |
| "logits/chosen": -2.224299430847168, |
| "logits/rejected": -1.8939344882965088, |
| "logps/chosen": -82.23179626464844, |
| "logps/rejected": -113.6113052368164, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4387083053588867, |
| "rewards/margins": 26.06760025024414, |
| "rewards/rejected": -23.62889289855957, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_logits/chosen": -2.1937084197998047, |
| "eval_logits/rejected": -1.8446825742721558, |
| "eval_logps/chosen": -84.96446990966797, |
| "eval_logps/rejected": -116.5991439819336, |
| "eval_loss": 0.005613674875348806, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.9330565929412842, |
| "eval_rewards/margins": 26.81716537475586, |
| "eval_rewards/rejected": -24.88410758972168, |
| "eval_runtime": 259.8625, |
| "eval_samples_per_second": 11.014, |
| "eval_steps_per_second": 0.689, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.9076610857432772e-07, |
| "logits/chosen": -2.2159202098846436, |
| "logits/rejected": -1.7732995748519897, |
| "logps/chosen": -86.9103775024414, |
| "logps/rejected": -119.67277526855469, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8791584968566895, |
| "rewards/margins": 28.936452865600586, |
| "rewards/rejected": -26.05729103088379, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.902587519025875e-07, |
| "logits/chosen": -2.169167995452881, |
| "logits/rejected": -1.750314474105835, |
| "logps/chosen": -89.01744079589844, |
| "logps/rejected": -120.8212661743164, |
| "loss": 0.003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.396334171295166, |
| "rewards/margins": 29.005077362060547, |
| "rewards/rejected": -26.60874366760254, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.8975139523084726e-07, |
| "logits/chosen": -2.1312053203582764, |
| "logits/rejected": -1.7972911596298218, |
| "logps/chosen": -86.7480697631836, |
| "logps/rejected": -125.67042541503906, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.189509391784668, |
| "rewards/margins": 28.36881446838379, |
| "rewards/rejected": -26.179306030273438, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.8924403855910702e-07, |
| "logits/chosen": -2.2344472408294678, |
| "logits/rejected": -1.9485044479370117, |
| "logps/chosen": -79.72859954833984, |
| "logps/rejected": -119.08320617675781, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4703823328018188, |
| "rewards/margins": 27.174612045288086, |
| "rewards/rejected": -25.7042293548584, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.887366818873668e-07, |
| "logits/chosen": -2.237384080886841, |
| "logits/rejected": -1.8804075717926025, |
| "logps/chosen": -87.28046417236328, |
| "logps/rejected": -117.6243896484375, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6116135120391846, |
| "rewards/margins": 26.46062660217285, |
| "rewards/rejected": -24.849010467529297, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.8822932521562656e-07, |
| "logits/chosen": -2.1050355434417725, |
| "logits/rejected": -1.7900664806365967, |
| "logps/chosen": -87.86946105957031, |
| "logps/rejected": -118.79146575927734, |
| "loss": 0.0076, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.9105596542358398, |
| "rewards/margins": 25.8955020904541, |
| "rewards/rejected": -23.984943389892578, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.8772196854388632e-07, |
| "logits/chosen": -2.2055516242980957, |
| "logits/rejected": -1.794002890586853, |
| "logps/chosen": -90.33226013183594, |
| "logps/rejected": -122.12618255615234, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3315025568008423, |
| "rewards/margins": 27.155811309814453, |
| "rewards/rejected": -25.82430648803711, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.872146118721461e-07, |
| "logits/chosen": -2.1890950202941895, |
| "logits/rejected": -1.7347943782806396, |
| "logps/chosen": -93.88822937011719, |
| "logps/rejected": -118.97929382324219, |
| "loss": 0.0011, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.2069919109344482, |
| "rewards/margins": 27.36787986755371, |
| "rewards/rejected": -25.160892486572266, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.8670725520040586e-07, |
| "logits/chosen": -2.2413697242736816, |
| "logits/rejected": -1.8580372333526611, |
| "logps/chosen": -88.85777282714844, |
| "logps/rejected": -122.02364349365234, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6634600162506104, |
| "rewards/margins": 29.12453842163086, |
| "rewards/rejected": -26.46108055114746, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.8619989852866562e-07, |
| "logits/chosen": -2.1835293769836426, |
| "logits/rejected": -1.8364540338516235, |
| "logps/chosen": -87.46830749511719, |
| "logps/rejected": -118.46342468261719, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9554582834243774, |
| "rewards/margins": 27.929412841796875, |
| "rewards/rejected": -25.973957061767578, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_logits/chosen": -2.1951773166656494, |
| "eval_logits/rejected": -1.8438202142715454, |
| "eval_logps/chosen": -85.55497741699219, |
| "eval_logps/rejected": -119.35179901123047, |
| "eval_loss": 0.005554942414164543, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.6378037929534912, |
| "eval_rewards/margins": 27.898239135742188, |
| "eval_rewards/rejected": -26.26043701171875, |
| "eval_runtime": 327.6474, |
| "eval_samples_per_second": 8.735, |
| "eval_steps_per_second": 0.546, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.856925418569254e-07, |
| "logits/chosen": -2.1702980995178223, |
| "logits/rejected": -1.7541742324829102, |
| "logps/chosen": -87.9078598022461, |
| "logps/rejected": -117.9532699584961, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8803863525390625, |
| "rewards/margins": 28.9564151763916, |
| "rewards/rejected": -25.07602882385254, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.8518518518518516e-07, |
| "logits/chosen": -2.164057493209839, |
| "logits/rejected": -1.8279097080230713, |
| "logps/chosen": -82.57586669921875, |
| "logps/rejected": -120.59794616699219, |
| "loss": 0.0053, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.3020541667938232, |
| "rewards/margins": 29.443384170532227, |
| "rewards/rejected": -27.141326904296875, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.8467782851344492e-07, |
| "logits/chosen": -2.170772075653076, |
| "logits/rejected": -1.8161497116088867, |
| "logps/chosen": -85.89886474609375, |
| "logps/rejected": -123.47340393066406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1593396663665771, |
| "rewards/margins": 29.278268814086914, |
| "rewards/rejected": -28.118927001953125, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.841704718417047e-07, |
| "logits/chosen": -2.198502779006958, |
| "logits/rejected": -1.8595365285873413, |
| "logps/chosen": -82.33607482910156, |
| "logps/rejected": -117.26557922363281, |
| "loss": 0.0058, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3132636547088623, |
| "rewards/margins": 27.13201904296875, |
| "rewards/rejected": -25.818756103515625, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.8366311516996446e-07, |
| "logits/chosen": -2.2103936672210693, |
| "logits/rejected": -1.8283309936523438, |
| "logps/chosen": -87.3060302734375, |
| "logps/rejected": -116.38105773925781, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.252316951751709, |
| "rewards/margins": 27.21462059020996, |
| "rewards/rejected": -24.962305068969727, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.8315575849822422e-07, |
| "logits/chosen": -2.1284189224243164, |
| "logits/rejected": -1.7991136312484741, |
| "logps/chosen": -85.66838073730469, |
| "logps/rejected": -115.23576354980469, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.003413677215576, |
| "rewards/margins": 25.363988876342773, |
| "rewards/rejected": -23.360576629638672, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.82648401826484e-07, |
| "logits/chosen": -2.1712751388549805, |
| "logits/rejected": -1.8838971853256226, |
| "logps/chosen": -81.59526824951172, |
| "logps/rejected": -119.744384765625, |
| "loss": 0.0049, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3410313129425049, |
| "rewards/margins": 26.60662269592285, |
| "rewards/rejected": -25.265588760375977, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.8214104515474375e-07, |
| "logits/chosen": -2.229074239730835, |
| "logits/rejected": -1.8246219158172607, |
| "logps/chosen": -85.791259765625, |
| "logps/rejected": -122.32413482666016, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3884541988372803, |
| "rewards/margins": 29.200729370117188, |
| "rewards/rejected": -27.812274932861328, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.8163368848300352e-07, |
| "logits/chosen": -2.2312495708465576, |
| "logits/rejected": -1.912697196006775, |
| "logps/chosen": -80.87403869628906, |
| "logps/rejected": -123.8989028930664, |
| "loss": 0.0083, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5714377164840698, |
| "rewards/margins": 28.29909324645996, |
| "rewards/rejected": -26.7276554107666, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.811263318112633e-07, |
| "logits/chosen": -2.2286040782928467, |
| "logits/rejected": -1.8826186656951904, |
| "logps/chosen": -86.8649673461914, |
| "logps/rejected": -121.7653579711914, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1471002101898193, |
| "rewards/margins": 27.70977783203125, |
| "rewards/rejected": -25.562679290771484, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_logits/chosen": -2.1800849437713623, |
| "eval_logits/rejected": -1.830121636390686, |
| "eval_logps/chosen": -86.1629867553711, |
| "eval_logps/rejected": -119.01561737060547, |
| "eval_loss": 0.006075535900890827, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.3337992429733276, |
| "eval_rewards/margins": 27.4261474609375, |
| "eval_rewards/rejected": -26.092342376708984, |
| "eval_runtime": 190.0895, |
| "eval_samples_per_second": 15.056, |
| "eval_steps_per_second": 0.942, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.8061897513952305e-07, |
| "logits/chosen": -2.1978001594543457, |
| "logits/rejected": -1.7904773950576782, |
| "logps/chosen": -89.93728637695312, |
| "logps/rejected": -120.550048828125, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8481388092041016, |
| "rewards/margins": 28.82729721069336, |
| "rewards/rejected": -25.979156494140625, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.8011161846778282e-07, |
| "logits/chosen": -2.20564603805542, |
| "logits/rejected": -1.8694099187850952, |
| "logps/chosen": -85.4388427734375, |
| "logps/rejected": -119.31624603271484, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.9251155853271484, |
| "rewards/margins": 26.277385711669922, |
| "rewards/rejected": -24.352270126342773, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.796042617960426e-07, |
| "logits/chosen": -2.176328659057617, |
| "logits/rejected": -1.7874501943588257, |
| "logps/chosen": -97.2492446899414, |
| "logps/rejected": -123.11531066894531, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5614807605743408, |
| "rewards/margins": 26.78195571899414, |
| "rewards/rejected": -25.22047233581543, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.7909690512430235e-07, |
| "logits/chosen": -2.1733341217041016, |
| "logits/rejected": -1.796979546546936, |
| "logps/chosen": -84.2280502319336, |
| "logps/rejected": -116.29603576660156, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3264310359954834, |
| "rewards/margins": 27.896953582763672, |
| "rewards/rejected": -25.57052230834961, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.7858954845256212e-07, |
| "logits/chosen": -2.2226386070251465, |
| "logits/rejected": -1.8575359582901, |
| "logps/chosen": -85.23347473144531, |
| "logps/rejected": -116.77949523925781, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.571824550628662, |
| "rewards/margins": 27.728759765625, |
| "rewards/rejected": -25.156932830810547, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.780821917808219e-07, |
| "logits/chosen": -2.196302652359009, |
| "logits/rejected": -1.8011735677719116, |
| "logps/chosen": -90.76152038574219, |
| "logps/rejected": -119.6912841796875, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5391037464141846, |
| "rewards/margins": 26.7655086517334, |
| "rewards/rejected": -24.226404190063477, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.7757483510908165e-07, |
| "logits/chosen": -2.184532642364502, |
| "logits/rejected": -1.8580677509307861, |
| "logps/chosen": -84.7155990600586, |
| "logps/rejected": -126.76289367675781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2373816967010498, |
| "rewards/margins": 28.667322158813477, |
| "rewards/rejected": -27.4299373626709, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.7706747843734142e-07, |
| "logits/chosen": -2.236210823059082, |
| "logits/rejected": -1.8297055959701538, |
| "logps/chosen": -84.67916107177734, |
| "logps/rejected": -119.5815200805664, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.601902723312378, |
| "rewards/margins": 28.8743896484375, |
| "rewards/rejected": -27.27248764038086, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.765601217656012e-07, |
| "logits/chosen": -2.2311558723449707, |
| "logits/rejected": -1.878273367881775, |
| "logps/chosen": -88.32709503173828, |
| "logps/rejected": -121.57609558105469, |
| "loss": 0.0054, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2206145524978638, |
| "rewards/margins": 28.100128173828125, |
| "rewards/rejected": -26.879512786865234, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.7605276509386095e-07, |
| "logits/chosen": -2.1835224628448486, |
| "logits/rejected": -1.8489364385604858, |
| "logps/chosen": -84.09500122070312, |
| "logps/rejected": -121.5748519897461, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4824763238430023, |
| "rewards/margins": 27.168865203857422, |
| "rewards/rejected": -26.686386108398438, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_logits/chosen": -2.176138162612915, |
| "eval_logits/rejected": -1.8300259113311768, |
| "eval_logps/chosen": -86.61859130859375, |
| "eval_logps/rejected": -119.87804412841797, |
| "eval_loss": 0.0059745111502707005, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.1059939861297607, |
| "eval_rewards/margins": 27.629554748535156, |
| "eval_rewards/rejected": -26.523563385009766, |
| "eval_runtime": 207.1978, |
| "eval_samples_per_second": 13.813, |
| "eval_steps_per_second": 0.864, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.7554540842212072e-07, |
| "logits/chosen": -2.169111728668213, |
| "logits/rejected": -1.8405656814575195, |
| "logps/chosen": -83.15672302246094, |
| "logps/rejected": -120.4351577758789, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0360536575317383, |
| "rewards/margins": 27.2529354095459, |
| "rewards/rejected": -26.216882705688477, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.750380517503805e-07, |
| "logits/chosen": -2.2091641426086426, |
| "logits/rejected": -1.8550224304199219, |
| "logps/chosen": -84.96271514892578, |
| "logps/rejected": -119.28694152832031, |
| "loss": 0.0052, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8836094737052917, |
| "rewards/margins": 26.456172943115234, |
| "rewards/rejected": -25.572561264038086, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.7453069507864025e-07, |
| "logits/chosen": -2.2117037773132324, |
| "logits/rejected": -1.8669350147247314, |
| "logps/chosen": -84.34877014160156, |
| "logps/rejected": -119.05101013183594, |
| "loss": 0.0076, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.0027484893798828, |
| "rewards/margins": 28.111114501953125, |
| "rewards/rejected": -27.10836410522461, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.7402333840690002e-07, |
| "logits/chosen": -2.1435580253601074, |
| "logits/rejected": -1.753458023071289, |
| "logps/chosen": -93.03610229492188, |
| "logps/rejected": -126.67008972167969, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7019534707069397, |
| "rewards/margins": 28.131439208984375, |
| "rewards/rejected": -27.42948341369629, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.735159817351598e-07, |
| "logits/chosen": -2.1424784660339355, |
| "logits/rejected": -1.8056213855743408, |
| "logps/chosen": -82.12688446044922, |
| "logps/rejected": -125.8278579711914, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.45741605758667, |
| "rewards/margins": 30.075420379638672, |
| "rewards/rejected": -27.61800765991211, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.7300862506341955e-07, |
| "logits/chosen": -2.2541353702545166, |
| "logits/rejected": -1.8698198795318604, |
| "logps/chosen": -87.37802124023438, |
| "logps/rejected": -123.66682434082031, |
| "loss": 0.0064, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.516167402267456, |
| "rewards/margins": 29.741958618164062, |
| "rewards/rejected": -28.22579002380371, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.7250126839167932e-07, |
| "logits/chosen": -2.297926902770996, |
| "logits/rejected": -1.9295371770858765, |
| "logps/chosen": -88.74690246582031, |
| "logps/rejected": -119.91023254394531, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9074515104293823, |
| "rewards/margins": 27.017669677734375, |
| "rewards/rejected": -26.110218048095703, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.719939117199391e-07, |
| "logits/chosen": -2.1698784828186035, |
| "logits/rejected": -1.8197612762451172, |
| "logps/chosen": -87.33271789550781, |
| "logps/rejected": -120.39599609375, |
| "loss": 0.0037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6318087577819824, |
| "rewards/margins": 29.638574600219727, |
| "rewards/rejected": -27.006765365600586, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.7148655504819885e-07, |
| "logits/chosen": -2.126783847808838, |
| "logits/rejected": -1.7983070611953735, |
| "logps/chosen": -83.7030029296875, |
| "logps/rejected": -122.6335678100586, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.919542670249939, |
| "rewards/margins": 28.071773529052734, |
| "rewards/rejected": -26.152231216430664, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.7097919837645862e-07, |
| "logits/chosen": -2.227081298828125, |
| "logits/rejected": -1.9020026922225952, |
| "logps/chosen": -85.4066162109375, |
| "logps/rejected": -123.98991394042969, |
| "loss": 0.0031, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.6433346271514893, |
| "rewards/margins": 29.48199462890625, |
| "rewards/rejected": -27.838659286499023, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_logits/chosen": -2.1810193061828613, |
| "eval_logits/rejected": -1.8324401378631592, |
| "eval_logps/chosen": -85.56733703613281, |
| "eval_logps/rejected": -119.99109649658203, |
| "eval_loss": 0.00612166291102767, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.631625771522522, |
| "eval_rewards/margins": 28.211711883544922, |
| "eval_rewards/rejected": -26.58008575439453, |
| "eval_runtime": 214.0667, |
| "eval_samples_per_second": 13.37, |
| "eval_steps_per_second": 0.836, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.704718417047184e-07, |
| "logits/chosen": -2.2383410930633545, |
| "logits/rejected": -1.9348970651626587, |
| "logps/chosen": -86.20955657958984, |
| "logps/rejected": -122.5006103515625, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4148099422454834, |
| "rewards/margins": 27.235687255859375, |
| "rewards/rejected": -24.82087516784668, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.6996448503297815e-07, |
| "logits/chosen": -2.160952091217041, |
| "logits/rejected": -1.7654712200164795, |
| "logps/chosen": -89.77068328857422, |
| "logps/rejected": -118.8796615600586, |
| "loss": 0.004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.054098606109619, |
| "rewards/margins": 26.069538116455078, |
| "rewards/rejected": -24.01543617248535, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.6945712836123792e-07, |
| "logits/chosen": -2.2201457023620605, |
| "logits/rejected": -1.820336937904358, |
| "logps/chosen": -84.93563842773438, |
| "logps/rejected": -115.88444519042969, |
| "loss": 0.0066, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6848886013031006, |
| "rewards/margins": 26.666845321655273, |
| "rewards/rejected": -23.98195457458496, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.689497716894977e-07, |
| "logits/chosen": -2.1646978855133057, |
| "logits/rejected": -1.8357467651367188, |
| "logps/chosen": -80.21171569824219, |
| "logps/rejected": -111.47000885009766, |
| "loss": 0.0043, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.577789306640625, |
| "rewards/margins": 25.826122283935547, |
| "rewards/rejected": -23.248332977294922, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.6844241501775745e-07, |
| "logits/chosen": -2.1942062377929688, |
| "logits/rejected": -1.831578254699707, |
| "logps/chosen": -87.41214752197266, |
| "logps/rejected": -118.68165588378906, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.257878065109253, |
| "rewards/margins": 27.593063354492188, |
| "rewards/rejected": -24.335186004638672, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.6793505834601722e-07, |
| "logits/chosen": -2.270230293273926, |
| "logits/rejected": -1.930605173110962, |
| "logps/chosen": -77.88417053222656, |
| "logps/rejected": -119.29927062988281, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.550905704498291, |
| "rewards/margins": 28.38033103942871, |
| "rewards/rejected": -25.82942771911621, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.67427701674277e-07, |
| "logits/chosen": -2.1990127563476562, |
| "logits/rejected": -1.842660665512085, |
| "logps/chosen": -82.07890319824219, |
| "logps/rejected": -117.83439636230469, |
| "loss": 0.0028, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.725590467453003, |
| "rewards/margins": 28.352636337280273, |
| "rewards/rejected": -25.627044677734375, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.6692034500253675e-07, |
| "logits/chosen": -2.181185245513916, |
| "logits/rejected": -1.8166754245758057, |
| "logps/chosen": -82.80810546875, |
| "logps/rejected": -124.4463882446289, |
| "loss": 0.0037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.80354642868042, |
| "rewards/margins": 29.090805053710938, |
| "rewards/rejected": -26.287261962890625, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.6641298833079652e-07, |
| "logits/chosen": -2.245620012283325, |
| "logits/rejected": -1.8439216613769531, |
| "logps/chosen": -87.42176055908203, |
| "logps/rejected": -118.27108001708984, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.304450273513794, |
| "rewards/margins": 28.731517791748047, |
| "rewards/rejected": -25.42706871032715, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.659056316590563e-07, |
| "logits/chosen": -2.1820719242095947, |
| "logits/rejected": -1.8486725091934204, |
| "logps/chosen": -83.21141815185547, |
| "logps/rejected": -117.5422592163086, |
| "loss": 0.0018, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.1327810287475586, |
| "rewards/margins": 28.282459259033203, |
| "rewards/rejected": -26.149677276611328, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_logits/chosen": -2.188385486602783, |
| "eval_logits/rejected": -1.8376048803329468, |
| "eval_logps/chosen": -84.18167114257812, |
| "eval_logps/rejected": -117.2090072631836, |
| "eval_loss": 0.005902700126171112, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 2.3244550228118896, |
| "eval_rewards/margins": 27.51349639892578, |
| "eval_rewards/rejected": -25.189043045043945, |
| "eval_runtime": 191.0473, |
| "eval_samples_per_second": 14.981, |
| "eval_steps_per_second": 0.937, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.6539827498731605e-07, |
| "logits/chosen": -2.2313265800476074, |
| "logits/rejected": -1.8483736515045166, |
| "logps/chosen": -87.50598907470703, |
| "logps/rejected": -118.2992935180664, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3828155994415283, |
| "rewards/margins": 27.8851261138916, |
| "rewards/rejected": -25.502309799194336, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.6489091831557582e-07, |
| "logits/chosen": -2.143418312072754, |
| "logits/rejected": -1.8131214380264282, |
| "logps/chosen": -82.66302490234375, |
| "logps/rejected": -117.9818344116211, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9075496196746826, |
| "rewards/margins": 27.7532958984375, |
| "rewards/rejected": -24.845745086669922, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.643835616438356e-07, |
| "logits/chosen": -2.185781955718994, |
| "logits/rejected": -1.8397849798202515, |
| "logps/chosen": -86.03587341308594, |
| "logps/rejected": -121.69071960449219, |
| "loss": 0.0037, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5947184562683105, |
| "rewards/margins": 28.71170997619629, |
| "rewards/rejected": -26.116989135742188, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.6387620497209535e-07, |
| "logits/chosen": -2.093822956085205, |
| "logits/rejected": -1.744879126548767, |
| "logps/chosen": -82.53218078613281, |
| "logps/rejected": -120.4306869506836, |
| "loss": 0.0069, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.362114429473877, |
| "rewards/margins": 28.930118560791016, |
| "rewards/rejected": -26.568002700805664, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.6336884830035512e-07, |
| "logits/chosen": -2.222090244293213, |
| "logits/rejected": -1.889953851699829, |
| "logps/chosen": -84.85404968261719, |
| "logps/rejected": -122.89866638183594, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0494067668914795, |
| "rewards/margins": 27.54694175720215, |
| "rewards/rejected": -25.497535705566406, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.6286149162861489e-07, |
| "logits/chosen": -2.2259693145751953, |
| "logits/rejected": -1.8098411560058594, |
| "logps/chosen": -87.61415100097656, |
| "logps/rejected": -129.11331176757812, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2813143730163574, |
| "rewards/margins": 30.5456485748291, |
| "rewards/rejected": -28.264331817626953, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.6235413495687465e-07, |
| "logits/chosen": -2.115265369415283, |
| "logits/rejected": -1.7785238027572632, |
| "logps/chosen": -83.63951110839844, |
| "logps/rejected": -116.60148620605469, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2996184825897217, |
| "rewards/margins": 27.973474502563477, |
| "rewards/rejected": -25.67385482788086, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.6184677828513442e-07, |
| "logits/chosen": -2.213620185852051, |
| "logits/rejected": -1.853643774986267, |
| "logps/chosen": -84.76154327392578, |
| "logps/rejected": -120.73722839355469, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5600218772888184, |
| "rewards/margins": 28.535400390625, |
| "rewards/rejected": -25.97538185119629, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 1.613394216133942e-07, |
| "logits/chosen": -2.2256524562835693, |
| "logits/rejected": -1.8734019994735718, |
| "logps/chosen": -82.6207046508789, |
| "logps/rejected": -123.33949279785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0278208255767822, |
| "rewards/margins": 29.47714614868164, |
| "rewards/rejected": -27.449321746826172, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 1.6083206494165398e-07, |
| "logits/chosen": -2.247122049331665, |
| "logits/rejected": -1.9250261783599854, |
| "logps/chosen": -81.63235473632812, |
| "logps/rejected": -120.65080261230469, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.553596258163452, |
| "rewards/margins": 29.37432289123535, |
| "rewards/rejected": -26.820724487304688, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_logits/chosen": -2.1875288486480713, |
| "eval_logits/rejected": -1.8437479734420776, |
| "eval_logps/chosen": -84.4741439819336, |
| "eval_logps/rejected": -119.88742065429688, |
| "eval_loss": 0.005948640406131744, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.1782193183898926, |
| "eval_rewards/margins": 28.70646858215332, |
| "eval_rewards/rejected": -26.528249740600586, |
| "eval_runtime": 170.1725, |
| "eval_samples_per_second": 16.818, |
| "eval_steps_per_second": 1.052, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 1.6032470826991375e-07, |
| "logits/chosen": -2.18739914894104, |
| "logits/rejected": -1.8111976385116577, |
| "logps/chosen": -83.60111999511719, |
| "logps/rejected": -120.876220703125, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.492570400238037, |
| "rewards/margins": 29.177154541015625, |
| "rewards/rejected": -25.684585571289062, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 1.598173515981735e-07, |
| "logits/chosen": -2.1494498252868652, |
| "logits/rejected": -1.715921401977539, |
| "logps/chosen": -87.95207214355469, |
| "logps/rejected": -117.0306396484375, |
| "loss": 0.003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5173392295837402, |
| "rewards/margins": 28.325458526611328, |
| "rewards/rejected": -24.80811882019043, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 1.5930999492643328e-07, |
| "logits/chosen": -2.2305150032043457, |
| "logits/rejected": -1.8406226634979248, |
| "logps/chosen": -85.95257568359375, |
| "logps/rejected": -121.40510559082031, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6646008491516113, |
| "rewards/margins": 29.42586898803711, |
| "rewards/rejected": -26.76127052307129, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 1.5880263825469305e-07, |
| "logits/chosen": -2.2470791339874268, |
| "logits/rejected": -1.903607726097107, |
| "logps/chosen": -83.02677917480469, |
| "logps/rejected": -118.3487777709961, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.975865125656128, |
| "rewards/margins": 28.316776275634766, |
| "rewards/rejected": -25.340911865234375, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 1.582952815829528e-07, |
| "logits/chosen": -2.106412410736084, |
| "logits/rejected": -1.7845014333724976, |
| "logps/chosen": -85.77284240722656, |
| "logps/rejected": -122.04698181152344, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.0580241680145264, |
| "rewards/margins": 28.00246238708496, |
| "rewards/rejected": -24.94443702697754, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.5778792491121258e-07, |
| "logits/chosen": -2.230118989944458, |
| "logits/rejected": -1.8575313091278076, |
| "logps/chosen": -84.6324462890625, |
| "logps/rejected": -119.6858901977539, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.278738498687744, |
| "rewards/margins": 30.146167755126953, |
| "rewards/rejected": -26.867427825927734, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.5728056823947235e-07, |
| "logits/chosen": -2.245896577835083, |
| "logits/rejected": -1.8796007633209229, |
| "logps/chosen": -83.982666015625, |
| "logps/rejected": -119.01679992675781, |
| "loss": 0.0045, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.464191436767578, |
| "rewards/margins": 29.175838470458984, |
| "rewards/rejected": -25.711650848388672, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 1.567732115677321e-07, |
| "logits/chosen": -2.186904191970825, |
| "logits/rejected": -1.7904678583145142, |
| "logps/chosen": -88.25177001953125, |
| "logps/rejected": -118.5602798461914, |
| "loss": 0.0067, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 2.3008944988250732, |
| "rewards/margins": 27.154077529907227, |
| "rewards/rejected": -24.853181838989258, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 1.5626585489599188e-07, |
| "logits/chosen": -2.129375457763672, |
| "logits/rejected": -1.7635730504989624, |
| "logps/chosen": -91.15408325195312, |
| "logps/rejected": -130.5845489501953, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3261685371398926, |
| "rewards/margins": 30.6134033203125, |
| "rewards/rejected": -27.287235260009766, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 1.5575849822425165e-07, |
| "logits/chosen": -2.22548246383667, |
| "logits/rejected": -1.8935177326202393, |
| "logps/chosen": -83.39473724365234, |
| "logps/rejected": -121.51374816894531, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.055039882659912, |
| "rewards/margins": 27.6326904296875, |
| "rewards/rejected": -25.577648162841797, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_logits/chosen": -2.186929941177368, |
| "eval_logits/rejected": -1.8434008359909058, |
| "eval_logps/chosen": -84.8189697265625, |
| "eval_logps/rejected": -120.2051010131836, |
| "eval_loss": 0.00656374916434288, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 2.0058064460754395, |
| "eval_rewards/margins": 28.69289779663086, |
| "eval_rewards/rejected": -26.687089920043945, |
| "eval_runtime": 188.198, |
| "eval_samples_per_second": 15.207, |
| "eval_steps_per_second": 0.951, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 1.552511415525114e-07, |
| "logits/chosen": -2.2018935680389404, |
| "logits/rejected": -1.8334615230560303, |
| "logps/chosen": -85.780029296875, |
| "logps/rejected": -124.47122955322266, |
| "loss": 0.0048, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9497463703155518, |
| "rewards/margins": 29.585214614868164, |
| "rewards/rejected": -27.635467529296875, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 1.5474378488077118e-07, |
| "logits/chosen": -2.2138073444366455, |
| "logits/rejected": -1.819265365600586, |
| "logps/chosen": -91.44041442871094, |
| "logps/rejected": -128.78485107421875, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9119396209716797, |
| "rewards/margins": 30.014429092407227, |
| "rewards/rejected": -28.10248374938965, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 1.5423642820903095e-07, |
| "logits/chosen": -2.1547751426696777, |
| "logits/rejected": -1.8329432010650635, |
| "logps/chosen": -84.93801879882812, |
| "logps/rejected": -124.8791275024414, |
| "loss": 0.0042, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.6016337871551514, |
| "rewards/margins": 29.17998695373535, |
| "rewards/rejected": -26.578350067138672, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 1.537290715372907e-07, |
| "logits/chosen": -2.2604050636291504, |
| "logits/rejected": -1.8284223079681396, |
| "logps/chosen": -88.08625030517578, |
| "logps/rejected": -120.90476989746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.400468349456787, |
| "rewards/margins": 29.343048095703125, |
| "rewards/rejected": -26.942581176757812, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 1.5322171486555048e-07, |
| "logits/chosen": -2.2389979362487793, |
| "logits/rejected": -1.8285210132598877, |
| "logps/chosen": -92.38504791259766, |
| "logps/rejected": -129.23291015625, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8334972858428955, |
| "rewards/margins": 31.94364356994629, |
| "rewards/rejected": -29.11014747619629, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 1.5271435819381025e-07, |
| "logits/chosen": -2.2842297554016113, |
| "logits/rejected": -1.9232155084609985, |
| "logps/chosen": -88.47393035888672, |
| "logps/rejected": -126.19525146484375, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.37927508354187, |
| "rewards/margins": 30.97516441345215, |
| "rewards/rejected": -28.595890045166016, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 1.5220700152207e-07, |
| "logits/chosen": -2.1726303100585938, |
| "logits/rejected": -1.7958831787109375, |
| "logps/chosen": -92.33584594726562, |
| "logps/rejected": -127.83787536621094, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8039255142211914, |
| "rewards/margins": 29.595382690429688, |
| "rewards/rejected": -27.791458129882812, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 1.5169964485032978e-07, |
| "logits/chosen": -2.295196771621704, |
| "logits/rejected": -1.9108684062957764, |
| "logps/chosen": -86.93212890625, |
| "logps/rejected": -136.36459350585938, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.010756015777588, |
| "rewards/margins": 32.493587493896484, |
| "rewards/rejected": -30.482830047607422, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 1.5119228817858955e-07, |
| "logits/chosen": -2.2773728370666504, |
| "logits/rejected": -1.9154014587402344, |
| "logps/chosen": -85.64041900634766, |
| "logps/rejected": -127.74327087402344, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8653056621551514, |
| "rewards/margins": 31.216760635375977, |
| "rewards/rejected": -28.351455688476562, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 1.506849315068493e-07, |
| "logits/chosen": -2.1455962657928467, |
| "logits/rejected": -1.849805474281311, |
| "logps/chosen": -87.38817596435547, |
| "logps/rejected": -123.53019714355469, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1171107292175293, |
| "rewards/margins": 29.911731719970703, |
| "rewards/rejected": -27.79462242126465, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_logits/chosen": -2.1960811614990234, |
| "eval_logits/rejected": -1.8514564037322998, |
| "eval_logps/chosen": -85.992919921875, |
| "eval_logps/rejected": -124.32015228271484, |
| "eval_loss": 0.005746352486312389, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.4188352823257446, |
| "eval_rewards/margins": 30.1634521484375, |
| "eval_rewards/rejected": -28.744617462158203, |
| "eval_runtime": 179.5705, |
| "eval_samples_per_second": 15.938, |
| "eval_steps_per_second": 0.997, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 1.5017757483510908e-07, |
| "logits/chosen": -2.1365771293640137, |
| "logits/rejected": -1.8313045501708984, |
| "logps/chosen": -85.9605712890625, |
| "logps/rejected": -131.0879364013672, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9845980405807495, |
| "rewards/margins": 30.611257553100586, |
| "rewards/rejected": -29.626659393310547, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 1.4967021816336885e-07, |
| "logits/chosen": -2.197392225265503, |
| "logits/rejected": -1.8468068838119507, |
| "logps/chosen": -82.49998474121094, |
| "logps/rejected": -126.23731994628906, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3484909534454346, |
| "rewards/margins": 31.131275177001953, |
| "rewards/rejected": -29.78278160095215, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 1.491628614916286e-07, |
| "logits/chosen": -2.269951343536377, |
| "logits/rejected": -1.8826881647109985, |
| "logps/chosen": -88.26679992675781, |
| "logps/rejected": -132.8121795654297, |
| "loss": 0.0056, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.3638824224472046, |
| "rewards/margins": 31.313467025756836, |
| "rewards/rejected": -29.949581146240234, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 1.4865550481988838e-07, |
| "logits/chosen": -2.1884894371032715, |
| "logits/rejected": -1.8790266513824463, |
| "logps/chosen": -79.186279296875, |
| "logps/rejected": -124.1537094116211, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4217311143875122, |
| "rewards/margins": 30.239089965820312, |
| "rewards/rejected": -28.81736183166504, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.4814814814814815e-07, |
| "logits/chosen": -2.2884726524353027, |
| "logits/rejected": -1.9434821605682373, |
| "logps/chosen": -82.31842041015625, |
| "logps/rejected": -121.79225158691406, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6052954196929932, |
| "rewards/margins": 30.475265502929688, |
| "rewards/rejected": -28.869970321655273, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.476407914764079e-07, |
| "logits/chosen": -2.2113142013549805, |
| "logits/rejected": -1.858170747756958, |
| "logps/chosen": -88.19541931152344, |
| "logps/rejected": -128.73983764648438, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.839066505432129, |
| "rewards/margins": 30.854511260986328, |
| "rewards/rejected": -29.015445709228516, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.4713343480466768e-07, |
| "logits/chosen": -2.1949267387390137, |
| "logits/rejected": -1.8198333978652954, |
| "logps/chosen": -86.29945373535156, |
| "logps/rejected": -126.752197265625, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4010677337646484, |
| "rewards/margins": 32.765567779541016, |
| "rewards/rejected": -29.3644962310791, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.4662607813292745e-07, |
| "logits/chosen": -2.2420246601104736, |
| "logits/rejected": -1.8449939489364624, |
| "logps/chosen": -88.93192291259766, |
| "logps/rejected": -125.01644134521484, |
| "loss": 0.0099, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.367809295654297, |
| "rewards/margins": 31.084781646728516, |
| "rewards/rejected": -28.716970443725586, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.461187214611872e-07, |
| "logits/chosen": -2.1443724632263184, |
| "logits/rejected": -1.7969995737075806, |
| "logps/chosen": -88.4251937866211, |
| "logps/rejected": -124.62715911865234, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7502880096435547, |
| "rewards/margins": 30.875295639038086, |
| "rewards/rejected": -28.125009536743164, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 1.4561136478944698e-07, |
| "logits/chosen": -2.1906471252441406, |
| "logits/rejected": -1.8091493844985962, |
| "logps/chosen": -85.97318267822266, |
| "logps/rejected": -130.11227416992188, |
| "loss": 0.003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8357086181640625, |
| "rewards/margins": 32.3084602355957, |
| "rewards/rejected": -28.47275161743164, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_logits/chosen": -2.1971521377563477, |
| "eval_logits/rejected": -1.848021149635315, |
| "eval_logps/chosen": -84.61711120605469, |
| "eval_logps/rejected": -123.59754943847656, |
| "eval_loss": 0.005367867648601532, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.106738567352295, |
| "eval_rewards/margins": 30.49005699157715, |
| "eval_rewards/rejected": -28.38331413269043, |
| "eval_runtime": 176.7324, |
| "eval_samples_per_second": 16.194, |
| "eval_steps_per_second": 1.013, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 1.4510400811770675e-07, |
| "logits/chosen": -2.226139545440674, |
| "logits/rejected": -1.836024522781372, |
| "logps/chosen": -91.7187271118164, |
| "logps/rejected": -129.92689514160156, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7738643884658813, |
| "rewards/margins": 30.75638198852539, |
| "rewards/rejected": -28.982519149780273, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 1.445966514459665e-07, |
| "logits/chosen": -2.2146477699279785, |
| "logits/rejected": -1.8724933862686157, |
| "logps/chosen": -82.95762634277344, |
| "logps/rejected": -124.72331237792969, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.609710216522217, |
| "rewards/margins": 30.03472328186035, |
| "rewards/rejected": -27.425012588500977, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 1.4408929477422628e-07, |
| "logits/chosen": -2.1765360832214355, |
| "logits/rejected": -1.8613145351409912, |
| "logps/chosen": -88.75824737548828, |
| "logps/rejected": -126.1810531616211, |
| "loss": 0.0056, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3347582817077637, |
| "rewards/margins": 30.444360733032227, |
| "rewards/rejected": -28.109600067138672, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 1.4358193810248604e-07, |
| "logits/chosen": -2.203244686126709, |
| "logits/rejected": -1.8518617153167725, |
| "logps/chosen": -83.48390197753906, |
| "logps/rejected": -124.0763168334961, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4211070537567139, |
| "rewards/margins": 29.72439956665039, |
| "rewards/rejected": -28.303295135498047, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 1.430745814307458e-07, |
| "logits/chosen": -2.231482982635498, |
| "logits/rejected": -1.8217432498931885, |
| "logps/chosen": -89.63983917236328, |
| "logps/rejected": -125.65821838378906, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.655423641204834, |
| "rewards/margins": 29.783100128173828, |
| "rewards/rejected": -27.127676010131836, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 1.4256722475900558e-07, |
| "logits/chosen": -2.152547597885132, |
| "logits/rejected": -1.8171924352645874, |
| "logps/chosen": -81.50759887695312, |
| "logps/rejected": -123.8464126586914, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3097615242004395, |
| "rewards/margins": 29.635555267333984, |
| "rewards/rejected": -28.325796127319336, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 1.4205986808726534e-07, |
| "logits/chosen": -2.249340534210205, |
| "logits/rejected": -1.9595706462860107, |
| "logps/chosen": -85.8914794921875, |
| "logps/rejected": -138.0444793701172, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.460918664932251, |
| "rewards/margins": 32.2510871887207, |
| "rewards/rejected": -29.790172576904297, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 1.415525114155251e-07, |
| "logits/chosen": -2.1925182342529297, |
| "logits/rejected": -1.7810020446777344, |
| "logps/chosen": -93.9482421875, |
| "logps/rejected": -130.27859497070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.997776746749878, |
| "rewards/margins": 31.849191665649414, |
| "rewards/rejected": -29.851415634155273, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 1.4104515474378488e-07, |
| "logits/chosen": -2.134547472000122, |
| "logits/rejected": -1.8135312795639038, |
| "logps/chosen": -84.8584976196289, |
| "logps/rejected": -123.8740005493164, |
| "loss": 0.009, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.4246654510498047, |
| "rewards/margins": 29.43796730041504, |
| "rewards/rejected": -28.013301849365234, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 1.4053779807204464e-07, |
| "logits/chosen": -2.267373561859131, |
| "logits/rejected": -1.8483003377914429, |
| "logps/chosen": -87.11589050292969, |
| "logps/rejected": -115.36067199707031, |
| "loss": 0.006, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.7466697692871094, |
| "rewards/margins": 28.394153594970703, |
| "rewards/rejected": -24.647480010986328, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_logits/chosen": -2.191563367843628, |
| "eval_logits/rejected": -1.8433810472488403, |
| "eval_logps/chosen": -83.24746704101562, |
| "eval_logps/rejected": -116.2911148071289, |
| "eval_loss": 0.005436885170638561, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.791560173034668, |
| "eval_rewards/margins": 27.521656036376953, |
| "eval_rewards/rejected": -24.73009490966797, |
| "eval_runtime": 221.0913, |
| "eval_samples_per_second": 12.945, |
| "eval_steps_per_second": 0.81, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 1.400304414003044e-07, |
| "logits/chosen": -2.2577452659606934, |
| "logits/rejected": -1.8542404174804688, |
| "logps/chosen": -81.37186431884766, |
| "logps/rejected": -114.3775405883789, |
| "loss": 0.0011, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.739640951156616, |
| "rewards/margins": 26.94769859313965, |
| "rewards/rejected": -24.208059310913086, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 1.3952308472856418e-07, |
| "logits/chosen": -2.1801652908325195, |
| "logits/rejected": -1.8121554851531982, |
| "logps/chosen": -87.84750366210938, |
| "logps/rejected": -119.56182861328125, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7155660390853882, |
| "rewards/margins": 28.609859466552734, |
| "rewards/rejected": -26.894290924072266, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1.3901572805682394e-07, |
| "logits/chosen": -2.166355609893799, |
| "logits/rejected": -1.7864339351654053, |
| "logps/chosen": -87.84122467041016, |
| "logps/rejected": -126.62345886230469, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1120693683624268, |
| "rewards/margins": 30.040283203125, |
| "rewards/rejected": -26.928213119506836, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1.385083713850837e-07, |
| "logits/chosen": -2.1811468601226807, |
| "logits/rejected": -1.7890942096710205, |
| "logps/chosen": -86.22557830810547, |
| "logps/rejected": -121.93861389160156, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1839969158172607, |
| "rewards/margins": 29.871551513671875, |
| "rewards/rejected": -26.687557220458984, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.3800101471334348e-07, |
| "logits/chosen": -2.186249256134033, |
| "logits/rejected": -1.8273032903671265, |
| "logps/chosen": -86.01737213134766, |
| "logps/rejected": -119.9991226196289, |
| "loss": 0.0055, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.2794878482818604, |
| "rewards/margins": 28.3900203704834, |
| "rewards/rejected": -27.11053466796875, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.3749365804160324e-07, |
| "logits/chosen": -2.122135639190674, |
| "logits/rejected": -1.7545080184936523, |
| "logps/chosen": -86.4559555053711, |
| "logps/rejected": -118.87664794921875, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.654794454574585, |
| "rewards/margins": 27.156728744506836, |
| "rewards/rejected": -25.501934051513672, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.36986301369863e-07, |
| "logits/chosen": -2.1122047901153564, |
| "logits/rejected": -1.7238849401474, |
| "logps/chosen": -84.66944122314453, |
| "logps/rejected": -121.77825927734375, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5499777793884277, |
| "rewards/margins": 31.206939697265625, |
| "rewards/rejected": -27.656963348388672, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.3647894469812278e-07, |
| "logits/chosen": -2.1466057300567627, |
| "logits/rejected": -1.7561490535736084, |
| "logps/chosen": -84.68426513671875, |
| "logps/rejected": -123.66081237792969, |
| "loss": 0.0014, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.3084232807159424, |
| "rewards/margins": 31.223583221435547, |
| "rewards/rejected": -27.915157318115234, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.3597158802638254e-07, |
| "logits/chosen": -2.1600029468536377, |
| "logits/rejected": -1.7710784673690796, |
| "logps/chosen": -87.58964538574219, |
| "logps/rejected": -127.94322204589844, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7342724800109863, |
| "rewards/margins": 30.390094757080078, |
| "rewards/rejected": -27.65582275390625, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.354642313546423e-07, |
| "logits/chosen": -2.1588668823242188, |
| "logits/rejected": -1.8171707391738892, |
| "logps/chosen": -82.0718002319336, |
| "logps/rejected": -117.87044525146484, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.580735445022583, |
| "rewards/margins": 28.978771209716797, |
| "rewards/rejected": -26.398035049438477, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_logits/chosen": -2.1919665336608887, |
| "eval_logits/rejected": -1.8445065021514893, |
| "eval_logps/chosen": -84.54802703857422, |
| "eval_logps/rejected": -120.56312561035156, |
| "eval_loss": 0.005169562995433807, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 2.1412765979766846, |
| "eval_rewards/margins": 29.007375717163086, |
| "eval_rewards/rejected": -26.866098403930664, |
| "eval_runtime": 203.0484, |
| "eval_samples_per_second": 14.095, |
| "eval_steps_per_second": 0.882, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.3495687468290208e-07, |
| "logits/chosen": -2.2244656085968018, |
| "logits/rejected": -1.810752272605896, |
| "logps/chosen": -89.08476257324219, |
| "logps/rejected": -123.11407470703125, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.44339919090271, |
| "rewards/margins": 30.2976016998291, |
| "rewards/rejected": -27.854202270507812, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.3444951801116184e-07, |
| "logits/chosen": -2.219104290008545, |
| "logits/rejected": -1.8220125436782837, |
| "logps/chosen": -89.28169250488281, |
| "logps/rejected": -126.15754699707031, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9085346460342407, |
| "rewards/margins": 30.235088348388672, |
| "rewards/rejected": -28.326553344726562, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.339421613394216e-07, |
| "logits/chosen": -2.276082754135132, |
| "logits/rejected": -1.8841686248779297, |
| "logps/chosen": -89.07032775878906, |
| "logps/rejected": -126.38359069824219, |
| "loss": 0.0043, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.427870988845825, |
| "rewards/margins": 30.165613174438477, |
| "rewards/rejected": -27.737743377685547, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3343480466768138e-07, |
| "logits/chosen": -2.227997064590454, |
| "logits/rejected": -1.8480758666992188, |
| "logps/chosen": -84.52095794677734, |
| "logps/rejected": -119.73951721191406, |
| "loss": 0.0066, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.0445349216461182, |
| "rewards/margins": 28.596317291259766, |
| "rewards/rejected": -27.551782608032227, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3292744799594114e-07, |
| "logits/chosen": -2.182831287384033, |
| "logits/rejected": -1.7697114944458008, |
| "logps/chosen": -88.89148712158203, |
| "logps/rejected": -125.78935241699219, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.283107042312622, |
| "rewards/margins": 31.58938217163086, |
| "rewards/rejected": -28.306278228759766, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.324200913242009e-07, |
| "logits/chosen": -2.156838893890381, |
| "logits/rejected": -1.7892711162567139, |
| "logps/chosen": -82.74298095703125, |
| "logps/rejected": -127.2794189453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.570988416671753, |
| "rewards/margins": 32.732200622558594, |
| "rewards/rejected": -29.161212921142578, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.3191273465246068e-07, |
| "logits/chosen": -2.2453393936157227, |
| "logits/rejected": -1.8167974948883057, |
| "logps/chosen": -89.5159912109375, |
| "logps/rejected": -128.8140869140625, |
| "loss": 0.0068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0265374183654785, |
| "rewards/margins": 31.421884536743164, |
| "rewards/rejected": -28.395349502563477, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.3140537798072044e-07, |
| "logits/chosen": -2.1521337032318115, |
| "logits/rejected": -1.7532793283462524, |
| "logps/chosen": -85.2146987915039, |
| "logps/rejected": -111.48863220214844, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.89916729927063, |
| "rewards/margins": 27.08025550842285, |
| "rewards/rejected": -23.181087493896484, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.308980213089802e-07, |
| "logits/chosen": -2.2195143699645996, |
| "logits/rejected": -1.8655322790145874, |
| "logps/chosen": -83.930419921875, |
| "logps/rejected": -112.97065734863281, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.9724647998809814, |
| "rewards/margins": 25.749919891357422, |
| "rewards/rejected": -21.777454376220703, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.3039066463723998e-07, |
| "logits/chosen": -2.243203639984131, |
| "logits/rejected": -1.7824580669403076, |
| "logps/chosen": -85.41126251220703, |
| "logps/rejected": -116.12227630615234, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.609849691390991, |
| "rewards/margins": 27.11124038696289, |
| "rewards/rejected": -24.50139045715332, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_logits/chosen": -2.207897663116455, |
| "eval_logits/rejected": -1.8570655584335327, |
| "eval_logps/chosen": -83.74642181396484, |
| "eval_logps/rejected": -115.08486938476562, |
| "eval_loss": 0.0051609063521027565, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 2.5420799255371094, |
| "eval_rewards/margins": 26.66905403137207, |
| "eval_rewards/rejected": -24.126972198486328, |
| "eval_runtime": 213.5807, |
| "eval_samples_per_second": 13.4, |
| "eval_steps_per_second": 0.838, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.2988330796549974e-07, |
| "logits/chosen": -2.2335009574890137, |
| "logits/rejected": -1.8103595972061157, |
| "logps/chosen": -87.22106170654297, |
| "logps/rejected": -117.1398696899414, |
| "loss": 0.0065, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 2.971987247467041, |
| "rewards/margins": 27.090845108032227, |
| "rewards/rejected": -24.118860244750977, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.293759512937595e-07, |
| "logits/chosen": -2.2131710052490234, |
| "logits/rejected": -1.8810796737670898, |
| "logps/chosen": -89.93437957763672, |
| "logps/rejected": -119.86589050292969, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9654256701469421, |
| "rewards/margins": 24.501127243041992, |
| "rewards/rejected": -23.535701751708984, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.2886859462201928e-07, |
| "logits/chosen": -2.2927210330963135, |
| "logits/rejected": -1.9104375839233398, |
| "logps/chosen": -88.72476196289062, |
| "logps/rejected": -117.77976989746094, |
| "loss": 0.0041, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.68806791305542, |
| "rewards/margins": 27.564483642578125, |
| "rewards/rejected": -24.876415252685547, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.2836123795027904e-07, |
| "logits/chosen": -2.168994903564453, |
| "logits/rejected": -1.7794716358184814, |
| "logps/chosen": -81.93544006347656, |
| "logps/rejected": -116.9870376586914, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4068737030029297, |
| "rewards/margins": 27.525577545166016, |
| "rewards/rejected": -25.118701934814453, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.278538812785388e-07, |
| "logits/chosen": -2.18416690826416, |
| "logits/rejected": -1.7201156616210938, |
| "logps/chosen": -91.39389038085938, |
| "logps/rejected": -118.92759704589844, |
| "loss": 0.0063, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.148336887359619, |
| "rewards/margins": 27.875076293945312, |
| "rewards/rejected": -24.72673988342285, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.2734652460679858e-07, |
| "logits/chosen": -2.2627367973327637, |
| "logits/rejected": -1.9123704433441162, |
| "logps/chosen": -88.3521728515625, |
| "logps/rejected": -117.85545349121094, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9983296394348145, |
| "rewards/margins": 25.974822998046875, |
| "rewards/rejected": -22.976491928100586, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.2683916793505834e-07, |
| "logits/chosen": -2.1818766593933105, |
| "logits/rejected": -1.8303353786468506, |
| "logps/chosen": -82.59492492675781, |
| "logps/rejected": -120.16998291015625, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4224205017089844, |
| "rewards/margins": 27.243423461914062, |
| "rewards/rejected": -23.821001052856445, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.263318112633181e-07, |
| "logits/chosen": -2.2209744453430176, |
| "logits/rejected": -1.8506110906600952, |
| "logps/chosen": -87.74772644042969, |
| "logps/rejected": -112.5324478149414, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3162944316864014, |
| "rewards/margins": 25.54704475402832, |
| "rewards/rejected": -22.230749130249023, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.2582445459157788e-07, |
| "logits/chosen": -2.203535318374634, |
| "logits/rejected": -1.8149940967559814, |
| "logps/chosen": -81.7652587890625, |
| "logps/rejected": -113.93977355957031, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2748992443084717, |
| "rewards/margins": 26.76283836364746, |
| "rewards/rejected": -23.48794174194336, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.2531709791983764e-07, |
| "logits/chosen": -2.271902561187744, |
| "logits/rejected": -1.8522933721542358, |
| "logps/chosen": -89.37789916992188, |
| "logps/rejected": -117.07502746582031, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4491279125213623, |
| "rewards/margins": 26.383209228515625, |
| "rewards/rejected": -22.934078216552734, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_logits/chosen": -2.217360019683838, |
| "eval_logits/rejected": -1.8673908710479736, |
| "eval_logps/chosen": -83.58930969238281, |
| "eval_logps/rejected": -114.42141723632812, |
| "eval_loss": 0.005190215539187193, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.6206393241882324, |
| "eval_rewards/margins": 26.415889739990234, |
| "eval_rewards/rejected": -23.795251846313477, |
| "eval_runtime": 206.2078, |
| "eval_samples_per_second": 13.879, |
| "eval_steps_per_second": 0.868, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.248097412480974e-07, |
| "logits/chosen": -2.3479442596435547, |
| "logits/rejected": -1.9204511642456055, |
| "logps/chosen": -88.93357849121094, |
| "logps/rejected": -119.96925354003906, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7320475578308105, |
| "rewards/margins": 28.458393096923828, |
| "rewards/rejected": -24.72634506225586, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.2430238457635718e-07, |
| "logits/chosen": -2.1347765922546387, |
| "logits/rejected": -1.743198037147522, |
| "logps/chosen": -87.92202758789062, |
| "logps/rejected": -118.41119384765625, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6807589530944824, |
| "rewards/margins": 27.0130558013916, |
| "rewards/rejected": -24.33229637145996, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.2379502790461694e-07, |
| "logits/chosen": -2.176631450653076, |
| "logits/rejected": -1.8327052593231201, |
| "logps/chosen": -84.86781311035156, |
| "logps/rejected": -117.3744888305664, |
| "loss": 0.0054, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2650296688079834, |
| "rewards/margins": 25.868539810180664, |
| "rewards/rejected": -24.603511810302734, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.232876712328767e-07, |
| "logits/chosen": -2.158825635910034, |
| "logits/rejected": -1.7359037399291992, |
| "logps/chosen": -88.40345001220703, |
| "logps/rejected": -119.45426177978516, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4086227416992188, |
| "rewards/margins": 27.16936683654785, |
| "rewards/rejected": -24.760744094848633, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.2278031456113648e-07, |
| "logits/chosen": -2.2729620933532715, |
| "logits/rejected": -1.8309637308120728, |
| "logps/chosen": -91.2960205078125, |
| "logps/rejected": -117.98994445800781, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7180824279785156, |
| "rewards/margins": 27.0567569732666, |
| "rewards/rejected": -23.33867645263672, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.2227295788939624e-07, |
| "logits/chosen": -2.2085700035095215, |
| "logits/rejected": -1.8833599090576172, |
| "logps/chosen": -86.73484802246094, |
| "logps/rejected": -124.00797271728516, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6060290336608887, |
| "rewards/margins": 27.345844268798828, |
| "rewards/rejected": -24.739816665649414, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.21765601217656e-07, |
| "logits/chosen": -2.231919050216675, |
| "logits/rejected": -1.8927338123321533, |
| "logps/chosen": -82.05496978759766, |
| "logps/rejected": -125.29072570800781, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5094146728515625, |
| "rewards/margins": 27.967309951782227, |
| "rewards/rejected": -25.457895278930664, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.2125824454591578e-07, |
| "logits/chosen": -2.252375364303589, |
| "logits/rejected": -1.7970874309539795, |
| "logps/chosen": -92.34709167480469, |
| "logps/rejected": -115.58805084228516, |
| "loss": 0.0047, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9096739292144775, |
| "rewards/margins": 27.263355255126953, |
| "rewards/rejected": -24.353681564331055, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.2075088787417554e-07, |
| "logits/chosen": -2.1913251876831055, |
| "logits/rejected": -1.8256721496582031, |
| "logps/chosen": -83.70695495605469, |
| "logps/rejected": -117.19889068603516, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.558323621749878, |
| "rewards/margins": 27.682703018188477, |
| "rewards/rejected": -24.124378204345703, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.202435312024353e-07, |
| "logits/chosen": -2.2380259037017822, |
| "logits/rejected": -1.9088646173477173, |
| "logps/chosen": -82.8735580444336, |
| "logps/rejected": -115.74534606933594, |
| "loss": 0.0026, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5881214141845703, |
| "rewards/margins": 26.207622528076172, |
| "rewards/rejected": -23.6195011138916, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_logits/chosen": -2.2144737243652344, |
| "eval_logits/rejected": -1.8624593019485474, |
| "eval_logps/chosen": -83.490234375, |
| "eval_logps/rejected": -111.31690979003906, |
| "eval_loss": 0.00542183592915535, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.670175075531006, |
| "eval_rewards/margins": 24.913169860839844, |
| "eval_rewards/rejected": -22.242996215820312, |
| "eval_runtime": 204.9075, |
| "eval_samples_per_second": 13.967, |
| "eval_steps_per_second": 0.874, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.1973617453069508e-07, |
| "logits/chosen": -2.214597463607788, |
| "logits/rejected": -1.8877366781234741, |
| "logps/chosen": -81.5813980102539, |
| "logps/rejected": -110.80888366699219, |
| "loss": 0.0036, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.2836194038391113, |
| "rewards/margins": 24.602767944335938, |
| "rewards/rejected": -22.31914710998535, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.1922881785895484e-07, |
| "logits/chosen": -2.2340848445892334, |
| "logits/rejected": -1.8911195993423462, |
| "logps/chosen": -88.96124267578125, |
| "logps/rejected": -121.70379638671875, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.17574405670166, |
| "rewards/margins": 26.648061752319336, |
| "rewards/rejected": -23.47231674194336, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.187214611872146e-07, |
| "logits/chosen": -2.306396007537842, |
| "logits/rejected": -1.935389757156372, |
| "logps/chosen": -90.6281509399414, |
| "logps/rejected": -122.7242660522461, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.553041934967041, |
| "rewards/margins": 26.54819107055664, |
| "rewards/rejected": -23.995147705078125, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.1821410451547436e-07, |
| "logits/chosen": -2.159374713897705, |
| "logits/rejected": -1.8598381280899048, |
| "logps/chosen": -81.08094024658203, |
| "logps/rejected": -112.6502914428711, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.01515531539917, |
| "rewards/margins": 25.243701934814453, |
| "rewards/rejected": -21.22854232788086, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.1770674784373413e-07, |
| "logits/chosen": -2.15578031539917, |
| "logits/rejected": -1.851265549659729, |
| "logps/chosen": -78.29918670654297, |
| "logps/rejected": -111.49513244628906, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1447842121124268, |
| "rewards/margins": 23.960596084594727, |
| "rewards/rejected": -21.815811157226562, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.171993911719939e-07, |
| "logits/chosen": -2.2845335006713867, |
| "logits/rejected": -1.9803342819213867, |
| "logps/chosen": -84.60355377197266, |
| "logps/rejected": -118.61265563964844, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0940170288085938, |
| "rewards/margins": 26.21561050415039, |
| "rewards/rejected": -24.121593475341797, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.1669203450025366e-07, |
| "logits/chosen": -2.1469063758850098, |
| "logits/rejected": -1.885765790939331, |
| "logps/chosen": -78.9441909790039, |
| "logps/rejected": -114.021240234375, |
| "loss": 0.0073, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.2761025428771973, |
| "rewards/margins": 24.522062301635742, |
| "rewards/rejected": -22.24595832824707, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.1618467782851343e-07, |
| "logits/chosen": -2.2316300868988037, |
| "logits/rejected": -1.8505769968032837, |
| "logps/chosen": -82.11341857910156, |
| "logps/rejected": -119.51060485839844, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4899394512176514, |
| "rewards/margins": 26.23581314086914, |
| "rewards/rejected": -23.74587059020996, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.156773211567732e-07, |
| "logits/chosen": -2.1731178760528564, |
| "logits/rejected": -1.7443568706512451, |
| "logps/chosen": -84.8414306640625, |
| "logps/rejected": -119.10728454589844, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4432387351989746, |
| "rewards/margins": 28.680456161499023, |
| "rewards/rejected": -25.237218856811523, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.1516996448503296e-07, |
| "logits/chosen": -2.245872974395752, |
| "logits/rejected": -1.8761298656463623, |
| "logps/chosen": -83.03449249267578, |
| "logps/rejected": -122.46466064453125, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.476714611053467, |
| "rewards/margins": 27.374313354492188, |
| "rewards/rejected": -24.897600173950195, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_logits/chosen": -2.2145586013793945, |
| "eval_logits/rejected": -1.862236499786377, |
| "eval_logps/chosen": -83.82196807861328, |
| "eval_logps/rejected": -114.24178314208984, |
| "eval_loss": 0.005356738809496164, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 2.504307270050049, |
| "eval_rewards/margins": 26.20973777770996, |
| "eval_rewards/rejected": -23.705427169799805, |
| "eval_runtime": 231.7023, |
| "eval_samples_per_second": 12.352, |
| "eval_steps_per_second": 0.773, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.1466260781329273e-07, |
| "logits/chosen": -2.2044477462768555, |
| "logits/rejected": -1.769928216934204, |
| "logps/chosen": -87.90011596679688, |
| "logps/rejected": -117.90144348144531, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1481876373291016, |
| "rewards/margins": 27.33310317993164, |
| "rewards/rejected": -24.184917449951172, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.141552511415525e-07, |
| "logits/chosen": -2.2101001739501953, |
| "logits/rejected": -1.8445708751678467, |
| "logps/chosen": -90.51927185058594, |
| "logps/rejected": -117.70124816894531, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.421943426132202, |
| "rewards/margins": 27.004901885986328, |
| "rewards/rejected": -24.582958221435547, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.1364789446981226e-07, |
| "logits/chosen": -2.20845365524292, |
| "logits/rejected": -1.8483657836914062, |
| "logps/chosen": -90.68830871582031, |
| "logps/rejected": -119.9162368774414, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.908290386199951, |
| "rewards/margins": 27.960651397705078, |
| "rewards/rejected": -25.052364349365234, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.1314053779807203e-07, |
| "logits/chosen": -2.2378525733947754, |
| "logits/rejected": -1.9095252752304077, |
| "logps/chosen": -84.37041473388672, |
| "logps/rejected": -117.41896057128906, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9735949039459229, |
| "rewards/margins": 26.64251136779785, |
| "rewards/rejected": -24.668912887573242, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.126331811263318e-07, |
| "logits/chosen": -2.2925407886505127, |
| "logits/rejected": -1.940437912940979, |
| "logps/chosen": -83.49569702148438, |
| "logps/rejected": -115.74214172363281, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6832937002182007, |
| "rewards/margins": 25.954483032226562, |
| "rewards/rejected": -24.271190643310547, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.1212582445459156e-07, |
| "logits/chosen": -2.1880042552948, |
| "logits/rejected": -1.7285563945770264, |
| "logps/chosen": -90.88077545166016, |
| "logps/rejected": -118.75814056396484, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5746841430664062, |
| "rewards/margins": 28.058208465576172, |
| "rewards/rejected": -25.483524322509766, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.1161846778285133e-07, |
| "logits/chosen": -2.1667580604553223, |
| "logits/rejected": -1.878365159034729, |
| "logps/chosen": -78.63961029052734, |
| "logps/rejected": -118.59150695800781, |
| "loss": 0.0068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0068283081054688, |
| "rewards/margins": 25.848861694335938, |
| "rewards/rejected": -23.8420352935791, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.111111111111111e-07, |
| "logits/chosen": -2.160161018371582, |
| "logits/rejected": -1.7247244119644165, |
| "logps/chosen": -90.16160583496094, |
| "logps/rejected": -113.54561614990234, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1644601821899414, |
| "rewards/margins": 26.249164581298828, |
| "rewards/rejected": -23.084701538085938, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.1060375443937086e-07, |
| "logits/chosen": -2.183781862258911, |
| "logits/rejected": -1.8434038162231445, |
| "logps/chosen": -84.59786224365234, |
| "logps/rejected": -118.7528076171875, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.4381895065307617, |
| "rewards/margins": 26.610469818115234, |
| "rewards/rejected": -24.17228126525879, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.1009639776763063e-07, |
| "logits/chosen": -2.2394397258758545, |
| "logits/rejected": -1.8388830423355103, |
| "logps/chosen": -87.1556396484375, |
| "logps/rejected": -115.66267395019531, |
| "loss": 0.0024, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.9926955699920654, |
| "rewards/margins": 26.94419288635254, |
| "rewards/rejected": -23.951494216918945, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.01, |
| "eval_logits/chosen": -2.2128782272338867, |
| "eval_logits/rejected": -1.8610923290252686, |
| "eval_logps/chosen": -84.40758514404297, |
| "eval_logps/rejected": -115.89351654052734, |
| "eval_loss": 0.005497102625668049, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.211501359939575, |
| "eval_rewards/margins": 26.7427921295166, |
| "eval_rewards/rejected": -24.531293869018555, |
| "eval_runtime": 181.6374, |
| "eval_samples_per_second": 15.757, |
| "eval_steps_per_second": 0.985, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.095890410958904e-07, |
| "logits/chosen": -2.1844065189361572, |
| "logits/rejected": -1.7922824621200562, |
| "logps/chosen": -87.32149505615234, |
| "logps/rejected": -124.73358154296875, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.739438533782959, |
| "rewards/margins": 29.961498260498047, |
| "rewards/rejected": -26.222061157226562, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.0908168442415016e-07, |
| "logits/chosen": -2.1687610149383545, |
| "logits/rejected": -1.8179349899291992, |
| "logps/chosen": -82.60697937011719, |
| "logps/rejected": -117.3493881225586, |
| "loss": 0.0076, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0033984184265137, |
| "rewards/margins": 26.623882293701172, |
| "rewards/rejected": -24.620487213134766, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.0857432775240993e-07, |
| "logits/chosen": -2.157742977142334, |
| "logits/rejected": -1.8218927383422852, |
| "logps/chosen": -81.67804718017578, |
| "logps/rejected": -123.06440734863281, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.68721342086792, |
| "rewards/margins": 28.739349365234375, |
| "rewards/rejected": -26.052135467529297, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.080669710806697e-07, |
| "logits/chosen": -2.275503396987915, |
| "logits/rejected": -1.8494739532470703, |
| "logps/chosen": -83.60503387451172, |
| "logps/rejected": -121.38664245605469, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1225333213806152, |
| "rewards/margins": 28.552631378173828, |
| "rewards/rejected": -25.430099487304688, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.0755961440892946e-07, |
| "logits/chosen": -2.269984722137451, |
| "logits/rejected": -1.902269721031189, |
| "logps/chosen": -83.16432189941406, |
| "logps/rejected": -120.77657318115234, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8239471912384033, |
| "rewards/margins": 28.915185928344727, |
| "rewards/rejected": -26.091238021850586, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.0705225773718923e-07, |
| "logits/chosen": -2.2269845008850098, |
| "logits/rejected": -1.8621666431427002, |
| "logps/chosen": -85.25446319580078, |
| "logps/rejected": -115.29423522949219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4882960319519043, |
| "rewards/margins": 26.619338989257812, |
| "rewards/rejected": -24.13104248046875, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.06544901065449e-07, |
| "logits/chosen": -2.236506700515747, |
| "logits/rejected": -1.9493324756622314, |
| "logps/chosen": -87.72699737548828, |
| "logps/rejected": -120.0291748046875, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.765307903289795, |
| "rewards/margins": 26.442195892333984, |
| "rewards/rejected": -23.676889419555664, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.0603754439370876e-07, |
| "logits/chosen": -2.22512149810791, |
| "logits/rejected": -1.8360923528671265, |
| "logps/chosen": -85.01690673828125, |
| "logps/rejected": -123.11708068847656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9220136404037476, |
| "rewards/margins": 28.249126434326172, |
| "rewards/rejected": -26.327117919921875, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.0553018772196853e-07, |
| "logits/chosen": -2.271622896194458, |
| "logits/rejected": -1.8992735147476196, |
| "logps/chosen": -88.58778381347656, |
| "logps/rejected": -118.49006652832031, |
| "loss": 0.0011, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 3.1313204765319824, |
| "rewards/margins": 26.39713478088379, |
| "rewards/rejected": -23.265811920166016, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.050228310502283e-07, |
| "logits/chosen": -2.2248878479003906, |
| "logits/rejected": -1.8648513555526733, |
| "logps/chosen": -84.03074645996094, |
| "logps/rejected": -119.88997650146484, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3918333053588867, |
| "rewards/margins": 26.75858497619629, |
| "rewards/rejected": -25.366750717163086, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_logits/chosen": -2.2098562717437744, |
| "eval_logits/rejected": -1.8567416667938232, |
| "eval_logps/chosen": -84.76105499267578, |
| "eval_logps/rejected": -117.29058074951172, |
| "eval_loss": 0.005427930504083633, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 2.0347681045532227, |
| "eval_rewards/margins": 27.26459312438965, |
| "eval_rewards/rejected": -25.22982406616211, |
| "eval_runtime": 226.6245, |
| "eval_samples_per_second": 12.629, |
| "eval_steps_per_second": 0.79, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.0451547437848806e-07, |
| "logits/chosen": -2.244274139404297, |
| "logits/rejected": -1.835097074508667, |
| "logps/chosen": -89.41958618164062, |
| "logps/rejected": -119.83707427978516, |
| "loss": 0.0031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5296854972839355, |
| "rewards/margins": 27.9240779876709, |
| "rewards/rejected": -25.394390106201172, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.0400811770674783e-07, |
| "logits/chosen": -2.20641827583313, |
| "logits/rejected": -1.8095362186431885, |
| "logps/chosen": -79.14988708496094, |
| "logps/rejected": -120.5500717163086, |
| "loss": 0.0065, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5764966011047363, |
| "rewards/margins": 29.600894927978516, |
| "rewards/rejected": -27.024398803710938, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.035007610350076e-07, |
| "logits/chosen": -2.1728744506835938, |
| "logits/rejected": -1.750156044960022, |
| "logps/chosen": -90.58625793457031, |
| "logps/rejected": -115.90065002441406, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.2151265144348145, |
| "rewards/margins": 26.991992950439453, |
| "rewards/rejected": -24.776866912841797, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.0299340436326736e-07, |
| "logits/chosen": -2.1972928047180176, |
| "logits/rejected": -1.8641561269760132, |
| "logps/chosen": -83.82379150390625, |
| "logps/rejected": -120.79838562011719, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7972400188446045, |
| "rewards/margins": 28.825088500976562, |
| "rewards/rejected": -26.027847290039062, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.0248604769152713e-07, |
| "logits/chosen": -2.2066047191619873, |
| "logits/rejected": -1.8661673069000244, |
| "logps/chosen": -82.5276870727539, |
| "logps/rejected": -117.04951477050781, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4789516925811768, |
| "rewards/margins": 26.52178955078125, |
| "rewards/rejected": -24.042835235595703, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.019786910197869e-07, |
| "logits/chosen": -2.2894678115844727, |
| "logits/rejected": -1.9717973470687866, |
| "logps/chosen": -85.42427825927734, |
| "logps/rejected": -122.86556243896484, |
| "loss": 0.0066, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.7783312797546387, |
| "rewards/margins": 26.929943084716797, |
| "rewards/rejected": -26.151615142822266, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.0147133434804666e-07, |
| "logits/chosen": -2.2856619358062744, |
| "logits/rejected": -1.8935363292694092, |
| "logps/chosen": -83.85643005371094, |
| "logps/rejected": -116.0418930053711, |
| "loss": 0.0053, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.631080389022827, |
| "rewards/margins": 28.714336395263672, |
| "rewards/rejected": -26.083255767822266, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.0096397767630643e-07, |
| "logits/chosen": -2.1582155227661133, |
| "logits/rejected": -1.7837406396865845, |
| "logps/chosen": -90.37853240966797, |
| "logps/rejected": -126.54129791259766, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.798124313354492, |
| "rewards/margins": 29.615198135375977, |
| "rewards/rejected": -25.817073822021484, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.004566210045662e-07, |
| "logits/chosen": -2.217268705368042, |
| "logits/rejected": -1.9051685333251953, |
| "logps/chosen": -79.26915740966797, |
| "logps/rejected": -121.41270446777344, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.768852949142456, |
| "rewards/margins": 27.323421478271484, |
| "rewards/rejected": -24.5545654296875, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 9.994926433282596e-08, |
| "logits/chosen": -2.2435638904571533, |
| "logits/rejected": -1.8570373058319092, |
| "logps/chosen": -90.26069641113281, |
| "logps/rejected": -124.71513366699219, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2154905796051025, |
| "rewards/margins": 28.169509887695312, |
| "rewards/rejected": -25.954015731811523, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_logits/chosen": -2.2148590087890625, |
| "eval_logits/rejected": -1.865024447441101, |
| "eval_logps/chosen": -84.85028839111328, |
| "eval_logps/rejected": -118.58063507080078, |
| "eval_loss": 0.005515058524906635, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.9901474714279175, |
| "eval_rewards/margins": 27.865001678466797, |
| "eval_rewards/rejected": -25.874853134155273, |
| "eval_runtime": 189.7173, |
| "eval_samples_per_second": 15.086, |
| "eval_steps_per_second": 0.944, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 9.944190766108573e-08, |
| "logits/chosen": -2.2464184761047363, |
| "logits/rejected": -1.902503252029419, |
| "logps/chosen": -86.0322036743164, |
| "logps/rejected": -123.70219421386719, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7963413000106812, |
| "rewards/margins": 29.205463409423828, |
| "rewards/rejected": -27.409122467041016, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 9.89345509893455e-08, |
| "logits/chosen": -2.134826183319092, |
| "logits/rejected": -1.8044805526733398, |
| "logps/chosen": -87.43550109863281, |
| "logps/rejected": -123.2356185913086, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1314964294433594, |
| "rewards/margins": 29.698129653930664, |
| "rewards/rejected": -26.566635131835938, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 9.842719431760526e-08, |
| "logits/chosen": -2.146523952484131, |
| "logits/rejected": -1.7418187856674194, |
| "logps/chosen": -83.50428771972656, |
| "logps/rejected": -122.54347229003906, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.376321792602539, |
| "rewards/margins": 30.02010726928711, |
| "rewards/rejected": -27.643783569335938, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 9.791983764586503e-08, |
| "logits/chosen": -2.262716770172119, |
| "logits/rejected": -1.8752014636993408, |
| "logps/chosen": -84.97921752929688, |
| "logps/rejected": -122.33912658691406, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.812735915184021, |
| "rewards/margins": 29.280685424804688, |
| "rewards/rejected": -27.46795082092285, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 9.74124809741248e-08, |
| "logits/chosen": -2.2482247352600098, |
| "logits/rejected": -1.9075597524642944, |
| "logps/chosen": -81.56404113769531, |
| "logps/rejected": -120.65312194824219, |
| "loss": 0.0056, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.012248992919922, |
| "rewards/margins": 28.712228775024414, |
| "rewards/rejected": -26.699981689453125, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 9.690512430238456e-08, |
| "logits/chosen": -2.1412789821624756, |
| "logits/rejected": -1.8419468402862549, |
| "logps/chosen": -83.85114288330078, |
| "logps/rejected": -121.31649017333984, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0328783988952637, |
| "rewards/margins": 27.409412384033203, |
| "rewards/rejected": -26.376529693603516, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 9.639776763064433e-08, |
| "logits/chosen": -2.2971882820129395, |
| "logits/rejected": -1.9334943294525146, |
| "logps/chosen": -82.55015563964844, |
| "logps/rejected": -119.16120910644531, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0977225303649902, |
| "rewards/margins": 28.73293113708496, |
| "rewards/rejected": -26.635211944580078, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 9.58904109589041e-08, |
| "logits/chosen": -2.325941562652588, |
| "logits/rejected": -1.964868187904358, |
| "logps/chosen": -87.65157318115234, |
| "logps/rejected": -125.1788330078125, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.739508032798767, |
| "rewards/margins": 30.17959213256836, |
| "rewards/rejected": -28.44008445739746, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 9.538305428716386e-08, |
| "logits/chosen": -2.1548006534576416, |
| "logits/rejected": -1.8352489471435547, |
| "logps/chosen": -83.15804290771484, |
| "logps/rejected": -114.95072174072266, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4212478399276733, |
| "rewards/margins": 27.054697036743164, |
| "rewards/rejected": -25.63344955444336, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.487569761542363e-08, |
| "logits/chosen": -2.263066530227661, |
| "logits/rejected": -1.8140977621078491, |
| "logps/chosen": -93.13688659667969, |
| "logps/rejected": -124.15168762207031, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.275789737701416, |
| "rewards/margins": 29.475337982177734, |
| "rewards/rejected": -26.199548721313477, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.15, |
| "eval_logits/chosen": -2.2139129638671875, |
| "eval_logits/rejected": -1.8623522520065308, |
| "eval_logps/chosen": -85.51490783691406, |
| "eval_logps/rejected": -120.75127410888672, |
| "eval_loss": 0.005448976997286081, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.657840371131897, |
| "eval_rewards/margins": 28.618017196655273, |
| "eval_rewards/rejected": -26.960174560546875, |
| "eval_runtime": 230.7918, |
| "eval_samples_per_second": 12.401, |
| "eval_steps_per_second": 0.776, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.43683409436834e-08, |
| "logits/chosen": -2.2211930751800537, |
| "logits/rejected": -1.881513237953186, |
| "logps/chosen": -82.05587005615234, |
| "logps/rejected": -126.84517669677734, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6722524166107178, |
| "rewards/margins": 30.808147430419922, |
| "rewards/rejected": -28.13589859008789, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.386098427194316e-08, |
| "logits/chosen": -2.1461105346679688, |
| "logits/rejected": -1.7770576477050781, |
| "logps/chosen": -85.20562744140625, |
| "logps/rejected": -125.3041763305664, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.119962692260742, |
| "rewards/margins": 29.933517456054688, |
| "rewards/rejected": -27.813552856445312, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 9.335362760020293e-08, |
| "logits/chosen": -2.215076208114624, |
| "logits/rejected": -1.8217270374298096, |
| "logps/chosen": -84.71824645996094, |
| "logps/rejected": -120.50927734375, |
| "loss": 0.0058, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.6982955932617188, |
| "rewards/margins": 29.129375457763672, |
| "rewards/rejected": -26.431079864501953, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 9.28462709284627e-08, |
| "logits/chosen": -2.2664477825164795, |
| "logits/rejected": -1.8872871398925781, |
| "logps/chosen": -86.96624755859375, |
| "logps/rejected": -124.43575286865234, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4599993228912354, |
| "rewards/margins": 30.88054847717285, |
| "rewards/rejected": -29.420547485351562, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.233891425672246e-08, |
| "logits/chosen": -2.2344231605529785, |
| "logits/rejected": -1.7941217422485352, |
| "logps/chosen": -88.73806762695312, |
| "logps/rejected": -124.6733627319336, |
| "loss": 0.0054, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4499599933624268, |
| "rewards/margins": 30.964941024780273, |
| "rewards/rejected": -27.51498031616211, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.183155758498223e-08, |
| "logits/chosen": -2.26192045211792, |
| "logits/rejected": -1.9525654315948486, |
| "logps/chosen": -86.94519805908203, |
| "logps/rejected": -128.98985290527344, |
| "loss": 0.0012, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.0427141189575195, |
| "rewards/margins": 29.724853515625, |
| "rewards/rejected": -28.682140350341797, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.1324200913242e-08, |
| "logits/chosen": -2.2315638065338135, |
| "logits/rejected": -1.844601035118103, |
| "logps/chosen": -90.42842864990234, |
| "logps/rejected": -123.21382904052734, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7231295108795166, |
| "rewards/margins": 28.74625587463379, |
| "rewards/rejected": -27.023128509521484, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.081684424150176e-08, |
| "logits/chosen": -2.2464542388916016, |
| "logits/rejected": -1.9291092157363892, |
| "logps/chosen": -90.54940032958984, |
| "logps/rejected": -130.29818725585938, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4829115867614746, |
| "rewards/margins": 30.492401123046875, |
| "rewards/rejected": -29.00948715209961, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 9.030948756976153e-08, |
| "logits/chosen": -2.249579906463623, |
| "logits/rejected": -1.7951542139053345, |
| "logps/chosen": -84.71315002441406, |
| "logps/rejected": -123.6868896484375, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.647388458251953, |
| "rewards/margins": 31.460529327392578, |
| "rewards/rejected": -28.813140869140625, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 8.98021308980213e-08, |
| "logits/chosen": -2.2210094928741455, |
| "logits/rejected": -1.8732092380523682, |
| "logps/chosen": -83.642578125, |
| "logps/rejected": -124.18983459472656, |
| "loss": 0.0064, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.0244059562683105, |
| "rewards/margins": 29.753732681274414, |
| "rewards/rejected": -27.729320526123047, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_logits/chosen": -2.208674192428589, |
| "eval_logits/rejected": -1.8558579683303833, |
| "eval_logps/chosen": -86.5013198852539, |
| "eval_logps/rejected": -123.4262466430664, |
| "eval_loss": 0.005769502837210894, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.1646300554275513, |
| "eval_rewards/margins": 29.462299346923828, |
| "eval_rewards/rejected": -28.29766845703125, |
| "eval_runtime": 258.0094, |
| "eval_samples_per_second": 11.093, |
| "eval_steps_per_second": 0.694, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 8.929477422628106e-08, |
| "logits/chosen": -2.2377803325653076, |
| "logits/rejected": -1.8689501285552979, |
| "logps/chosen": -88.69053649902344, |
| "logps/rejected": -133.93704223632812, |
| "loss": 0.0043, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5955907106399536, |
| "rewards/margins": 29.29754066467285, |
| "rewards/rejected": -27.701946258544922, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 8.878741755454083e-08, |
| "logits/chosen": -2.179999589920044, |
| "logits/rejected": -1.8029935359954834, |
| "logps/chosen": -90.18666076660156, |
| "logps/rejected": -128.03399658203125, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3174841403961182, |
| "rewards/margins": 30.12929344177246, |
| "rewards/rejected": -28.811809539794922, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 8.82800608828006e-08, |
| "logits/chosen": -2.2542881965637207, |
| "logits/rejected": -1.860487699508667, |
| "logps/chosen": -84.78041076660156, |
| "logps/rejected": -127.14323425292969, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2697548866271973, |
| "rewards/margins": 31.854806900024414, |
| "rewards/rejected": -29.585052490234375, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 8.777270421106036e-08, |
| "logits/chosen": -2.2306056022644043, |
| "logits/rejected": -1.9451026916503906, |
| "logps/chosen": -84.75010681152344, |
| "logps/rejected": -127.5153579711914, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20773085951805115, |
| "rewards/margins": 29.199344635009766, |
| "rewards/rejected": -28.99161720275879, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 8.726534753932013e-08, |
| "logits/chosen": -2.195384979248047, |
| "logits/rejected": -1.8609682321548462, |
| "logps/chosen": -84.73804473876953, |
| "logps/rejected": -123.91796875, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5444185733795166, |
| "rewards/margins": 30.432912826538086, |
| "rewards/rejected": -28.88849449157715, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 8.67579908675799e-08, |
| "logits/chosen": -2.1483521461486816, |
| "logits/rejected": -1.7701698541641235, |
| "logps/chosen": -81.2613754272461, |
| "logps/rejected": -121.82652282714844, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8260562419891357, |
| "rewards/margins": 30.310237884521484, |
| "rewards/rejected": -27.484180450439453, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 8.625063419583966e-08, |
| "logits/chosen": -2.243568181991577, |
| "logits/rejected": -1.941277265548706, |
| "logps/chosen": -84.82609558105469, |
| "logps/rejected": -127.32414245605469, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3905398845672607, |
| "rewards/margins": 30.30340003967285, |
| "rewards/rejected": -28.912860870361328, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 8.574327752409943e-08, |
| "logits/chosen": -2.1722254753112793, |
| "logits/rejected": -1.8854080438613892, |
| "logps/chosen": -83.84065246582031, |
| "logps/rejected": -128.77059936523438, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0625784397125244, |
| "rewards/margins": 31.2793025970459, |
| "rewards/rejected": -30.216724395751953, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 8.52359208523592e-08, |
| "logits/chosen": -2.1614387035369873, |
| "logits/rejected": -1.8331111669540405, |
| "logps/chosen": -85.25975036621094, |
| "logps/rejected": -130.95721435546875, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8772072792053223, |
| "rewards/margins": 31.198410034179688, |
| "rewards/rejected": -28.32120132446289, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 8.472856418061896e-08, |
| "logits/chosen": -2.2037620544433594, |
| "logits/rejected": -1.8145354986190796, |
| "logps/chosen": -86.72196197509766, |
| "logps/rejected": -121.60040283203125, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.486250877380371, |
| "rewards/margins": 30.308456420898438, |
| "rewards/rejected": -27.822208404541016, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_logits/chosen": -2.2160322666168213, |
| "eval_logits/rejected": -1.868016242980957, |
| "eval_logps/chosen": -86.11427307128906, |
| "eval_logps/rejected": -123.58334350585938, |
| "eval_loss": 0.0056231957860291, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.3581570386886597, |
| "eval_rewards/margins": 29.7343692779541, |
| "eval_rewards/rejected": -28.376211166381836, |
| "eval_runtime": 178.9426, |
| "eval_samples_per_second": 15.994, |
| "eval_steps_per_second": 1.0, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 8.422120750887873e-08, |
| "logits/chosen": -2.2726593017578125, |
| "logits/rejected": -1.8736642599105835, |
| "logps/chosen": -87.76054382324219, |
| "logps/rejected": -124.5219955444336, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.735135316848755, |
| "rewards/margins": 30.699026107788086, |
| "rewards/rejected": -27.963891983032227, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 8.37138508371385e-08, |
| "logits/chosen": -2.2303450107574463, |
| "logits/rejected": -1.8574903011322021, |
| "logps/chosen": -87.68033599853516, |
| "logps/rejected": -125.37784576416016, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6258609294891357, |
| "rewards/margins": 30.00998878479004, |
| "rewards/rejected": -28.384124755859375, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 8.320649416539826e-08, |
| "logits/chosen": -2.1431241035461426, |
| "logits/rejected": -1.8002105951309204, |
| "logps/chosen": -86.46646881103516, |
| "logps/rejected": -127.82108306884766, |
| "loss": 0.0034, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.7209049463272095, |
| "rewards/margins": 30.69203758239746, |
| "rewards/rejected": -28.971134185791016, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 8.269913749365803e-08, |
| "logits/chosen": -2.21543550491333, |
| "logits/rejected": -1.858415961265564, |
| "logps/chosen": -85.4444808959961, |
| "logps/rejected": -125.85990142822266, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.123652219772339, |
| "rewards/margins": 29.99489974975586, |
| "rewards/rejected": -27.87125015258789, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 8.21917808219178e-08, |
| "logits/chosen": -2.226576328277588, |
| "logits/rejected": -1.8361542224884033, |
| "logps/chosen": -88.27388000488281, |
| "logps/rejected": -124.52458190917969, |
| "loss": 0.0054, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.222522020339966, |
| "rewards/margins": 29.900218963623047, |
| "rewards/rejected": -27.677698135375977, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 8.168442415017756e-08, |
| "logits/chosen": -2.211759567260742, |
| "logits/rejected": -1.8759946823120117, |
| "logps/chosen": -91.97505187988281, |
| "logps/rejected": -127.7925796508789, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.8236006498336792, |
| "rewards/margins": 30.634103775024414, |
| "rewards/rejected": -29.810501098632812, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 8.117706747843733e-08, |
| "logits/chosen": -2.220738172531128, |
| "logits/rejected": -1.8724660873413086, |
| "logps/chosen": -87.83768463134766, |
| "logps/rejected": -127.0542984008789, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5908282995224, |
| "rewards/margins": 30.813274383544922, |
| "rewards/rejected": -29.22244644165039, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 8.06697108066971e-08, |
| "logits/chosen": -2.2144250869750977, |
| "logits/rejected": -1.8192403316497803, |
| "logps/chosen": -88.7645034790039, |
| "logps/rejected": -123.9654769897461, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7449915409088135, |
| "rewards/margins": 30.582874298095703, |
| "rewards/rejected": -28.8378849029541, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 8.016235413495687e-08, |
| "logits/chosen": -2.1595869064331055, |
| "logits/rejected": -1.8579118251800537, |
| "logps/chosen": -81.31648254394531, |
| "logps/rejected": -125.78511810302734, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1897120475769043, |
| "rewards/margins": 31.16072654724121, |
| "rewards/rejected": -28.97101402282715, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 7.965499746321664e-08, |
| "logits/chosen": -2.163888692855835, |
| "logits/rejected": -1.7756332159042358, |
| "logps/chosen": -91.29290771484375, |
| "logps/rejected": -126.38360595703125, |
| "loss": 0.0025, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.3966686725616455, |
| "rewards/margins": 29.905893325805664, |
| "rewards/rejected": -27.509225845336914, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_logits/chosen": -2.2226171493530273, |
| "eval_logits/rejected": -1.8767516613006592, |
| "eval_logps/chosen": -86.72997283935547, |
| "eval_logps/rejected": -125.4724349975586, |
| "eval_loss": 0.005568630062043667, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.0503071546554565, |
| "eval_rewards/margins": 30.37106704711914, |
| "eval_rewards/rejected": -29.320756912231445, |
| "eval_runtime": 265.4131, |
| "eval_samples_per_second": 10.783, |
| "eval_steps_per_second": 0.674, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 7.91476407914764e-08, |
| "logits/chosen": -2.237316608428955, |
| "logits/rejected": -1.8469164371490479, |
| "logps/chosen": -86.86993408203125, |
| "logps/rejected": -128.10240173339844, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2446770668029785, |
| "rewards/margins": 31.02500343322754, |
| "rewards/rejected": -29.78032875061035, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 7.864028411973617e-08, |
| "logits/chosen": -2.212674379348755, |
| "logits/rejected": -1.7991310358047485, |
| "logps/chosen": -90.48421478271484, |
| "logps/rejected": -128.47349548339844, |
| "loss": 0.0067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.104187488555908, |
| "rewards/margins": 31.993030548095703, |
| "rewards/rejected": -29.888843536376953, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 7.813292744799594e-08, |
| "logits/chosen": -2.192707061767578, |
| "logits/rejected": -1.812227487564087, |
| "logps/chosen": -85.58504486083984, |
| "logps/rejected": -125.02815246582031, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6018452644348145, |
| "rewards/margins": 31.598779678344727, |
| "rewards/rejected": -28.996929168701172, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 7.76255707762557e-08, |
| "logits/chosen": -2.2332510948181152, |
| "logits/rejected": -1.9000991582870483, |
| "logps/chosen": -87.93112182617188, |
| "logps/rejected": -124.27226257324219, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8778412938117981, |
| "rewards/margins": 29.009052276611328, |
| "rewards/rejected": -28.131210327148438, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 7.711821410451547e-08, |
| "logits/chosen": -2.2971322536468506, |
| "logits/rejected": -1.955288290977478, |
| "logps/chosen": -84.05998229980469, |
| "logps/rejected": -121.64460754394531, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0523746013641357, |
| "rewards/margins": 30.503372192382812, |
| "rewards/rejected": -28.45099449157715, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 7.661085743277524e-08, |
| "logits/chosen": -2.2180066108703613, |
| "logits/rejected": -1.8513801097869873, |
| "logps/chosen": -91.88666534423828, |
| "logps/rejected": -131.45933532714844, |
| "loss": 0.0019, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.476855516433716, |
| "rewards/margins": 32.50453186035156, |
| "rewards/rejected": -30.02767562866211, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 7.6103500761035e-08, |
| "logits/chosen": -2.202540636062622, |
| "logits/rejected": -1.883195161819458, |
| "logps/chosen": -86.0487060546875, |
| "logps/rejected": -127.7849349975586, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5693166255950928, |
| "rewards/margins": 30.323623657226562, |
| "rewards/rejected": -29.75430679321289, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 7.559614408929477e-08, |
| "logits/chosen": -2.167088031768799, |
| "logits/rejected": -1.7546894550323486, |
| "logps/chosen": -86.92243194580078, |
| "logps/rejected": -121.72308349609375, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5353543758392334, |
| "rewards/margins": 30.667156219482422, |
| "rewards/rejected": -28.13180160522461, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 7.508878741755454e-08, |
| "logits/chosen": -2.1240665912628174, |
| "logits/rejected": -1.8034683465957642, |
| "logps/chosen": -86.10752868652344, |
| "logps/rejected": -130.32803344726562, |
| "loss": 0.0055, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.6282140016555786, |
| "rewards/margins": 30.408313751220703, |
| "rewards/rejected": -29.780099868774414, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 7.45814307458143e-08, |
| "logits/chosen": -2.234485387802124, |
| "logits/rejected": -1.8363311290740967, |
| "logps/chosen": -89.19425964355469, |
| "logps/rejected": -129.45681762695312, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9281721115112305, |
| "rewards/margins": 33.132511138916016, |
| "rewards/rejected": -31.204341888427734, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_logits/chosen": -2.2230384349823, |
| "eval_logits/rejected": -1.8779499530792236, |
| "eval_logps/chosen": -86.5483627319336, |
| "eval_logps/rejected": -125.72158813476562, |
| "eval_loss": 0.005534125491976738, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 1.1411113739013672, |
| "eval_rewards/margins": 30.586444854736328, |
| "eval_rewards/rejected": -29.44533348083496, |
| "eval_runtime": 203.014, |
| "eval_samples_per_second": 14.098, |
| "eval_steps_per_second": 0.882, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 7.407407407407407e-08, |
| "logits/chosen": -2.23368239402771, |
| "logits/rejected": -1.8570277690887451, |
| "logps/chosen": -87.19905853271484, |
| "logps/rejected": -127.96573638916016, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1923465728759766, |
| "rewards/margins": 31.500961303710938, |
| "rewards/rejected": -30.30861473083496, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 7.356671740233384e-08, |
| "logits/chosen": -2.14494252204895, |
| "logits/rejected": -1.8105385303497314, |
| "logps/chosen": -83.87150573730469, |
| "logps/rejected": -126.78532409667969, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.35639578104019165, |
| "rewards/margins": 30.178089141845703, |
| "rewards/rejected": -29.82169532775879, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 7.30593607305936e-08, |
| "logits/chosen": -2.253058671951294, |
| "logits/rejected": -1.9436228275299072, |
| "logps/chosen": -90.07007598876953, |
| "logps/rejected": -127.9438705444336, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.44818297028541565, |
| "rewards/margins": 30.035837173461914, |
| "rewards/rejected": -30.4840145111084, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 7.255200405885337e-08, |
| "logits/chosen": -2.2380034923553467, |
| "logits/rejected": -1.849329948425293, |
| "logps/chosen": -91.55721282958984, |
| "logps/rejected": -132.04197692871094, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7558342218399048, |
| "rewards/margins": 31.776962280273438, |
| "rewards/rejected": -30.021127700805664, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 7.204464738711314e-08, |
| "logits/chosen": -2.249427318572998, |
| "logits/rejected": -1.916865348815918, |
| "logps/chosen": -84.08346557617188, |
| "logps/rejected": -127.4309310913086, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0999155044555664, |
| "rewards/margins": 31.15311622619629, |
| "rewards/rejected": -30.053197860717773, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 7.15372907153729e-08, |
| "logits/chosen": -2.164412498474121, |
| "logits/rejected": -1.8164761066436768, |
| "logps/chosen": -84.87574768066406, |
| "logps/rejected": -127.91072845458984, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4683095216751099, |
| "rewards/margins": 30.6422176361084, |
| "rewards/rejected": -29.173908233642578, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 7.102993404363267e-08, |
| "logits/chosen": -2.2733216285705566, |
| "logits/rejected": -1.8871898651123047, |
| "logps/chosen": -84.68191528320312, |
| "logps/rejected": -128.5628204345703, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4870762825012207, |
| "rewards/margins": 31.898874282836914, |
| "rewards/rejected": -30.41179847717285, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 7.052257737189244e-08, |
| "logits/chosen": -2.1040546894073486, |
| "logits/rejected": -1.794029951095581, |
| "logps/chosen": -87.33931732177734, |
| "logps/rejected": -122.92645263671875, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.523239016532898, |
| "rewards/margins": 28.69746971130371, |
| "rewards/rejected": -28.174230575561523, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 7.00152207001522e-08, |
| "logits/chosen": -2.2180774211883545, |
| "logits/rejected": -1.8783124685287476, |
| "logps/chosen": -88.9879150390625, |
| "logps/rejected": -129.85577392578125, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.285766363143921, |
| "rewards/margins": 31.67111587524414, |
| "rewards/rejected": -30.38534927368164, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 6.950786402841197e-08, |
| "logits/chosen": -2.2432150840759277, |
| "logits/rejected": -1.8757518529891968, |
| "logps/chosen": -96.52727508544922, |
| "logps/rejected": -133.89486694335938, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.2539239525794983, |
| "rewards/margins": 30.236125946044922, |
| "rewards/rejected": -30.49005126953125, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_logits/chosen": -2.2150111198425293, |
| "eval_logits/rejected": -1.8662109375, |
| "eval_logps/chosen": -86.7686767578125, |
| "eval_logps/rejected": -126.29188537597656, |
| "eval_loss": 0.005586822517216206, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 1.030951976776123, |
| "eval_rewards/margins": 30.761432647705078, |
| "eval_rewards/rejected": -29.730480194091797, |
| "eval_runtime": 372.2708, |
| "eval_samples_per_second": 7.688, |
| "eval_steps_per_second": 0.481, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 6.900050735667174e-08, |
| "logits/chosen": -2.2496633529663086, |
| "logits/rejected": -1.9013668298721313, |
| "logps/chosen": -85.10989379882812, |
| "logps/rejected": -126.21388244628906, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0123857259750366, |
| "rewards/margins": 28.938159942626953, |
| "rewards/rejected": -27.925771713256836, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 6.84931506849315e-08, |
| "logits/chosen": -2.169320583343506, |
| "logits/rejected": -1.8793160915374756, |
| "logps/chosen": -83.18421173095703, |
| "logps/rejected": -132.3877410888672, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.122344732284546, |
| "rewards/margins": 30.82855796813965, |
| "rewards/rejected": -29.70621109008789, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 6.798579401319127e-08, |
| "logits/chosen": -2.3072543144226074, |
| "logits/rejected": -1.8543882369995117, |
| "logps/chosen": -92.82881164550781, |
| "logps/rejected": -127.92374420166016, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.4654512405395508, |
| "rewards/margins": 31.590072631835938, |
| "rewards/rejected": -30.124622344970703, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 6.747843734145104e-08, |
| "logits/chosen": -2.2160370349884033, |
| "logits/rejected": -1.8520358800888062, |
| "logps/chosen": -87.04851531982422, |
| "logps/rejected": -126.8850326538086, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.426818370819092, |
| "rewards/margins": 32.05809020996094, |
| "rewards/rejected": -29.631271362304688, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 6.69710806697108e-08, |
| "logits/chosen": -2.2358498573303223, |
| "logits/rejected": -1.92721426486969, |
| "logps/chosen": -83.96819305419922, |
| "logps/rejected": -127.56068420410156, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 1.6845203638076782, |
| "rewards/margins": 31.140390396118164, |
| "rewards/rejected": -29.455867767333984, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 6.646372399797057e-08, |
| "logits/chosen": -2.224752426147461, |
| "logits/rejected": -1.8739010095596313, |
| "logps/chosen": -83.2956771850586, |
| "logps/rejected": -126.33967590332031, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8728569746017456, |
| "rewards/margins": 29.507293701171875, |
| "rewards/rejected": -28.63443374633789, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 6.595636732623034e-08, |
| "logits/chosen": -2.2283332347869873, |
| "logits/rejected": -1.898648977279663, |
| "logps/chosen": -89.44114685058594, |
| "logps/rejected": -134.07838439941406, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2760619521141052, |
| "rewards/margins": 31.4327335357666, |
| "rewards/rejected": -31.156673431396484, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 6.54490106544901e-08, |
| "logits/chosen": -2.217665195465088, |
| "logits/rejected": -1.7950804233551025, |
| "logps/chosen": -91.71220397949219, |
| "logps/rejected": -131.68295288085938, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5391443967819214, |
| "rewards/margins": 33.635643005371094, |
| "rewards/rejected": -32.09649658203125, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 6.494165398274987e-08, |
| "logits/chosen": -2.1419761180877686, |
| "logits/rejected": -1.7783762216567993, |
| "logps/chosen": -90.9324722290039, |
| "logps/rejected": -132.05467224121094, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3793599605560303, |
| "rewards/margins": 31.754810333251953, |
| "rewards/rejected": -30.375452041625977, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 6.443429731100964e-08, |
| "logits/chosen": -2.23848032951355, |
| "logits/rejected": -1.9507777690887451, |
| "logps/chosen": -83.6585464477539, |
| "logps/rejected": -128.34262084960938, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07423808425664902, |
| "rewards/margins": 31.000595092773438, |
| "rewards/rejected": -30.926355361938477, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_logits/chosen": -2.220287322998047, |
| "eval_logits/rejected": -1.8729933500289917, |
| "eval_logps/chosen": -87.28707885742188, |
| "eval_logps/rejected": -127.75875091552734, |
| "eval_loss": 0.005639108829200268, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.7717540264129639, |
| "eval_rewards/margins": 31.235660552978516, |
| "eval_rewards/rejected": -30.463911056518555, |
| "eval_runtime": 204.4438, |
| "eval_samples_per_second": 13.999, |
| "eval_steps_per_second": 0.876, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 6.39269406392694e-08, |
| "logits/chosen": -2.217376232147217, |
| "logits/rejected": -1.8636280298233032, |
| "logps/chosen": -85.64130401611328, |
| "logps/rejected": -130.76148986816406, |
| "loss": 0.0045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.24725079536438, |
| "rewards/margins": 32.80742263793945, |
| "rewards/rejected": -30.5601749420166, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 6.341958396752917e-08, |
| "logits/chosen": -2.2781529426574707, |
| "logits/rejected": -1.9017364978790283, |
| "logps/chosen": -85.34228515625, |
| "logps/rejected": -126.925537109375, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1655864715576172, |
| "rewards/margins": 31.301239013671875, |
| "rewards/rejected": -30.135656356811523, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 6.291222729578894e-08, |
| "logits/chosen": -2.2987561225891113, |
| "logits/rejected": -1.894020438194275, |
| "logps/chosen": -90.0018539428711, |
| "logps/rejected": -120.316650390625, |
| "loss": 0.0058, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.8631511926651, |
| "rewards/margins": 30.350738525390625, |
| "rewards/rejected": -28.487585067749023, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 6.24048706240487e-08, |
| "logits/chosen": -2.3006176948547363, |
| "logits/rejected": -1.861555814743042, |
| "logps/chosen": -88.1524658203125, |
| "logps/rejected": -130.68968200683594, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4345285892486572, |
| "rewards/margins": 31.54620933532715, |
| "rewards/rejected": -30.111682891845703, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 6.189751395230847e-08, |
| "logits/chosen": -2.287721872329712, |
| "logits/rejected": -1.911665678024292, |
| "logps/chosen": -94.31678771972656, |
| "logps/rejected": -128.20156860351562, |
| "loss": 0.0021, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.2087610960006714, |
| "rewards/margins": 30.08795738220215, |
| "rewards/rejected": -28.879199981689453, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 6.139015728056824e-08, |
| "logits/chosen": -2.2673823833465576, |
| "logits/rejected": -1.8630508184432983, |
| "logps/chosen": -85.5520248413086, |
| "logps/rejected": -127.2315444946289, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.068432331085205, |
| "rewards/margins": 31.822891235351562, |
| "rewards/rejected": -29.75446128845215, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 6.0882800608828e-08, |
| "logits/chosen": -2.211057662963867, |
| "logits/rejected": -1.789331078529358, |
| "logps/chosen": -91.29240417480469, |
| "logps/rejected": -127.65742492675781, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5129761695861816, |
| "rewards/margins": 32.06275939941406, |
| "rewards/rejected": -29.54978370666504, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 6.037544393708777e-08, |
| "logits/chosen": -2.1423110961914062, |
| "logits/rejected": -1.8088276386260986, |
| "logps/chosen": -86.91880798339844, |
| "logps/rejected": -131.5565948486328, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9576078653335571, |
| "rewards/margins": 31.21225929260254, |
| "rewards/rejected": -30.254650115966797, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 5.986808726534754e-08, |
| "logits/chosen": -2.1703662872314453, |
| "logits/rejected": -1.8025754690170288, |
| "logps/chosen": -90.57283020019531, |
| "logps/rejected": -127.29478454589844, |
| "loss": 0.0057, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.42478710412979126, |
| "rewards/margins": 30.417232513427734, |
| "rewards/rejected": -29.992446899414062, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 5.93607305936073e-08, |
| "logits/chosen": -2.2170863151550293, |
| "logits/rejected": -1.8885765075683594, |
| "logps/chosen": -83.57946014404297, |
| "logps/rejected": -127.2809829711914, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1054432392120361, |
| "rewards/margins": 31.797679901123047, |
| "rewards/rejected": -30.69223976135254, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_logits/chosen": -2.218388795852661, |
| "eval_logits/rejected": -1.8641334772109985, |
| "eval_logps/chosen": -87.43651580810547, |
| "eval_logps/rejected": -127.63639068603516, |
| "eval_loss": 0.005585874430835247, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.6970371007919312, |
| "eval_rewards/margins": 31.099769592285156, |
| "eval_rewards/rejected": -30.402734756469727, |
| "eval_runtime": 215.095, |
| "eval_samples_per_second": 13.306, |
| "eval_steps_per_second": 0.832, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 5.8853373921867065e-08, |
| "logits/chosen": -2.204249858856201, |
| "logits/rejected": -1.8770767450332642, |
| "logps/chosen": -89.52336120605469, |
| "logps/rejected": -134.68844604492188, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.223055362701416, |
| "rewards/margins": 32.22364044189453, |
| "rewards/rejected": -30.000585556030273, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 5.834601725012683e-08, |
| "logits/chosen": -2.255474805831909, |
| "logits/rejected": -1.9023358821868896, |
| "logps/chosen": -89.14444732666016, |
| "logps/rejected": -128.97544860839844, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.772989273071289, |
| "rewards/margins": 31.232650756835938, |
| "rewards/rejected": -29.45966148376465, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 5.78386605783866e-08, |
| "logits/chosen": -2.12762451171875, |
| "logits/rejected": -1.7764146327972412, |
| "logps/chosen": -89.85285186767578, |
| "logps/rejected": -135.08656311035156, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.07732389122247696, |
| "rewards/margins": 32.275455474853516, |
| "rewards/rejected": -32.19812774658203, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 5.7331303906646365e-08, |
| "logits/chosen": -2.1940665245056152, |
| "logits/rejected": -1.8633617162704468, |
| "logps/chosen": -86.71977233886719, |
| "logps/rejected": -132.53306579589844, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.124468207359314, |
| "rewards/margins": 31.2531681060791, |
| "rewards/rejected": -30.128698348999023, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 5.682394723490613e-08, |
| "logits/chosen": -2.319063663482666, |
| "logits/rejected": -1.9095830917358398, |
| "logps/chosen": -92.62286376953125, |
| "logps/rejected": -133.32777404785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9819850921630859, |
| "rewards/margins": 32.897369384765625, |
| "rewards/rejected": -31.915386199951172, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 5.63165905631659e-08, |
| "logits/chosen": -2.249760627746582, |
| "logits/rejected": -1.880089521408081, |
| "logps/chosen": -90.4861831665039, |
| "logps/rejected": -135.6083221435547, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5621588826179504, |
| "rewards/margins": 33.28693771362305, |
| "rewards/rejected": -32.72478103637695, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 5.5809233891425665e-08, |
| "logits/chosen": -2.229804277420044, |
| "logits/rejected": -1.8705679178237915, |
| "logps/chosen": -89.12641143798828, |
| "logps/rejected": -131.51339721679688, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6339949369430542, |
| "rewards/margins": 31.29616928100586, |
| "rewards/rejected": -30.66217613220215, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 5.530187721968543e-08, |
| "logits/chosen": -2.2353408336639404, |
| "logits/rejected": -1.8864301443099976, |
| "logps/chosen": -90.36353302001953, |
| "logps/rejected": -135.7219696044922, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8752725720405579, |
| "rewards/margins": 33.36455535888672, |
| "rewards/rejected": -32.489280700683594, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 5.47945205479452e-08, |
| "logits/chosen": -2.1790707111358643, |
| "logits/rejected": -1.8824752569198608, |
| "logps/chosen": -86.35264587402344, |
| "logps/rejected": -138.5924835205078, |
| "loss": 0.003, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.8381770849227905, |
| "rewards/margins": 33.54732131958008, |
| "rewards/rejected": -32.70914077758789, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 5.4287163876204964e-08, |
| "logits/chosen": -2.253610134124756, |
| "logits/rejected": -1.8832261562347412, |
| "logps/chosen": -91.31092834472656, |
| "logps/rejected": -132.42288208007812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8345121145248413, |
| "rewards/margins": 31.579730987548828, |
| "rewards/rejected": -30.745220184326172, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_logits/chosen": -2.2216947078704834, |
| "eval_logits/rejected": -1.8705134391784668, |
| "eval_logps/chosen": -87.70054626464844, |
| "eval_logps/rejected": -128.5836181640625, |
| "eval_loss": 0.005452133249491453, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.56501704454422, |
| "eval_rewards/margins": 31.44136619567871, |
| "eval_rewards/rejected": -30.8763484954834, |
| "eval_runtime": 239.2992, |
| "eval_samples_per_second": 11.96, |
| "eval_steps_per_second": 0.748, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 5.377980720446473e-08, |
| "logits/chosen": -2.198091745376587, |
| "logits/rejected": -1.8183279037475586, |
| "logps/chosen": -88.14014434814453, |
| "logps/rejected": -130.6202392578125, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0735613107681274, |
| "rewards/margins": 32.70407485961914, |
| "rewards/rejected": -31.63051414489746, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 5.32724505327245e-08, |
| "logits/chosen": -2.260110378265381, |
| "logits/rejected": -1.8869386911392212, |
| "logps/chosen": -85.59989929199219, |
| "logps/rejected": -126.64656829833984, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1739155054092407, |
| "rewards/margins": 29.694509506225586, |
| "rewards/rejected": -28.520593643188477, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 5.2765093860984264e-08, |
| "logits/chosen": -2.2093660831451416, |
| "logits/rejected": -1.9314342737197876, |
| "logps/chosen": -86.80977630615234, |
| "logps/rejected": -128.8234405517578, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.0760526657104492, |
| "rewards/margins": 28.858245849609375, |
| "rewards/rejected": -29.93429946899414, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 5.225773718924403e-08, |
| "logits/chosen": -2.182633876800537, |
| "logits/rejected": -1.857081651687622, |
| "logps/chosen": -84.45915222167969, |
| "logps/rejected": -127.63262939453125, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2383257150650024, |
| "rewards/margins": 31.82940101623535, |
| "rewards/rejected": -30.591073989868164, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 5.17503805175038e-08, |
| "logits/chosen": -2.2090537548065186, |
| "logits/rejected": -1.8241138458251953, |
| "logps/chosen": -89.18778991699219, |
| "logps/rejected": -133.34609985351562, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9115778207778931, |
| "rewards/margins": 32.943077087402344, |
| "rewards/rejected": -32.03150177001953, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 5.1243023845763564e-08, |
| "logits/chosen": -2.2495675086975098, |
| "logits/rejected": -1.9659570455551147, |
| "logps/chosen": -82.7123031616211, |
| "logps/rejected": -135.22415161132812, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8649141192436218, |
| "rewards/margins": 33.649383544921875, |
| "rewards/rejected": -32.78447341918945, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 5.073566717402333e-08, |
| "logits/chosen": -2.2645201683044434, |
| "logits/rejected": -1.9173786640167236, |
| "logps/chosen": -85.26544952392578, |
| "logps/rejected": -131.0226593017578, |
| "loss": 0.0079, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9299182891845703, |
| "rewards/margins": 32.061363220214844, |
| "rewards/rejected": -31.131444931030273, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 5.02283105022831e-08, |
| "logits/chosen": -2.21622633934021, |
| "logits/rejected": -1.8026697635650635, |
| "logps/chosen": -93.25666046142578, |
| "logps/rejected": -133.81333923339844, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4977938234806061, |
| "rewards/margins": 31.675308227539062, |
| "rewards/rejected": -31.17751693725586, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 4.9720953830542864e-08, |
| "logits/chosen": -2.175873279571533, |
| "logits/rejected": -1.7768224477767944, |
| "logps/chosen": -91.02295684814453, |
| "logps/rejected": -131.60630798339844, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.008679199032485485, |
| "rewards/margins": 32.17238235473633, |
| "rewards/rejected": -32.181060791015625, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 4.921359715880263e-08, |
| "logits/chosen": -2.2250924110412598, |
| "logits/rejected": -1.7964969873428345, |
| "logps/chosen": -83.45528411865234, |
| "logps/rejected": -126.2359848022461, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5450199842453003, |
| "rewards/margins": 32.206085205078125, |
| "rewards/rejected": -30.66106605529785, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_logits/chosen": -2.225454807281494, |
| "eval_logits/rejected": -1.8789043426513672, |
| "eval_logps/chosen": -87.96759796142578, |
| "eval_logps/rejected": -129.4971466064453, |
| "eval_loss": 0.005582863464951515, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 0.4314943850040436, |
| "eval_rewards/margins": 31.764596939086914, |
| "eval_rewards/rejected": -31.33310317993164, |
| "eval_runtime": 190.7581, |
| "eval_samples_per_second": 15.003, |
| "eval_steps_per_second": 0.938, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 4.87062404870624e-08, |
| "logits/chosen": -2.2279891967773438, |
| "logits/rejected": -1.8970493078231812, |
| "logps/chosen": -85.4381103515625, |
| "logps/rejected": -128.7406005859375, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.32868337631225586, |
| "rewards/margins": 31.653844833374023, |
| "rewards/rejected": -31.325159072875977, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 4.8198883815322164e-08, |
| "logits/chosen": -2.2391271591186523, |
| "logits/rejected": -1.919921636581421, |
| "logps/chosen": -86.07292175292969, |
| "logps/rejected": -133.414306640625, |
| "loss": 0.0064, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.4069444537162781, |
| "rewards/margins": 31.80039405822754, |
| "rewards/rejected": -31.393451690673828, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 4.769152714358193e-08, |
| "logits/chosen": -2.1675751209259033, |
| "logits/rejected": -1.8634262084960938, |
| "logps/chosen": -84.79447937011719, |
| "logps/rejected": -135.7115020751953, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24717219173908234, |
| "rewards/margins": 33.35146713256836, |
| "rewards/rejected": -33.10429382324219, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 4.71841704718417e-08, |
| "logits/chosen": -2.1868836879730225, |
| "logits/rejected": -1.8967559337615967, |
| "logps/chosen": -88.42156982421875, |
| "logps/rejected": -132.83580017089844, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.0003570079861674458, |
| "rewards/margins": 31.02736473083496, |
| "rewards/rejected": -31.027725219726562, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 4.6676813800101464e-08, |
| "logits/chosen": -2.281507968902588, |
| "logits/rejected": -1.8936166763305664, |
| "logps/chosen": -91.97183990478516, |
| "logps/rejected": -128.8743438720703, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.5036630630493164, |
| "rewards/margins": 31.219980239868164, |
| "rewards/rejected": -29.716318130493164, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 4.616945712836123e-08, |
| "logits/chosen": -2.1977477073669434, |
| "logits/rejected": -1.8735698461532593, |
| "logps/chosen": -89.23414611816406, |
| "logps/rejected": -131.9024658203125, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5118563771247864, |
| "rewards/margins": 32.36008834838867, |
| "rewards/rejected": -31.848236083984375, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 4.5662100456621e-08, |
| "logits/chosen": -2.2489736080169678, |
| "logits/rejected": -1.8674437999725342, |
| "logps/chosen": -87.26219940185547, |
| "logps/rejected": -134.82095336914062, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8911517858505249, |
| "rewards/margins": 32.76057434082031, |
| "rewards/rejected": -31.86942481994629, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 4.5154743784880764e-08, |
| "logits/chosen": -2.1873042583465576, |
| "logits/rejected": -1.924572229385376, |
| "logps/chosen": -79.94978332519531, |
| "logps/rejected": -128.44432067871094, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.669002890586853, |
| "rewards/margins": 30.952983856201172, |
| "rewards/rejected": -30.283981323242188, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 4.464738711314053e-08, |
| "logits/chosen": -2.160274028778076, |
| "logits/rejected": -1.8004605770111084, |
| "logps/chosen": -96.46055603027344, |
| "logps/rejected": -133.01870727539062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.372810959815979, |
| "rewards/margins": 31.60434913635254, |
| "rewards/rejected": -30.231542587280273, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 4.41400304414003e-08, |
| "logits/chosen": -2.2056326866149902, |
| "logits/rejected": -1.73297917842865, |
| "logps/chosen": -96.11383056640625, |
| "logps/rejected": -131.00546264648438, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5699256658554077, |
| "rewards/margins": 32.902523040771484, |
| "rewards/rejected": -31.332595825195312, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_logits/chosen": -2.226090669631958, |
| "eval_logits/rejected": -1.876379370689392, |
| "eval_logps/chosen": -87.41751861572266, |
| "eval_logps/rejected": -128.55892944335938, |
| "eval_loss": 0.005494570359587669, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 0.7065298557281494, |
| "eval_rewards/margins": 31.570541381835938, |
| "eval_rewards/rejected": -30.864015579223633, |
| "eval_runtime": 195.492, |
| "eval_samples_per_second": 14.64, |
| "eval_steps_per_second": 0.916, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 4.3632673769660064e-08, |
| "logits/chosen": -2.2404072284698486, |
| "logits/rejected": -1.8427753448486328, |
| "logps/chosen": -88.0647964477539, |
| "logps/rejected": -132.00401306152344, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0430487394332886, |
| "rewards/margins": 31.766735076904297, |
| "rewards/rejected": -30.723682403564453, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 4.312531709791983e-08, |
| "logits/chosen": -2.238086223602295, |
| "logits/rejected": -1.9353258609771729, |
| "logps/chosen": -86.8983383178711, |
| "logps/rejected": -134.00552368164062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7805923223495483, |
| "rewards/margins": 32.820377349853516, |
| "rewards/rejected": -32.03978729248047, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 4.26179604261796e-08, |
| "logits/chosen": -2.210732936859131, |
| "logits/rejected": -1.8510334491729736, |
| "logps/chosen": -86.54997253417969, |
| "logps/rejected": -128.822998046875, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1234691143035889, |
| "rewards/margins": 31.587039947509766, |
| "rewards/rejected": -30.46356773376465, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 4.2110603754439363e-08, |
| "logits/chosen": -2.271686553955078, |
| "logits/rejected": -1.9221289157867432, |
| "logps/chosen": -85.12126922607422, |
| "logps/rejected": -132.54244995117188, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.38059353828430176, |
| "rewards/margins": 32.59527587890625, |
| "rewards/rejected": -32.214683532714844, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 4.160324708269913e-08, |
| "logits/chosen": -2.2383780479431152, |
| "logits/rejected": -1.9224342107772827, |
| "logps/chosen": -87.90855407714844, |
| "logps/rejected": -133.9766387939453, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.365673303604126, |
| "rewards/margins": 32.20100784301758, |
| "rewards/rejected": -30.8353328704834, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 4.10958904109589e-08, |
| "logits/chosen": -2.2130231857299805, |
| "logits/rejected": -1.9353950023651123, |
| "logps/chosen": -79.39093017578125, |
| "logps/rejected": -129.04461669921875, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7016046047210693, |
| "rewards/margins": 31.313640594482422, |
| "rewards/rejected": -29.612030029296875, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 4.0588533739218663e-08, |
| "logits/chosen": -2.196366786956787, |
| "logits/rejected": -1.783656358718872, |
| "logps/chosen": -81.48753356933594, |
| "logps/rejected": -127.0324478149414, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7704079151153564, |
| "rewards/margins": 33.856910705566406, |
| "rewards/rejected": -32.08650207519531, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 4.0081177067478437e-08, |
| "logits/chosen": -2.1401500701904297, |
| "logits/rejected": -1.800840139389038, |
| "logps/chosen": -90.09111785888672, |
| "logps/rejected": -134.8428955078125, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.179386854171753, |
| "rewards/margins": 33.37128448486328, |
| "rewards/rejected": -32.191898345947266, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 3.95738203957382e-08, |
| "logits/chosen": -2.2394440174102783, |
| "logits/rejected": -1.904920220375061, |
| "logps/chosen": -87.17567443847656, |
| "logps/rejected": -130.36404418945312, |
| "loss": 0.0054, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.23616953194141388, |
| "rewards/margins": 31.9814453125, |
| "rewards/rejected": -31.74527931213379, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 3.906646372399797e-08, |
| "logits/chosen": -2.1932971477508545, |
| "logits/rejected": -1.808300256729126, |
| "logps/chosen": -90.1788558959961, |
| "logps/rejected": -135.00550842285156, |
| "loss": 0.0055, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.4655330777168274, |
| "rewards/margins": 31.84645652770996, |
| "rewards/rejected": -31.380924224853516, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.65, |
| "eval_logits/chosen": -2.2272167205810547, |
| "eval_logits/rejected": -1.878515601158142, |
| "eval_logps/chosen": -87.6694564819336, |
| "eval_logps/rejected": -129.5568084716797, |
| "eval_loss": 0.005615293048322201, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.5805642604827881, |
| "eval_rewards/margins": 31.943510055541992, |
| "eval_rewards/rejected": -31.36294937133789, |
| "eval_runtime": 188.904, |
| "eval_samples_per_second": 15.151, |
| "eval_steps_per_second": 0.948, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 3.8559107052257736e-08, |
| "logits/chosen": -2.2895407676696777, |
| "logits/rejected": -1.9646167755126953, |
| "logps/chosen": -84.39744567871094, |
| "logps/rejected": -128.11642456054688, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.45202645659446716, |
| "rewards/margins": 31.760883331298828, |
| "rewards/rejected": -32.21290969848633, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 3.80517503805175e-08, |
| "logits/chosen": -2.2432637214660645, |
| "logits/rejected": -1.90109384059906, |
| "logps/chosen": -87.83378601074219, |
| "logps/rejected": -134.02479553222656, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.3190554678440094, |
| "rewards/margins": 32.41364669799805, |
| "rewards/rejected": -32.73270034790039, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 3.754439370877727e-08, |
| "logits/chosen": -2.249756336212158, |
| "logits/rejected": -1.8411035537719727, |
| "logps/chosen": -92.8016128540039, |
| "logps/rejected": -141.8028106689453, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7895019054412842, |
| "rewards/margins": 35.841468811035156, |
| "rewards/rejected": -34.051971435546875, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 3.7037037037037036e-08, |
| "logits/chosen": -2.2434568405151367, |
| "logits/rejected": -1.8818607330322266, |
| "logps/chosen": -85.84037780761719, |
| "logps/rejected": -130.4074249267578, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18663930892944336, |
| "rewards/margins": 32.276615142822266, |
| "rewards/rejected": -32.08997344970703, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 3.65296803652968e-08, |
| "logits/chosen": -2.266979932785034, |
| "logits/rejected": -1.9421230554580688, |
| "logps/chosen": -88.89350128173828, |
| "logps/rejected": -130.6448516845703, |
| "loss": 0.0059, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22369590401649475, |
| "rewards/margins": 30.364971160888672, |
| "rewards/rejected": -30.14127540588379, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 3.602232369355657e-08, |
| "logits/chosen": -2.245170831680298, |
| "logits/rejected": -1.8216416835784912, |
| "logps/chosen": -89.65043640136719, |
| "logps/rejected": -136.75848388671875, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8733927607536316, |
| "rewards/margins": 34.61988830566406, |
| "rewards/rejected": -33.746498107910156, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 3.5514967021816336e-08, |
| "logits/chosen": -2.264880895614624, |
| "logits/rejected": -1.87582528591156, |
| "logps/chosen": -89.56029510498047, |
| "logps/rejected": -132.30987548828125, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9222901463508606, |
| "rewards/margins": 32.37554931640625, |
| "rewards/rejected": -31.453258514404297, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 3.50076103500761e-08, |
| "logits/chosen": -2.2106261253356934, |
| "logits/rejected": -1.8250715732574463, |
| "logps/chosen": -86.20150756835938, |
| "logps/rejected": -131.51016235351562, |
| "loss": 0.0043, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.9935638904571533, |
| "rewards/margins": 32.885887145996094, |
| "rewards/rejected": -30.892318725585938, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.450025367833587e-08, |
| "logits/chosen": -2.1880767345428467, |
| "logits/rejected": -1.850630521774292, |
| "logps/chosen": -92.76158142089844, |
| "logps/rejected": -135.90554809570312, |
| "loss": 0.008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7444310188293457, |
| "rewards/margins": 32.634525299072266, |
| "rewards/rejected": -31.890094757080078, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.3992897006595636e-08, |
| "logits/chosen": -2.2573089599609375, |
| "logits/rejected": -1.9743645191192627, |
| "logps/chosen": -89.33732604980469, |
| "logps/rejected": -133.85531616210938, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07906317710876465, |
| "rewards/margins": 31.270915985107422, |
| "rewards/rejected": -31.191858291625977, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.69, |
| "eval_logits/chosen": -2.2270514965057373, |
| "eval_logits/rejected": -1.8773261308670044, |
| "eval_logps/chosen": -88.0054931640625, |
| "eval_logps/rejected": -130.35690307617188, |
| "eval_loss": 0.0056848106905817986, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.4125469923019409, |
| "eval_rewards/margins": 32.175540924072266, |
| "eval_rewards/rejected": -31.76299285888672, |
| "eval_runtime": 187.4914, |
| "eval_samples_per_second": 15.265, |
| "eval_steps_per_second": 0.955, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 3.34855403348554e-08, |
| "logits/chosen": -2.249181032180786, |
| "logits/rejected": -1.8485383987426758, |
| "logps/chosen": -94.90681457519531, |
| "logps/rejected": -131.73171997070312, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9341878890991211, |
| "rewards/margins": 31.412506103515625, |
| "rewards/rejected": -30.478321075439453, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 3.297818366311517e-08, |
| "logits/chosen": -2.2404422760009766, |
| "logits/rejected": -1.9138734340667725, |
| "logps/chosen": -84.72028350830078, |
| "logps/rejected": -135.90516662597656, |
| "loss": 0.0042, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.11444835364818573, |
| "rewards/margins": 33.07588577270508, |
| "rewards/rejected": -32.96143341064453, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 3.2470826991374936e-08, |
| "logits/chosen": -2.2721214294433594, |
| "logits/rejected": -1.8678615093231201, |
| "logps/chosen": -89.56704711914062, |
| "logps/rejected": -135.8103790283203, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.21784238517284393, |
| "rewards/margins": 33.14167022705078, |
| "rewards/rejected": -32.923828125, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 3.19634703196347e-08, |
| "logits/chosen": -2.2767839431762695, |
| "logits/rejected": -1.95871102809906, |
| "logps/chosen": -92.42479705810547, |
| "logps/rejected": -139.7154998779297, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1621972620487213, |
| "rewards/margins": 33.40778350830078, |
| "rewards/rejected": -33.56998062133789, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 3.145611364789447e-08, |
| "logits/chosen": -2.1546614170074463, |
| "logits/rejected": -1.7696462869644165, |
| "logps/chosen": -92.0521469116211, |
| "logps/rejected": -134.0331268310547, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3609464168548584, |
| "rewards/margins": 32.90177917480469, |
| "rewards/rejected": -31.540836334228516, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 3.0948756976154236e-08, |
| "logits/chosen": -2.2606728076934814, |
| "logits/rejected": -1.9319846630096436, |
| "logps/chosen": -84.58219146728516, |
| "logps/rejected": -129.25262451171875, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3570373058319092, |
| "rewards/margins": 32.03596878051758, |
| "rewards/rejected": -30.678930282592773, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 3.0441400304414e-08, |
| "logits/chosen": -2.2868599891662598, |
| "logits/rejected": -1.9675430059432983, |
| "logps/chosen": -84.88082122802734, |
| "logps/rejected": -130.11279296875, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.3799717426300049, |
| "rewards/margins": 32.025333404541016, |
| "rewards/rejected": -30.645360946655273, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 2.993404363267377e-08, |
| "logits/chosen": -2.2390341758728027, |
| "logits/rejected": -1.9002273082733154, |
| "logps/chosen": -84.3361587524414, |
| "logps/rejected": -132.21054077148438, |
| "loss": 0.0015, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.16436767578125, |
| "rewards/margins": 31.722976684570312, |
| "rewards/rejected": -31.887344360351562, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 2.9426686960933532e-08, |
| "logits/chosen": -2.199117660522461, |
| "logits/rejected": -1.7541431188583374, |
| "logps/chosen": -94.94574737548828, |
| "logps/rejected": -130.81954956054688, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6142606735229492, |
| "rewards/margins": 33.10905456542969, |
| "rewards/rejected": -31.494792938232422, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 2.89193302891933e-08, |
| "logits/chosen": -2.368619918823242, |
| "logits/rejected": -1.9851986169815063, |
| "logps/chosen": -90.95460510253906, |
| "logps/rejected": -135.75814819335938, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2684900164604187, |
| "rewards/margins": 33.65724563598633, |
| "rewards/rejected": -33.388755798339844, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_logits/chosen": -2.220628023147583, |
| "eval_logits/rejected": -1.8684388399124146, |
| "eval_logps/chosen": -87.90426635742188, |
| "eval_logps/rejected": -129.78916931152344, |
| "eval_loss": 0.005480717867612839, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.4631572663784027, |
| "eval_rewards/margins": 31.94228172302246, |
| "eval_rewards/rejected": -31.479124069213867, |
| "eval_runtime": 188.1705, |
| "eval_samples_per_second": 15.21, |
| "eval_steps_per_second": 0.951, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 2.8411973617453066e-08, |
| "logits/chosen": -2.1487109661102295, |
| "logits/rejected": -1.817098617553711, |
| "logps/chosen": -88.42924499511719, |
| "logps/rejected": -132.3211212158203, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3025569915771484, |
| "rewards/margins": 33.558433532714844, |
| "rewards/rejected": -32.25587844848633, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 2.7904616945712832e-08, |
| "logits/chosen": -2.2678167819976807, |
| "logits/rejected": -1.8667113780975342, |
| "logps/chosen": -86.5342025756836, |
| "logps/rejected": -129.55935668945312, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0008518695831299, |
| "rewards/margins": 32.452972412109375, |
| "rewards/rejected": -31.452117919921875, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 2.73972602739726e-08, |
| "logits/chosen": -2.264303684234619, |
| "logits/rejected": -1.878883957862854, |
| "logps/chosen": -87.34925842285156, |
| "logps/rejected": -130.01307678222656, |
| "loss": 0.0031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4018937945365906, |
| "rewards/margins": 32.04921340942383, |
| "rewards/rejected": -31.647314071655273, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 2.6889903602232366e-08, |
| "logits/chosen": -2.2151219844818115, |
| "logits/rejected": -1.8500292301177979, |
| "logps/chosen": -89.37690734863281, |
| "logps/rejected": -132.4504852294922, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7266126871109009, |
| "rewards/margins": 33.85057067871094, |
| "rewards/rejected": -32.123958587646484, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 2.6382546930492132e-08, |
| "logits/chosen": -2.3891372680664062, |
| "logits/rejected": -2.01668119430542, |
| "logps/chosen": -88.968017578125, |
| "logps/rejected": -134.05789184570312, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.28615638613700867, |
| "rewards/margins": 31.448923110961914, |
| "rewards/rejected": -31.162769317626953, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.58751902587519e-08, |
| "logits/chosen": -2.265878200531006, |
| "logits/rejected": -1.890244722366333, |
| "logps/chosen": -83.14924621582031, |
| "logps/rejected": -128.8566436767578, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.234644889831543, |
| "rewards/margins": 32.04364776611328, |
| "rewards/rejected": -30.80900001525879, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.5367833587011665e-08, |
| "logits/chosen": -2.3138954639434814, |
| "logits/rejected": -1.8876469135284424, |
| "logps/chosen": -96.02142333984375, |
| "logps/rejected": -132.93771362304688, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.01418936252594, |
| "rewards/margins": 31.96274185180664, |
| "rewards/rejected": -30.948551177978516, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.4860476915271432e-08, |
| "logits/chosen": -2.2674362659454346, |
| "logits/rejected": -1.8280704021453857, |
| "logps/chosen": -91.15046691894531, |
| "logps/rejected": -131.0101318359375, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.064391613006592, |
| "rewards/margins": 34.05684280395508, |
| "rewards/rejected": -31.99245262145996, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.43531202435312e-08, |
| "logits/chosen": -2.2650763988494873, |
| "logits/rejected": -1.9617116451263428, |
| "logps/chosen": -81.56208801269531, |
| "logps/rejected": -131.12905883789062, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9505952000617981, |
| "rewards/margins": 32.1887092590332, |
| "rewards/rejected": -31.238117218017578, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.3845763571790965e-08, |
| "logits/chosen": -2.2631096839904785, |
| "logits/rejected": -1.8503172397613525, |
| "logps/chosen": -93.38179016113281, |
| "logps/rejected": -127.70606994628906, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.097931981086731, |
| "rewards/margins": 31.50238037109375, |
| "rewards/rejected": -30.40444564819336, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.78, |
| "eval_logits/chosen": -2.2261240482330322, |
| "eval_logits/rejected": -1.877672791481018, |
| "eval_logps/chosen": -87.89844512939453, |
| "eval_logps/rejected": -130.31732177734375, |
| "eval_loss": 0.005521238315850496, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.46606892347335815, |
| "eval_rewards/margins": 32.209266662597656, |
| "eval_rewards/rejected": -31.743196487426758, |
| "eval_runtime": 230.5005, |
| "eval_samples_per_second": 12.416, |
| "eval_steps_per_second": 0.777, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.3338406900050732e-08, |
| "logits/chosen": -2.156193494796753, |
| "logits/rejected": -1.7497676610946655, |
| "logps/chosen": -88.01410675048828, |
| "logps/rejected": -129.71466064453125, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.868211567401886, |
| "rewards/margins": 32.48591995239258, |
| "rewards/rejected": -31.61771011352539, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.28310502283105e-08, |
| "logits/chosen": -2.27543306350708, |
| "logits/rejected": -1.8602546453475952, |
| "logps/chosen": -87.84466552734375, |
| "logps/rejected": -132.81777954101562, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.908778429031372, |
| "rewards/margins": 33.64677810668945, |
| "rewards/rejected": -31.73800277709961, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.2323693556570265e-08, |
| "logits/chosen": -2.2359254360198975, |
| "logits/rejected": -1.9188182353973389, |
| "logps/chosen": -83.94422149658203, |
| "logps/rejected": -129.99581909179688, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0997272729873657, |
| "rewards/margins": 33.11286163330078, |
| "rewards/rejected": -32.01313400268555, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.1816336884830032e-08, |
| "logits/chosen": -2.220909357070923, |
| "logits/rejected": -1.9032386541366577, |
| "logps/chosen": -84.5809555053711, |
| "logps/rejected": -131.4840545654297, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.0809751749038696, |
| "rewards/margins": 32.72708511352539, |
| "rewards/rejected": -31.6461124420166, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.13089802130898e-08, |
| "logits/chosen": -2.242598056793213, |
| "logits/rejected": -1.8045127391815186, |
| "logps/chosen": -88.67266082763672, |
| "logps/rejected": -134.9513397216797, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.345744013786316, |
| "rewards/margins": 35.6363410949707, |
| "rewards/rejected": -34.29059600830078, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.0801623541349565e-08, |
| "logits/chosen": -2.2422022819519043, |
| "logits/rejected": -1.8511192798614502, |
| "logps/chosen": -89.98072052001953, |
| "logps/rejected": -134.96629333496094, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8219916820526123, |
| "rewards/margins": 34.537723541259766, |
| "rewards/rejected": -32.71573257446289, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2.0294266869609332e-08, |
| "logits/chosen": -2.2780232429504395, |
| "logits/rejected": -1.8996295928955078, |
| "logps/chosen": -90.99410247802734, |
| "logps/rejected": -132.14706420898438, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.23341834545135498, |
| "rewards/margins": 32.95896530151367, |
| "rewards/rejected": -32.725547790527344, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.97869101978691e-08, |
| "logits/chosen": -2.265056610107422, |
| "logits/rejected": -1.8394749164581299, |
| "logps/chosen": -93.9018783569336, |
| "logps/rejected": -132.54859924316406, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5913444757461548, |
| "rewards/margins": 33.28534698486328, |
| "rewards/rejected": -32.694007873535156, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.9279553526128868e-08, |
| "logits/chosen": -2.234004259109497, |
| "logits/rejected": -1.9320383071899414, |
| "logps/chosen": -80.52074432373047, |
| "logps/rejected": -131.88790893554688, |
| "loss": 0.0022, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.3767390549182892, |
| "rewards/margins": 33.22993087768555, |
| "rewards/rejected": -32.85319137573242, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.8772196854388635e-08, |
| "logits/chosen": -2.2175235748291016, |
| "logits/rejected": -1.8579527139663696, |
| "logps/chosen": -86.18971252441406, |
| "logps/rejected": -130.14015197753906, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.16683992743492126, |
| "rewards/margins": 31.512447357177734, |
| "rewards/rejected": -31.67928695678711, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.83, |
| "eval_logits/chosen": -2.2242860794067383, |
| "eval_logits/rejected": -1.8746119737625122, |
| "eval_logps/chosen": -87.8666763305664, |
| "eval_logps/rejected": -130.38250732421875, |
| "eval_loss": 0.005393806379288435, |
| "eval_rewards/accuracies": 0.9972066879272461, |
| "eval_rewards/chosen": 0.48195433616638184, |
| "eval_rewards/margins": 32.25774383544922, |
| "eval_rewards/rejected": -31.775789260864258, |
| "eval_runtime": 225.1441, |
| "eval_samples_per_second": 12.712, |
| "eval_steps_per_second": 0.795, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.82648401826484e-08, |
| "logits/chosen": -2.2525382041931152, |
| "logits/rejected": -1.9172885417938232, |
| "logps/chosen": -85.48805236816406, |
| "logps/rejected": -133.95077514648438, |
| "loss": 0.0054, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1090790256857872, |
| "rewards/margins": 32.306297302246094, |
| "rewards/rejected": -32.19722366333008, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.7757483510908168e-08, |
| "logits/chosen": -2.1963915824890137, |
| "logits/rejected": -1.8396198749542236, |
| "logps/chosen": -87.08211517333984, |
| "logps/rejected": -136.7283477783203, |
| "loss": 0.0023, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.9704242944717407, |
| "rewards/margins": 33.93294906616211, |
| "rewards/rejected": -32.96253204345703, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.7250126839167935e-08, |
| "logits/chosen": -2.220945358276367, |
| "logits/rejected": -1.8874809741973877, |
| "logps/chosen": -83.79931640625, |
| "logps/rejected": -129.1210174560547, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5613209009170532, |
| "rewards/margins": 32.765995025634766, |
| "rewards/rejected": -32.204673767089844, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.67427701674277e-08, |
| "logits/chosen": -2.2060558795928955, |
| "logits/rejected": -1.8112850189208984, |
| "logps/chosen": -87.95478820800781, |
| "logps/rejected": -132.51715087890625, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4738547801971436, |
| "rewards/margins": 34.16747283935547, |
| "rewards/rejected": -32.69361877441406, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.6235413495687468e-08, |
| "logits/chosen": -2.1749913692474365, |
| "logits/rejected": -1.8666985034942627, |
| "logps/chosen": -88.18854522705078, |
| "logps/rejected": -133.01121520996094, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7993472218513489, |
| "rewards/margins": 31.564916610717773, |
| "rewards/rejected": -30.765567779541016, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.5728056823947235e-08, |
| "logits/chosen": -2.161698341369629, |
| "logits/rejected": -1.7702264785766602, |
| "logps/chosen": -83.78431701660156, |
| "logps/rejected": -125.90284729003906, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6891142129898071, |
| "rewards/margins": 30.822668075561523, |
| "rewards/rejected": -30.133554458618164, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.5220700152207e-08, |
| "logits/chosen": -2.3119311332702637, |
| "logits/rejected": -1.9453165531158447, |
| "logps/chosen": -87.14160919189453, |
| "logps/rejected": -134.0089569091797, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7904703617095947, |
| "rewards/margins": 33.403202056884766, |
| "rewards/rejected": -32.61273193359375, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 1.4713343480466766e-08, |
| "logits/chosen": -2.2479605674743652, |
| "logits/rejected": -1.8846473693847656, |
| "logps/chosen": -94.47314453125, |
| "logps/rejected": -134.31393432617188, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.504614531993866, |
| "rewards/margins": 31.21249008178711, |
| "rewards/rejected": -31.71710205078125, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 1.4205986808726533e-08, |
| "logits/chosen": -2.1867895126342773, |
| "logits/rejected": -1.8338983058929443, |
| "logps/chosen": -86.80367279052734, |
| "logps/rejected": -134.02243041992188, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9545267820358276, |
| "rewards/margins": 32.74871826171875, |
| "rewards/rejected": -31.794189453125, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.36986301369863e-08, |
| "logits/chosen": -2.277268648147583, |
| "logits/rejected": -1.94185471534729, |
| "logps/chosen": -86.12911224365234, |
| "logps/rejected": -135.1995849609375, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7337052822113037, |
| "rewards/margins": 33.44802474975586, |
| "rewards/rejected": -32.71432113647461, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.88, |
| "eval_logits/chosen": -2.2262394428253174, |
| "eval_logits/rejected": -1.8772982358932495, |
| "eval_logps/chosen": -87.41510772705078, |
| "eval_logps/rejected": -129.4889373779297, |
| "eval_loss": 0.005471652373671532, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.7077398300170898, |
| "eval_rewards/margins": 32.0367431640625, |
| "eval_rewards/rejected": -31.329004287719727, |
| "eval_runtime": 218.9647, |
| "eval_samples_per_second": 13.071, |
| "eval_steps_per_second": 0.817, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.3191273465246066e-08, |
| "logits/chosen": -2.2694547176361084, |
| "logits/rejected": -1.8749288320541382, |
| "logps/chosen": -86.23151397705078, |
| "logps/rejected": -126.11222076416016, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5606023073196411, |
| "rewards/margins": 32.055213928222656, |
| "rewards/rejected": -30.49460792541504, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.2683916793505833e-08, |
| "logits/chosen": -2.271695375442505, |
| "logits/rejected": -1.8720725774765015, |
| "logps/chosen": -88.22488403320312, |
| "logps/rejected": -133.49237060546875, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0623927116394043, |
| "rewards/margins": 33.233009338378906, |
| "rewards/rejected": -32.170616149902344, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.21765601217656e-08, |
| "logits/chosen": -2.2026755809783936, |
| "logits/rejected": -1.8335577249526978, |
| "logps/chosen": -86.81916809082031, |
| "logps/rejected": -130.44638061523438, |
| "loss": 0.0064, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3268142938613892, |
| "rewards/margins": 32.48900604248047, |
| "rewards/rejected": -31.16219139099121, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.1669203450025366e-08, |
| "logits/chosen": -2.2959322929382324, |
| "logits/rejected": -1.9061601161956787, |
| "logps/chosen": -92.18937683105469, |
| "logps/rejected": -137.560302734375, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8249843120574951, |
| "rewards/margins": 33.91102600097656, |
| "rewards/rejected": -32.08604049682617, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.1161846778285133e-08, |
| "logits/chosen": -2.2622501850128174, |
| "logits/rejected": -1.8619012832641602, |
| "logps/chosen": -87.53951263427734, |
| "logps/rejected": -131.9397430419922, |
| "loss": 0.0025, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.8194286227226257, |
| "rewards/margins": 32.57026290893555, |
| "rewards/rejected": -31.750835418701172, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.06544901065449e-08, |
| "logits/chosen": -2.26688814163208, |
| "logits/rejected": -1.9139961004257202, |
| "logps/chosen": -85.9757308959961, |
| "logps/rejected": -134.81204223632812, |
| "loss": 0.0031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7707549333572388, |
| "rewards/margins": 33.59237289428711, |
| "rewards/rejected": -32.821617126464844, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.0147133434804666e-08, |
| "logits/chosen": -2.1150248050689697, |
| "logits/rejected": -1.7503303289413452, |
| "logps/chosen": -88.97874450683594, |
| "logps/rejected": -134.1490478515625, |
| "loss": 0.0085, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2682693004608154, |
| "rewards/margins": 32.085575103759766, |
| "rewards/rejected": -30.817302703857422, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 9.639776763064434e-09, |
| "logits/chosen": -2.1985535621643066, |
| "logits/rejected": -1.8416798114776611, |
| "logps/chosen": -83.56170654296875, |
| "logps/rejected": -127.2623291015625, |
| "loss": 0.0052, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5547320246696472, |
| "rewards/margins": 31.148799896240234, |
| "rewards/rejected": -30.594066619873047, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 9.1324200913242e-09, |
| "logits/chosen": -2.1735215187072754, |
| "logits/rejected": -1.806305170059204, |
| "logps/chosen": -85.32138061523438, |
| "logps/rejected": -126.041015625, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6665178537368774, |
| "rewards/margins": 31.99408531188965, |
| "rewards/rejected": -30.327566146850586, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 8.625063419583967e-09, |
| "logits/chosen": -2.2531039714813232, |
| "logits/rejected": -1.8883110284805298, |
| "logps/chosen": -89.10762023925781, |
| "logps/rejected": -135.33935546875, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 0.8338155746459961, |
| "rewards/margins": 32.7072868347168, |
| "rewards/rejected": -31.873470306396484, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_logits/chosen": -2.227215051651001, |
| "eval_logits/rejected": -1.8791638612747192, |
| "eval_logps/chosen": -87.51187133789062, |
| "eval_logps/rejected": -129.8273468017578, |
| "eval_loss": 0.005407842341810465, |
| "eval_rewards/accuracies": 0.994413435459137, |
| "eval_rewards/chosen": 0.6593578457832336, |
| "eval_rewards/margins": 32.15756607055664, |
| "eval_rewards/rejected": -31.498210906982422, |
| "eval_runtime": 185.4523, |
| "eval_samples_per_second": 15.433, |
| "eval_steps_per_second": 0.965, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 8.117706747843734e-09, |
| "logits/chosen": -2.1412510871887207, |
| "logits/rejected": -1.8283464908599854, |
| "logps/chosen": -90.0947265625, |
| "logps/rejected": -132.66854858398438, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1616397649049759, |
| "rewards/margins": 31.50514793395996, |
| "rewards/rejected": -31.3435115814209, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 7.6103500761035e-09, |
| "logits/chosen": -2.265721321105957, |
| "logits/rejected": -1.9056390523910522, |
| "logps/chosen": -87.0054931640625, |
| "logps/rejected": -132.97547912597656, |
| "loss": 0.0044, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.0425851345062256, |
| "rewards/margins": 32.99910354614258, |
| "rewards/rejected": -31.95652198791504, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 7.1029934043632664e-09, |
| "logits/chosen": -2.2558608055114746, |
| "logits/rejected": -1.8824846744537354, |
| "logps/chosen": -86.92906188964844, |
| "logps/rejected": -135.2639617919922, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9257342219352722, |
| "rewards/margins": 33.13534927368164, |
| "rewards/rejected": -32.209617614746094, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 6.595636732623033e-09, |
| "logits/chosen": -2.2064738273620605, |
| "logits/rejected": -1.859531044960022, |
| "logps/chosen": -87.9058837890625, |
| "logps/rejected": -133.92010498046875, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3929450511932373, |
| "rewards/margins": 33.6112174987793, |
| "rewards/rejected": -32.2182731628418, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 6.0882800608828e-09, |
| "logits/chosen": -2.2518255710601807, |
| "logits/rejected": -1.9297151565551758, |
| "logps/chosen": -82.05953979492188, |
| "logps/rejected": -129.396240234375, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9110031127929688, |
| "rewards/margins": 31.361520767211914, |
| "rewards/rejected": -30.450519561767578, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 5.580923389142566e-09, |
| "logits/chosen": -2.0963399410247803, |
| "logits/rejected": -1.7235018014907837, |
| "logps/chosen": -89.75813293457031, |
| "logps/rejected": -132.9015350341797, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.336916923522949, |
| "rewards/margins": 32.56396484375, |
| "rewards/rejected": -30.2270450592041, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 5.073566717402333e-09, |
| "logits/chosen": -2.23740816116333, |
| "logits/rejected": -1.8160254955291748, |
| "logps/chosen": -97.89299774169922, |
| "logps/rejected": -127.76078796386719, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.258462905883789, |
| "rewards/margins": 32.506343841552734, |
| "rewards/rejected": -31.247879028320312, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 4.5662100456621e-09, |
| "logits/chosen": -2.185804843902588, |
| "logits/rejected": -1.8170665502548218, |
| "logps/chosen": -86.79978942871094, |
| "logps/rejected": -129.39993286132812, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.790616989135742, |
| "rewards/margins": 33.8178825378418, |
| "rewards/rejected": -31.027271270751953, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 4.058853373921867e-09, |
| "logits/chosen": -2.232938766479492, |
| "logits/rejected": -1.811173439025879, |
| "logps/chosen": -90.50852966308594, |
| "logps/rejected": -131.22023010253906, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5173299312591553, |
| "rewards/margins": 34.080116271972656, |
| "rewards/rejected": -32.562782287597656, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 3.5514967021816332e-09, |
| "logits/chosen": -2.2670958042144775, |
| "logits/rejected": -1.914764165878296, |
| "logps/chosen": -91.02118682861328, |
| "logps/rejected": -133.01803588867188, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9153534173965454, |
| "rewards/margins": 31.895156860351562, |
| "rewards/rejected": -30.979806900024414, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.97, |
| "eval_logits/chosen": -2.227478265762329, |
| "eval_logits/rejected": -1.8803960084915161, |
| "eval_logps/chosen": -87.42868041992188, |
| "eval_logps/rejected": -129.57211303710938, |
| "eval_loss": 0.005446174647659063, |
| "eval_rewards/accuracies": 0.9916201233863831, |
| "eval_rewards/chosen": 0.7009533643722534, |
| "eval_rewards/margins": 32.07155227661133, |
| "eval_rewards/rejected": -31.37059211730957, |
| "eval_runtime": 193.9275, |
| "eval_samples_per_second": 14.758, |
| "eval_steps_per_second": 0.923, |
| "step": 6500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6570, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 0.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|