{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9111617312072893, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "logits/chosen": -0.300163596868515, "logits/rejected": -0.3011459410190582, "logps/chosen": -418.81268310546875, "logps/rejected": -421.69482421875, "loss": 0.6923, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": 0.008436297997832298, "rewards/margins": 0.001967963995411992, "rewards/rejected": 0.006468335632234812, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -0.31174224615097046, "logits/rejected": -0.3135172724723816, "logps/chosen": -428.8531799316406, "logps/rejected": -427.205810546875, "loss": 0.6951, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.0215766541659832, "rewards/margins": -0.0034640885423868895, "rewards/rejected": 0.02504074200987816, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.5e-06, "logits/chosen": -0.2996385097503662, "logits/rejected": -0.30060532689094543, "logps/chosen": -416.20086669921875, "logps/rejected": -412.4971618652344, "loss": 0.6924, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.030052989721298218, "rewards/margins": 0.0019294738303869963, "rewards/rejected": 0.028123509138822556, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -0.3022615313529968, "logits/rejected": -0.3025739789009094, "logps/chosen": -426.9918518066406, "logps/rejected": -423.1588439941406, "loss": 0.692, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.02528352662920952, "rewards/margins": 0.002774887252599001, "rewards/rejected": 0.022508641704916954, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.5e-06, "logits/chosen": -0.30438098311424255, "logits/rejected": -0.30549854040145874, "logps/chosen": -421.03363037109375, "logps/rejected": -421.8212890625, "loss": 0.6898, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.03513988479971886, "rewards/margins": 0.007109012454748154, "rewards/rejected": 0.028030872344970703, "step": 50 }, { "epoch": 0.02, "learning_rate": 3e-06, "logits/chosen": -0.30687031149864197, "logits/rejected": -0.3071025013923645, "logps/chosen": -417.4591369628906, "logps/rejected": -417.7974548339844, "loss": 0.6931, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.014512499794363976, "rewards/margins": 0.00045255664736032486, "rewards/rejected": 0.014059944078326225, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.5e-06, "logits/chosen": -0.30733975768089294, "logits/rejected": -0.3082950711250305, "logps/chosen": -422.18487548828125, "logps/rejected": -422.29052734375, "loss": 0.6887, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.06789219379425049, "rewards/margins": 0.00933685339987278, "rewards/rejected": 0.05855534225702286, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "logits/chosen": -0.309120774269104, "logits/rejected": -0.3103254437446594, "logps/chosen": -424.8710021972656, "logps/rejected": -423.9234924316406, "loss": 0.6875, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0613434836268425, "rewards/margins": 0.011882667429745197, "rewards/rejected": 0.04946080967783928, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.5e-06, "logits/chosen": -0.3092747628688812, "logits/rejected": -0.3102528750896454, "logps/chosen": -417.56097412109375, "logps/rejected": -420.48541259765625, "loss": 0.6882, "rewards/accuracies": 0.5625, "rewards/chosen": 0.1002880111336708, "rewards/margins": 0.010797671973705292, "rewards/rejected": 0.08949033915996552, "step": 90 }, { "epoch": 0.03, "learning_rate": 5e-06, "logits/chosen": -0.3046155571937561, "logits/rejected": -0.3053414225578308, "logps/chosen": -417.95501708984375, "logps/rejected": -416.2376403808594, "loss": 0.6748, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.1339004933834076, "rewards/margins": 0.03802730515599251, "rewards/rejected": 0.09587319195270538, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.999853306957783e-06, "logits/chosen": -0.3040740489959717, "logits/rejected": -0.30468136072158813, "logps/chosen": -416.46527099609375, "logps/rejected": -415.51568603515625, "loss": 0.6714, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.13915565609931946, "rewards/margins": 0.045606400817632675, "rewards/rejected": 0.09354925900697708, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.99941324504621e-06, "logits/chosen": -0.3062252104282379, "logits/rejected": -0.30699923634529114, "logps/chosen": -423.4345703125, "logps/rejected": -421.33477783203125, "loss": 0.6681, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.25515347719192505, "rewards/margins": 0.05361776426434517, "rewards/rejected": 0.20153570175170898, "step": 120 }, { "epoch": 0.04, "learning_rate": 4.998679865908499e-06, "logits/chosen": -0.3025161623954773, "logits/rejected": -0.30388832092285156, "logps/chosen": -421.076416015625, "logps/rejected": -419.70428466796875, "loss": 0.6432, "rewards/accuracies": 0.75, "rewards/chosen": 0.36134886741638184, "rewards/margins": 0.10863993316888809, "rewards/rejected": 0.25270897150039673, "step": 130 }, { "epoch": 0.04, "learning_rate": 4.9976532556099425e-06, "logits/chosen": -0.29753798246383667, "logits/rejected": -0.2986024022102356, "logps/chosen": -423.3164978027344, "logps/rejected": -420.72918701171875, "loss": 0.632, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.45896610617637634, "rewards/margins": 0.13753186166286469, "rewards/rejected": 0.32143422961235046, "step": 140 }, { "epoch": 0.05, "learning_rate": 4.99633353462781e-06, "logits/chosen": -0.300027072429657, "logits/rejected": -0.3015795648097992, "logps/chosen": -413.91973876953125, "logps/rejected": -415.4903869628906, "loss": 0.6428, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.47876471281051636, "rewards/margins": 0.11648330837488174, "rewards/rejected": 0.3622814118862152, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.994720857837211e-06, "logits/chosen": -0.3021107316017151, "logits/rejected": -0.30334895849227905, "logps/chosen": -419.66571044921875, "logps/rejected": -420.95068359375, "loss": 0.623, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.5215948820114136, "rewards/margins": 0.1642296016216278, "rewards/rejected": 0.35736531019210815, "step": 160 }, { "epoch": 0.05, "learning_rate": 4.992815414492917e-06, "logits/chosen": -0.29045212268829346, "logits/rejected": -0.29103735089302063, "logps/chosen": -411.07635498046875, "logps/rejected": -411.93463134765625, "loss": 0.6303, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.5044211149215698, "rewards/margins": 0.16038301587104797, "rewards/rejected": 0.34403812885284424, "step": 170 }, { "epoch": 0.05, "learning_rate": 4.990617428207153e-06, "logits/chosen": -0.29839888215065, "logits/rejected": -0.29893797636032104, "logps/chosen": -430.1136169433594, "logps/rejected": -428.3583984375, "loss": 0.6029, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.5385109782218933, "rewards/margins": 0.22802197933197021, "rewards/rejected": 0.3104889690876007, "step": 180 }, { "epoch": 0.06, "learning_rate": 4.988127156923355e-06, "logits/chosen": -0.2956782281398773, "logits/rejected": -0.2963833212852478, "logps/chosen": -415.17071533203125, "logps/rejected": -414.58148193359375, "loss": 0.6078, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.4606494903564453, "rewards/margins": 0.22775804996490479, "rewards/rejected": 0.23289147019386292, "step": 190 }, { "epoch": 0.06, "learning_rate": 4.985344892885899e-06, "logits/chosen": -0.29678258299827576, "logits/rejected": -0.2977609634399414, "logps/chosen": -416.76275634765625, "logps/rejected": -419.5223693847656, "loss": 0.5821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.44212013483047485, "rewards/margins": 0.29724568128585815, "rewards/rejected": 0.1448744386434555, "step": 200 }, { "epoch": 0.06, "eval_logits/chosen": -0.35281771421432495, "eval_logits/rejected": -0.35360345244407654, "eval_logps/chosen": -408.5499267578125, "eval_logps/rejected": -409.8388977050781, "eval_loss": 0.5728641152381897, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": 0.43412691354751587, "eval_rewards/margins": 0.3201069235801697, "eval_rewards/rejected": 0.11402001231908798, "eval_runtime": 351.7745, "eval_samples_per_second": 1.421, "eval_steps_per_second": 1.421, "step": 200 }, { "epoch": 0.06, "learning_rate": 4.9822709626058065e-06, "logits/chosen": -0.29128286242485046, "logits/rejected": -0.2920396327972412, "logps/chosen": -416.55322265625, "logps/rejected": -417.397216796875, "loss": 0.5743, "rewards/accuracies": 0.75, "rewards/chosen": 0.42414647340774536, "rewards/margins": 0.3180859684944153, "rewards/rejected": 0.1060604602098465, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.978905726822424e-06, "logits/chosen": -0.29205116629600525, "logits/rejected": -0.2932327687740326, "logps/chosen": -429.031005859375, "logps/rejected": -432.4542541503906, "loss": 0.5944, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.34927603602409363, "rewards/margins": 0.2745763659477234, "rewards/rejected": 0.07469968497753143, "step": 220 }, { "epoch": 0.07, "learning_rate": 4.975249580461092e-06, "logits/chosen": -0.29278379678726196, "logits/rejected": -0.29318395256996155, "logps/chosen": -415.50640869140625, "logps/rejected": -414.65631103515625, "loss": 0.6108, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.20576027035713196, "rewards/margins": 0.24422487616539001, "rewards/rejected": -0.038464583456516266, "step": 230 }, { "epoch": 0.07, "learning_rate": 4.971302952586796e-06, "logits/chosen": -0.2884067893028259, "logits/rejected": -0.2890322208404541, "logps/chosen": -411.9427795410156, "logps/rejected": -418.1693420410156, "loss": 0.553, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.25125259160995483, "rewards/margins": 0.39369240403175354, "rewards/rejected": -0.1424398422241211, "step": 240 }, { "epoch": 0.08, "learning_rate": 4.967066306353816e-06, "logits/chosen": -0.28915414214134216, "logits/rejected": -0.29073747992515564, "logps/chosen": -417.0771484375, "logps/rejected": -419.65380859375, "loss": 0.5598, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.25535959005355835, "rewards/margins": 0.40900731086730957, "rewards/rejected": -0.15364770591259003, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.962540138951371e-06, "logits/chosen": -0.2950271964073181, "logits/rejected": -0.29611852765083313, "logps/chosen": -420.79681396484375, "logps/rejected": -425.1570739746094, "loss": 0.5278, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.24652545154094696, "rewards/margins": 0.48342761397361755, "rewards/rejected": -0.2369021624326706, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.957724981545276e-06, "logits/chosen": -0.28752994537353516, "logits/rejected": -0.2876993417739868, "logps/chosen": -413.72808837890625, "logps/rejected": -418.2240295410156, "loss": 0.5369, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.144112229347229, "rewards/margins": 0.48878079652786255, "rewards/rejected": -0.34466850757598877, "step": 270 }, { "epoch": 0.09, "learning_rate": 4.952621399215598e-06, "logits/chosen": -0.29713207483291626, "logits/rejected": -0.29806575179100037, "logps/chosen": -420.4150390625, "logps/rejected": -428.95513916015625, "loss": 0.5325, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.011465489864349365, "rewards/margins": 0.47427234053611755, "rewards/rejected": -0.4857378602027893, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.947229990890356e-06, "logits/chosen": -0.285542756319046, "logits/rejected": -0.28633180260658264, "logps/chosen": -420.0926208496094, "logps/rejected": -423.4457092285156, "loss": 0.5193, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.08504172414541245, "rewards/margins": 0.5871935486793518, "rewards/rejected": -0.6722352504730225, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.941551389275217e-06, "logits/chosen": -0.2842163145542145, "logits/rejected": -0.28539806604385376, "logps/chosen": -421.17822265625, "logps/rejected": -424.78387451171875, "loss": 0.5631, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.19560113549232483, "rewards/margins": 0.5197954177856445, "rewards/rejected": -0.715396523475647, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.935586260779261e-06, "logits/chosen": -0.2907197177410126, "logits/rejected": -0.29180362820625305, "logps/chosen": -427.5953063964844, "logps/rejected": -431.76788330078125, "loss": 0.5331, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4097444415092468, "rewards/margins": 0.5406568646430969, "rewards/rejected": -0.9504014253616333, "step": 310 }, { "epoch": 0.1, "learning_rate": 4.929335305436764e-06, "logits/chosen": -0.2902284264564514, "logits/rejected": -0.2910650670528412, "logps/chosen": -427.05621337890625, "logps/rejected": -427.26904296875, "loss": 0.5694, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.4887164533138275, "rewards/margins": 0.4573966860771179, "rewards/rejected": -0.9461132287979126, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.922799256825052e-06, "logits/chosen": -0.30178460478782654, "logits/rejected": -0.3031577467918396, "logps/chosen": -432.64544677734375, "logps/rejected": -437.355712890625, "loss": 0.5759, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5430983304977417, "rewards/margins": 0.5063012838363647, "rewards/rejected": -1.0493996143341064, "step": 330 }, { "epoch": 0.1, "learning_rate": 4.915978881978407e-06, "logits/chosen": -0.2879001498222351, "logits/rejected": -0.28882110118865967, "logps/chosen": -418.3189392089844, "logps/rejected": -420.14349365234375, "loss": 0.5114, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.39648348093032837, "rewards/margins": 0.6386500000953674, "rewards/rejected": -1.0351333618164062, "step": 340 }, { "epoch": 0.11, "learning_rate": 4.908874981298058e-06, "logits/chosen": -0.29214486479759216, "logits/rejected": -0.29305440187454224, "logps/chosen": -421.3182678222656, "logps/rejected": -427.06317138671875, "loss": 0.5628, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5688936114311218, "rewards/margins": 0.5135782957077026, "rewards/rejected": -1.0824719667434692, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.901488388458247e-06, "logits/chosen": -0.2956882119178772, "logits/rejected": -0.29717716574668884, "logps/chosen": -429.40850830078125, "logps/rejected": -432.0194396972656, "loss": 0.5326, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5142576098442078, "rewards/margins": 0.5949846506118774, "rewards/rejected": -1.1092422008514404, "step": 360 }, { "epoch": 0.11, "learning_rate": 4.893819970308394e-06, "logits/chosen": -0.29191336035728455, "logits/rejected": -0.2928611636161804, "logps/chosen": -432.4073181152344, "logps/rejected": -437.53472900390625, "loss": 0.5255, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5648801922798157, "rewards/margins": 0.5746434926986694, "rewards/rejected": -1.1395236253738403, "step": 370 }, { "epoch": 0.12, "learning_rate": 4.885870626771371e-06, "logits/chosen": -0.2915678322315216, "logits/rejected": -0.2924065887928009, "logps/chosen": -421.0965881347656, "logps/rejected": -425.9581604003906, "loss": 0.5565, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6075869798660278, "rewards/margins": 0.576026201248169, "rewards/rejected": -1.1836131811141968, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -0.29022809863090515, "logits/rejected": -0.2918907701969147, "logps/chosen": -422.2085876464844, "logps/rejected": -425.4307556152344, "loss": 0.5346, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6413823962211609, "rewards/margins": 0.6153510808944702, "rewards/rejected": -1.2567334175109863, "step": 390 }, { "epoch": 0.12, "learning_rate": 4.869132927957007e-06, "logits/chosen": -0.2912658751010895, "logits/rejected": -0.292255163192749, "logps/chosen": -424.4219665527344, "logps/rejected": -430.76885986328125, "loss": 0.53, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7030640840530396, "rewards/margins": 0.6264012455940247, "rewards/rejected": -1.329465389251709, "step": 400 }, { "epoch": 0.12, "eval_logits/chosen": -0.3515583574771881, "eval_logits/rejected": -0.35239377617836, "eval_logps/chosen": -419.6265563964844, "eval_logps/rejected": -424.9375, "eval_loss": 0.5038847327232361, "eval_rewards/accuracies": 0.7379999756813049, "eval_rewards/chosen": -0.6735388040542603, "eval_rewards/margins": 0.7223072648048401, "eval_rewards/rejected": -1.3958461284637451, "eval_runtime": 375.1774, "eval_samples_per_second": 1.333, "eval_steps_per_second": 1.333, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.860346536922834e-06, "logits/chosen": -0.29377973079681396, "logits/rejected": -0.294566810131073, "logps/chosen": -429.86907958984375, "logps/rejected": -432.5889587402344, "loss": 0.529, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7517430782318115, "rewards/margins": 0.6350258588790894, "rewards/rejected": -1.3867689371109009, "step": 410 }, { "epoch": 0.13, "learning_rate": 4.85128314875731e-06, "logits/chosen": -0.2876330316066742, "logits/rejected": -0.2890221178531647, "logps/chosen": -433.5904846191406, "logps/rejected": -438.02886962890625, "loss": 0.5174, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7357751131057739, "rewards/margins": 0.6541243195533752, "rewards/rejected": -1.389899492263794, "step": 420 }, { "epoch": 0.13, "learning_rate": 4.841943827089223e-06, "logits/chosen": -0.30073267221450806, "logits/rejected": -0.3028663098812103, "logps/chosen": -438.89056396484375, "logps/rejected": -444.29443359375, "loss": 0.5427, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7469267845153809, "rewards/margins": 0.6645030379295349, "rewards/rejected": -1.411429762840271, "step": 430 }, { "epoch": 0.13, "learning_rate": 4.832329667929378e-06, "logits/chosen": -0.30401021242141724, "logits/rejected": -0.305408775806427, "logps/chosen": -436.4923400878906, "logps/rejected": -443.785400390625, "loss": 0.4856, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7162739634513855, "rewards/margins": 0.7617406845092773, "rewards/rejected": -1.478014588356018, "step": 440 }, { "epoch": 0.14, "learning_rate": 4.822441799541979e-06, "logits/chosen": -0.29748016595840454, "logits/rejected": -0.2987380027770996, "logps/chosen": -432.2513122558594, "logps/rejected": -439.78741455078125, "loss": 0.5138, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.8289654850959778, "rewards/margins": 0.7209790349006653, "rewards/rejected": -1.549944519996643, "step": 450 }, { "epoch": 0.14, "learning_rate": 4.812281382312222e-06, "logits/chosen": -0.28938063979148865, "logits/rejected": -0.2903631031513214, "logps/chosen": -421.52337646484375, "logps/rejected": -426.65142822265625, "loss": 0.4934, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7328917980194092, "rewards/margins": 0.7723864316940308, "rewards/rejected": -1.5052781105041504, "step": 460 }, { "epoch": 0.14, "learning_rate": 4.801849608610119e-06, "logits/chosen": -0.2995319366455078, "logits/rejected": -0.3008275330066681, "logps/chosen": -437.45916748046875, "logps/rejected": -443.75799560546875, "loss": 0.4984, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9069220423698425, "rewards/margins": 0.7756569981575012, "rewards/rejected": -1.6825790405273438, "step": 470 }, { "epoch": 0.15, "learning_rate": 4.7911477026505656e-06, "logits/chosen": -0.2930867373943329, "logits/rejected": -0.2938670516014099, "logps/chosen": -436.32305908203125, "logps/rejected": -439.0968322753906, "loss": 0.4882, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.054971694946289, "rewards/margins": 0.8004587292671204, "rewards/rejected": -1.8554306030273438, "step": 480 }, { "epoch": 0.15, "learning_rate": 4.780176920349675e-06, "logits/chosen": -0.2880414128303528, "logits/rejected": -0.2893609404563904, "logps/chosen": -426.8358459472656, "logps/rejected": -432.79248046875, "loss": 0.5123, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1646369695663452, "rewards/margins": 0.739470362663269, "rewards/rejected": -1.9041073322296143, "step": 490 }, { "epoch": 0.15, "learning_rate": 4.7689385491773934e-06, "logits/chosen": -0.3000113070011139, "logits/rejected": -0.3008071780204773, "logps/chosen": -442.62860107421875, "logps/rejected": -446.14825439453125, "loss": 0.5871, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.3722599744796753, "rewards/margins": 0.6036561131477356, "rewards/rejected": -1.9759161472320557, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.7574339080064046e-06, "logits/chosen": -0.2956729829311371, "logits/rejected": -0.29699647426605225, "logps/chosen": -432.17486572265625, "logps/rejected": -441.1890563964844, "loss": 0.4989, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2666178941726685, "rewards/margins": 0.7352741956710815, "rewards/rejected": -2.001891851425171, "step": 510 }, { "epoch": 0.16, "learning_rate": 4.745664346957362e-06, "logits/chosen": -0.29319706559181213, "logits/rejected": -0.2932819724082947, "logps/chosen": -441.1473083496094, "logps/rejected": -443.6536560058594, "loss": 0.5431, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1980129480361938, "rewards/margins": 0.7274158000946045, "rewards/rejected": -1.9254287481307983, "step": 520 }, { "epoch": 0.16, "learning_rate": 4.733631247240435e-06, "logits/chosen": -0.28386861085891724, "logits/rejected": -0.28545230627059937, "logps/chosen": -424.7322692871094, "logps/rejected": -432.74920654296875, "loss": 0.5172, "rewards/accuracies": 0.71875, "rewards/chosen": -1.3047645092010498, "rewards/margins": 0.7416442632675171, "rewards/rejected": -2.0464088916778564, "step": 530 }, { "epoch": 0.16, "learning_rate": 4.721336020993228e-06, "logits/chosen": -0.29582637548446655, "logits/rejected": -0.2965632379055023, "logps/chosen": -428.98992919921875, "logps/rejected": -436.71533203125, "loss": 0.5223, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.232280969619751, "rewards/margins": 0.7531365752220154, "rewards/rejected": -1.9854176044464111, "step": 540 }, { "epoch": 0.17, "learning_rate": 4.708780111115058e-06, "logits/chosen": -0.3022860884666443, "logits/rejected": -0.303489625453949, "logps/chosen": -434.28936767578125, "logps/rejected": -439.01043701171875, "loss": 0.506, "rewards/accuracies": 0.75, "rewards/chosen": -1.1110032796859741, "rewards/margins": 0.7986260652542114, "rewards/rejected": -1.909629225730896, "step": 550 }, { "epoch": 0.17, "learning_rate": 4.6959649910976165e-06, "logits/chosen": -0.3028009533882141, "logits/rejected": -0.3035816550254822, "logps/chosen": -433.6151428222656, "logps/rejected": -436.40045166015625, "loss": 0.5109, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.0793737173080444, "rewards/margins": 0.753380537033081, "rewards/rejected": -1.832754373550415, "step": 560 }, { "epoch": 0.17, "learning_rate": 4.682892164852057e-06, "logits/chosen": -0.29320716857910156, "logits/rejected": -0.29399818181991577, "logps/chosen": -428.3548889160156, "logps/rejected": -433.96124267578125, "loss": 0.5566, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1920106410980225, "rewards/margins": 0.6818917989730835, "rewards/rejected": -1.8739025592803955, "step": 570 }, { "epoch": 0.18, "learning_rate": 4.669563166532504e-06, "logits/chosen": -0.29630088806152344, "logits/rejected": -0.2984740138053894, "logps/chosen": -428.59405517578125, "logps/rejected": -439.8580017089844, "loss": 0.5099, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0762312412261963, "rewards/margins": 0.8075464963912964, "rewards/rejected": -1.8837776184082031, "step": 580 }, { "epoch": 0.18, "learning_rate": 4.655979560356006e-06, "logits/chosen": -0.299476683139801, "logits/rejected": -0.30079394578933716, "logps/chosen": -437.24359130859375, "logps/rejected": -444.2662048339844, "loss": 0.4679, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0544074773788452, "rewards/margins": 0.8957304954528809, "rewards/rejected": -1.9501378536224365, "step": 590 }, { "epoch": 0.18, "learning_rate": 4.642142940418973e-06, "logits/chosen": -0.3016494810581207, "logits/rejected": -0.3028479218482971, "logps/chosen": -428.2562561035156, "logps/rejected": -436.1544494628906, "loss": 0.4446, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1558116674423218, "rewards/margins": 0.9864055514335632, "rewards/rejected": -2.1422171592712402, "step": 600 }, { "epoch": 0.18, "eval_logits/chosen": -0.3611030876636505, "eval_logits/rejected": -0.36189064383506775, "eval_logps/chosen": -425.2828674316406, "eval_logps/rejected": -432.32147216796875, "eval_loss": 0.4912301301956177, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.239168405532837, "eval_rewards/margins": 0.895074725151062, "eval_rewards/rejected": -2.1342432498931885, "eval_runtime": 376.2893, "eval_samples_per_second": 1.329, "eval_steps_per_second": 1.329, "step": 600 }, { "epoch": 0.19, "learning_rate": 4.6280549305101065e-06, "logits/chosen": -0.30701732635498047, "logits/rejected": -0.30861714482307434, "logps/chosen": -430.90643310546875, "logps/rejected": -437.9549255371094, "loss": 0.545, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.4101296663284302, "rewards/margins": 0.7359235286712646, "rewards/rejected": -2.1460530757904053, "step": 610 }, { "epoch": 0.19, "learning_rate": 4.61371718391983e-06, "logits/chosen": -0.30552786588668823, "logits/rejected": -0.30662640929222107, "logps/chosen": -432.50506591796875, "logps/rejected": -443.53216552734375, "loss": 0.486, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.181477427482605, "rewards/margins": 0.9552658796310425, "rewards/rejected": -2.1367435455322266, "step": 620 }, { "epoch": 0.19, "learning_rate": 4.599131383246277e-06, "logits/chosen": -0.308699369430542, "logits/rejected": -0.308963418006897, "logps/chosen": -443.76837158203125, "logps/rejected": -452.234130859375, "loss": 0.5178, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.5818192958831787, "rewards/margins": 0.70851069688797, "rewards/rejected": -2.290329933166504, "step": 630 }, { "epoch": 0.19, "learning_rate": 4.584299240197826e-06, "logits/chosen": -0.29901835322380066, "logits/rejected": -0.2997357249259949, "logps/chosen": -437.3292541503906, "logps/rejected": -438.70013427734375, "loss": 0.4941, "rewards/accuracies": 0.75, "rewards/chosen": -1.4868736267089844, "rewards/margins": 0.9190858602523804, "rewards/rejected": -2.405959129333496, "step": 640 }, { "epoch": 0.2, "learning_rate": 4.569222495392227e-06, "logits/chosen": -0.30075928568840027, "logits/rejected": -0.30218517780303955, "logps/chosen": -437.5245056152344, "logps/rejected": -447.72271728515625, "loss": 0.4425, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6097447872161865, "rewards/margins": 1.0334211587905884, "rewards/rejected": -2.6431655883789062, "step": 650 }, { "epoch": 0.2, "learning_rate": 4.553902918152329e-06, "logits/chosen": -0.3034583628177643, "logits/rejected": -0.3045238256454468, "logps/chosen": -439.45159912109375, "logps/rejected": -448.080322265625, "loss": 0.4796, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7247978448867798, "rewards/margins": 1.0205966234207153, "rewards/rejected": -2.745394229888916, "step": 660 }, { "epoch": 0.2, "learning_rate": 4.5383423062984455e-06, "logits/chosen": -0.3042409420013428, "logits/rejected": -0.3053613603115082, "logps/chosen": -432.8832092285156, "logps/rejected": -440.6971130371094, "loss": 0.468, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.8063684701919556, "rewards/margins": 0.9467433094978333, "rewards/rejected": -2.7531113624572754, "step": 670 }, { "epoch": 0.21, "learning_rate": 4.522542485937369e-06, "logits/chosen": -0.2990413308143616, "logits/rejected": -0.3002299666404724, "logps/chosen": -435.41754150390625, "logps/rejected": -442.31201171875, "loss": 0.4606, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9637155532836914, "rewards/margins": 0.984288215637207, "rewards/rejected": -2.9480037689208984, "step": 680 }, { "epoch": 0.21, "learning_rate": 4.5065053112480725e-06, "logits/chosen": -0.3054850697517395, "logits/rejected": -0.3073977530002594, "logps/chosen": -433.15771484375, "logps/rejected": -440.9410095214844, "loss": 0.4933, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.1777331829071045, "rewards/margins": 0.8640511631965637, "rewards/rejected": -3.0417845249176025, "step": 690 }, { "epoch": 0.21, "learning_rate": 4.49023266426411e-06, "logits/chosen": -0.30031442642211914, "logits/rejected": -0.3014809787273407, "logps/chosen": -441.3443908691406, "logps/rejected": -447.56231689453125, "loss": 0.5213, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -2.0933678150177, "rewards/margins": 0.8899556994438171, "rewards/rejected": -2.983323574066162, "step": 700 }, { "epoch": 0.22, "learning_rate": 4.473726454652755e-06, "logits/chosen": -0.2997979521751404, "logits/rejected": -0.30115145444869995, "logps/chosen": -440.00372314453125, "logps/rejected": -449.6446838378906, "loss": 0.4733, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0547962188720703, "rewards/margins": 1.1024844646453857, "rewards/rejected": -3.157280445098877, "step": 710 }, { "epoch": 0.22, "learning_rate": 4.45698861949089e-06, "logits/chosen": -0.3066961169242859, "logits/rejected": -0.3076573610305786, "logps/chosen": -442.42303466796875, "logps/rejected": -448.47686767578125, "loss": 0.5236, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1251652240753174, "rewards/margins": 0.8965371251106262, "rewards/rejected": -3.021702289581299, "step": 720 }, { "epoch": 0.22, "learning_rate": 4.440021123037683e-06, "logits/chosen": -0.29265230894088745, "logits/rejected": -0.29371362924575806, "logps/chosen": -441.66900634765625, "logps/rejected": -450.8470153808594, "loss": 0.5327, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -2.108212947845459, "rewards/margins": 0.8388462066650391, "rewards/rejected": -2.947059154510498, "step": 730 }, { "epoch": 0.22, "learning_rate": 4.422825956504073e-06, "logits/chosen": -0.3069104254245758, "logits/rejected": -0.3083550035953522, "logps/chosen": -449.7119140625, "logps/rejected": -459.4678649902344, "loss": 0.5117, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1724143028259277, "rewards/margins": 0.8879534602165222, "rewards/rejected": -3.0603675842285156, "step": 740 }, { "epoch": 0.23, "learning_rate": 4.4054051378190915e-06, "logits/chosen": -0.30406031012535095, "logits/rejected": -0.30475375056266785, "logps/chosen": -447.04022216796875, "logps/rejected": -452.49658203125, "loss": 0.493, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.098881483078003, "rewards/margins": 0.9250394105911255, "rewards/rejected": -3.023920774459839, "step": 750 }, { "epoch": 0.23, "learning_rate": 4.387760711393052e-06, "logits/chosen": -0.3125828206539154, "logits/rejected": -0.3135472536087036, "logps/chosen": -441.21337890625, "logps/rejected": -446.6187438964844, "loss": 0.5226, "rewards/accuracies": 0.71875, "rewards/chosen": -2.1020660400390625, "rewards/margins": 0.8715343475341797, "rewards/rejected": -2.973600387573242, "step": 760 }, { "epoch": 0.23, "learning_rate": 4.369894747877627e-06, "logits/chosen": -0.30844077467918396, "logits/rejected": -0.3093765676021576, "logps/chosen": -439.68060302734375, "logps/rejected": -447.6014709472656, "loss": 0.4748, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.9754493236541748, "rewards/margins": 1.023809552192688, "rewards/rejected": -2.9992587566375732, "step": 770 }, { "epoch": 0.24, "learning_rate": 4.3518093439228484e-06, "logits/chosen": -0.309563547372818, "logits/rejected": -0.3104109764099121, "logps/chosen": -442.0809631347656, "logps/rejected": -449.5039978027344, "loss": 0.4696, "rewards/accuracies": 0.78125, "rewards/chosen": -2.0284624099731445, "rewards/margins": 0.8842188119888306, "rewards/rejected": -2.9126813411712646, "step": 780 }, { "epoch": 0.24, "learning_rate": 4.333506621931056e-06, "logits/chosen": -0.3095022737979889, "logits/rejected": -0.3111112713813782, "logps/chosen": -441.48736572265625, "logps/rejected": -452.59466552734375, "loss": 0.4302, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7804081439971924, "rewards/margins": 1.1726500988006592, "rewards/rejected": -2.9530580043792725, "step": 790 }, { "epoch": 0.24, "learning_rate": 4.3149887298078275e-06, "logits/chosen": -0.3100133538246155, "logits/rejected": -0.3110717535018921, "logps/chosen": -439.8687438964844, "logps/rejected": -447.7996520996094, "loss": 0.4705, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8933576345443726, "rewards/margins": 1.0525233745574951, "rewards/rejected": -2.9458811283111572, "step": 800 }, { "epoch": 0.24, "eval_logits/chosen": -0.3688412606716156, "eval_logits/rejected": -0.3696078956127167, "eval_logps/chosen": -432.4996643066406, "eval_logps/rejected": -440.57073974609375, "eval_loss": 0.4888974726200104, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -1.9608467817306519, "eval_rewards/margins": 0.9983222484588623, "eval_rewards/rejected": -2.9591689109802246, "eval_runtime": 376.2946, "eval_samples_per_second": 1.329, "eval_steps_per_second": 1.329, "step": 800 }, { "epoch": 0.25, "learning_rate": 4.296257840709906e-06, "logits/chosen": -0.3060837686061859, "logits/rejected": -0.30729439854621887, "logps/chosen": -443.59765625, "logps/rejected": -454.3882751464844, "loss": 0.4934, "rewards/accuracies": 0.75, "rewards/chosen": -2.0910544395446777, "rewards/margins": 0.964927077293396, "rewards/rejected": -3.0559818744659424, "step": 810 }, { "epoch": 0.25, "learning_rate": 4.277316152790177e-06, "logits/chosen": -0.3090333938598633, "logits/rejected": -0.3097476363182068, "logps/chosen": -446.78564453125, "logps/rejected": -453.74859619140625, "loss": 0.5066, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2516064643859863, "rewards/margins": 0.9105945825576782, "rewards/rejected": -3.162201404571533, "step": 820 }, { "epoch": 0.25, "learning_rate": 4.2581658889397e-06, "logits/chosen": -0.2983805537223816, "logits/rejected": -0.29977601766586304, "logps/chosen": -434.3565979003906, "logps/rejected": -444.49542236328125, "loss": 0.4289, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.9821250438690186, "rewards/margins": 1.0745497941970825, "rewards/rejected": -3.0566749572753906, "step": 830 }, { "epoch": 0.26, "learning_rate": 4.238809296526847e-06, "logits/chosen": -0.30951178073883057, "logits/rejected": -0.31038326025009155, "logps/chosen": -453.48419189453125, "logps/rejected": -461.54833984375, "loss": 0.523, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.3027613162994385, "rewards/margins": 0.8140772581100464, "rewards/rejected": -3.1168384552001953, "step": 840 }, { "epoch": 0.26, "learning_rate": 4.219248647133559e-06, "logits/chosen": -0.3112717568874359, "logits/rejected": -0.3124113082885742, "logps/chosen": -437.2984313964844, "logps/rejected": -447.7862243652344, "loss": 0.4619, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.230332612991333, "rewards/margins": 1.0623276233673096, "rewards/rejected": -3.2926604747772217, "step": 850 }, { "epoch": 0.26, "learning_rate": 4.19948623628877e-06, "logits/chosen": -0.3127744495868683, "logits/rejected": -0.31366902589797974, "logps/chosen": -451.15966796875, "logps/rejected": -458.08154296875, "loss": 0.5186, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -2.2668843269348145, "rewards/margins": 0.8956031799316406, "rewards/rejected": -3.162487268447876, "step": 860 }, { "epoch": 0.26, "learning_rate": 4.179524383199016e-06, "logits/chosen": -0.30885085463523865, "logits/rejected": -0.3100178837776184, "logps/chosen": -445.05670166015625, "logps/rejected": -453.55328369140625, "loss": 0.4533, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.1777684688568115, "rewards/margins": 1.1419053077697754, "rewards/rejected": -3.319674253463745, "step": 870 }, { "epoch": 0.27, "learning_rate": 4.159365430476262e-06, "logits/chosen": -0.30774661898612976, "logits/rejected": -0.3091534674167633, "logps/chosen": -445.9901428222656, "logps/rejected": -453.9535217285156, "loss": 0.4711, "rewards/accuracies": 0.75, "rewards/chosen": -2.2285873889923096, "rewards/margins": 1.0858628749847412, "rewards/rejected": -3.31445050239563, "step": 880 }, { "epoch": 0.27, "learning_rate": 4.139011743862991e-06, "logits/chosen": -0.31220975518226624, "logits/rejected": -0.31295710802078247, "logps/chosen": -437.8184509277344, "logps/rejected": -450.45611572265625, "loss": 0.4411, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.122331142425537, "rewards/margins": 1.2842220067977905, "rewards/rejected": -3.406553268432617, "step": 890 }, { "epoch": 0.27, "learning_rate": 4.11846571195457e-06, "logits/chosen": -0.30749282240867615, "logits/rejected": -0.3092586398124695, "logps/chosen": -445.489013671875, "logps/rejected": -456.45361328125, "loss": 0.4331, "rewards/accuracies": 0.78125, "rewards/chosen": -2.122631788253784, "rewards/margins": 1.2757575511932373, "rewards/rejected": -3.3983893394470215, "step": 900 }, { "epoch": 0.28, "learning_rate": 4.0977297459189405e-06, "logits/chosen": -0.31161195039749146, "logits/rejected": -0.3124944865703583, "logps/chosen": -448.9032287597656, "logps/rejected": -456.729248046875, "loss": 0.4549, "rewards/accuracies": 0.78125, "rewards/chosen": -2.3067939281463623, "rewards/margins": 1.165810227394104, "rewards/rejected": -3.472604274749756, "step": 910 }, { "epoch": 0.28, "learning_rate": 4.076806279213656e-06, "logits/chosen": -0.311604380607605, "logits/rejected": -0.312518447637558, "logps/chosen": -438.07916259765625, "logps/rejected": -450.47381591796875, "loss": 0.4232, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.2351415157318115, "rewards/margins": 1.1904911994934082, "rewards/rejected": -3.425632953643799, "step": 920 }, { "epoch": 0.28, "learning_rate": 4.055697767300302e-06, "logits/chosen": -0.3170091211795807, "logits/rejected": -0.31755563616752625, "logps/chosen": -442.83758544921875, "logps/rejected": -450.9444885253906, "loss": 0.5088, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.335662841796875, "rewards/margins": 1.0687111616134644, "rewards/rejected": -3.40437388420105, "step": 930 }, { "epoch": 0.29, "learning_rate": 4.034406687356344e-06, "logits/chosen": -0.3176030218601227, "logits/rejected": -0.31867748498916626, "logps/chosen": -438.16229248046875, "logps/rejected": -446.01806640625, "loss": 0.5146, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.501446008682251, "rewards/margins": 0.903441309928894, "rewards/rejected": -3.4048874378204346, "step": 940 }, { "epoch": 0.29, "learning_rate": 4.012935537984414e-06, "logits/chosen": -0.31417202949523926, "logits/rejected": -0.3148192763328552, "logps/chosen": -435.503173828125, "logps/rejected": -444.60400390625, "loss": 0.5049, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.4111835956573486, "rewards/margins": 0.8773403167724609, "rewards/rejected": -3.2885234355926514, "step": 950 }, { "epoch": 0.29, "learning_rate": 3.991286838919086e-06, "logits/chosen": -0.30995315313339233, "logits/rejected": -0.31148335337638855, "logps/chosen": -440.8172912597656, "logps/rejected": -452.94268798828125, "loss": 0.4584, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.2748212814331055, "rewards/margins": 1.0933376550674438, "rewards/rejected": -3.3681588172912598, "step": 960 }, { "epoch": 0.29, "learning_rate": 3.969463130731183e-06, "logits/chosen": -0.3108167052268982, "logits/rejected": -0.31207841634750366, "logps/chosen": -443.76409912109375, "logps/rejected": -456.50286865234375, "loss": 0.4063, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.237427234649658, "rewards/margins": 1.2730433940887451, "rewards/rejected": -3.5104706287384033, "step": 970 }, { "epoch": 0.3, "learning_rate": 3.947466974529622e-06, "logits/chosen": -0.3074961304664612, "logits/rejected": -0.30913347005844116, "logps/chosen": -451.47320556640625, "logps/rejected": -461.1958923339844, "loss": 0.4688, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.525216817855835, "rewards/margins": 1.2734287977218628, "rewards/rejected": -3.798645496368408, "step": 980 }, { "epoch": 0.3, "learning_rate": 3.925300951660859e-06, "logits/chosen": -0.3098825216293335, "logits/rejected": -0.3106127381324768, "logps/chosen": -449.3988342285156, "logps/rejected": -455.8897399902344, "loss": 0.4974, "rewards/accuracies": 0.71875, "rewards/chosen": -2.6619412899017334, "rewards/margins": 1.0037035942077637, "rewards/rejected": -3.665644884109497, "step": 990 }, { "epoch": 0.3, "learning_rate": 3.9029676634059565e-06, "logits/chosen": -0.31196895241737366, "logits/rejected": -0.3131485879421234, "logps/chosen": -451.7205505371094, "logps/rejected": -461.85467529296875, "loss": 0.4296, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.469062328338623, "rewards/margins": 1.1974780559539795, "rewards/rejected": -3.6665406227111816, "step": 1000 }, { "epoch": 0.3, "eval_logits/chosen": -0.3799600601196289, "eval_logits/rejected": -0.3806193768978119, "eval_logps/chosen": -436.8405456542969, "eval_logps/rejected": -445.942626953125, "eval_loss": 0.48261019587516785, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": -2.3949320316314697, "eval_rewards/margins": 1.1014209985733032, "eval_rewards/rejected": -3.4963533878326416, "eval_runtime": 377.1489, "eval_samples_per_second": 1.326, "eval_steps_per_second": 1.326, "step": 1000 }, { "epoch": 0.31, "learning_rate": 3.880469730675311e-06, "logits/chosen": -0.31937772035598755, "logits/rejected": -0.3201027512550354, "logps/chosen": -444.93267822265625, "logps/rejected": -454.3338317871094, "loss": 0.4744, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.4238359928131104, "rewards/margins": 1.1197197437286377, "rewards/rejected": -3.543555736541748, "step": 1010 }, { "epoch": 0.31, "learning_rate": 3.857809793701082e-06, "logits/chosen": -0.3155730664730072, "logits/rejected": -0.31668931245803833, "logps/chosen": -447.9942932128906, "logps/rejected": -458.11199951171875, "loss": 0.4398, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.2663698196411133, "rewards/margins": 1.3081058263778687, "rewards/rejected": -3.5744757652282715, "step": 1020 }, { "epoch": 0.31, "learning_rate": 3.834990511727341e-06, "logits/chosen": -0.3186780512332916, "logits/rejected": -0.32040825486183167, "logps/chosen": -445.6949768066406, "logps/rejected": -458.57244873046875, "loss": 0.4537, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.343160629272461, "rewards/margins": 1.2446506023406982, "rewards/rejected": -3.587811231613159, "step": 1030 }, { "epoch": 0.32, "learning_rate": 3.812014562698002e-06, "logits/chosen": -0.320089191198349, "logits/rejected": -0.3210357427597046, "logps/chosen": -441.82354736328125, "logps/rejected": -449.4639587402344, "loss": 0.5402, "rewards/accuracies": 0.71875, "rewards/chosen": -2.470724582672119, "rewards/margins": 0.9423478841781616, "rewards/rejected": -3.4130725860595703, "step": 1040 }, { "epoch": 0.32, "learning_rate": 3.788884642942555e-06, "logits/chosen": -0.32223668694496155, "logits/rejected": -0.32441529631614685, "logps/chosen": -444.36328125, "logps/rejected": -457.4508361816406, "loss": 0.4432, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.3148508071899414, "rewards/margins": 1.2026770114898682, "rewards/rejected": -3.5175278186798096, "step": 1050 }, { "epoch": 0.32, "learning_rate": 3.765603466859635e-06, "logits/chosen": -0.31094425916671753, "logits/rejected": -0.3124980330467224, "logps/chosen": -439.57025146484375, "logps/rejected": -453.1656799316406, "loss": 0.4585, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.423621654510498, "rewards/margins": 1.1653788089752197, "rewards/rejected": -3.589000701904297, "step": 1060 }, { "epoch": 0.32, "learning_rate": 3.7421737665984807e-06, "logits/chosen": -0.32444941997528076, "logits/rejected": -0.3258149325847626, "logps/chosen": -444.17742919921875, "logps/rejected": -454.7518005371094, "loss": 0.485, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.465573310852051, "rewards/margins": 1.1133558750152588, "rewards/rejected": -3.5789291858673096, "step": 1070 }, { "epoch": 0.33, "learning_rate": 3.7185982917382986e-06, "logits/chosen": -0.32046034932136536, "logits/rejected": -0.3209912180900574, "logps/chosen": -449.0337829589844, "logps/rejected": -456.1290588378906, "loss": 0.5036, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.421260356903076, "rewards/margins": 1.0578956604003906, "rewards/rejected": -3.479156017303467, "step": 1080 }, { "epoch": 0.33, "learning_rate": 3.6948798089655913e-06, "logits/chosen": -0.3232346177101135, "logits/rejected": -0.3241461217403412, "logps/chosen": -448.1339416503906, "logps/rejected": -455.69970703125, "loss": 0.4664, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.2551751136779785, "rewards/margins": 1.1089966297149658, "rewards/rejected": -3.3641715049743652, "step": 1090 }, { "epoch": 0.33, "learning_rate": 3.671021101749476e-06, "logits/chosen": -0.3160512447357178, "logits/rejected": -0.3167613744735718, "logps/chosen": -434.97698974609375, "logps/rejected": -441.48065185546875, "loss": 0.4634, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.224177122116089, "rewards/margins": 1.158850908279419, "rewards/rejected": -3.383028507232666, "step": 1100 }, { "epoch": 0.34, "learning_rate": 3.6470249700150273e-06, "logits/chosen": -0.31829750537872314, "logits/rejected": -0.3188309669494629, "logps/chosen": -440.1014099121094, "logps/rejected": -448.751220703125, "loss": 0.4287, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.001333713531494, "rewards/margins": 1.3394745588302612, "rewards/rejected": -3.340808153152466, "step": 1110 }, { "epoch": 0.34, "learning_rate": 3.6228942298146985e-06, "logits/chosen": -0.31696969270706177, "logits/rejected": -0.3185669183731079, "logps/chosen": -436.61090087890625, "logps/rejected": -446.8427734375, "loss": 0.4086, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.968629240989685, "rewards/margins": 1.365235686302185, "rewards/rejected": -3.33386492729187, "step": 1120 }, { "epoch": 0.34, "learning_rate": 3.598631712997841e-06, "logits/chosen": -0.3232669234275818, "logits/rejected": -0.32362625002861023, "logps/chosen": -445.9930114746094, "logps/rejected": -456.0194396972656, "loss": 0.4797, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1778838634490967, "rewards/margins": 1.1961848735809326, "rewards/rejected": -3.3740687370300293, "step": 1130 }, { "epoch": 0.35, "learning_rate": 3.5742402668783797e-06, "logits/chosen": -0.31457391381263733, "logits/rejected": -0.31524404883384705, "logps/chosen": -434.63885498046875, "logps/rejected": -445.6629943847656, "loss": 0.4942, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.235532283782959, "rewards/margins": 1.1538090705871582, "rewards/rejected": -3.389340877532959, "step": 1140 }, { "epoch": 0.35, "learning_rate": 3.549722753900662e-06, "logits/chosen": -0.3312085270881653, "logits/rejected": -0.33145731687545776, "logps/chosen": -451.101806640625, "logps/rejected": -457.88909912109375, "loss": 0.5859, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -2.525597095489502, "rewards/margins": 0.7067753672599792, "rewards/rejected": -3.232372283935547, "step": 1150 }, { "epoch": 0.35, "learning_rate": 3.5250820513035403e-06, "logits/chosen": -0.3225269615650177, "logits/rejected": -0.3232432007789612, "logps/chosen": -438.2666931152344, "logps/rejected": -450.084716796875, "loss": 0.4502, "rewards/accuracies": 0.78125, "rewards/chosen": -2.3596291542053223, "rewards/margins": 1.153564691543579, "rewards/rejected": -3.5131936073303223, "step": 1160 }, { "epoch": 0.36, "learning_rate": 3.500321050782717e-06, "logits/chosen": -0.3299608826637268, "logits/rejected": -0.33111685514450073, "logps/chosen": -435.506103515625, "logps/rejected": -449.41644287109375, "loss": 0.4587, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.255194664001465, "rewards/margins": 1.2196067571640015, "rewards/rejected": -3.474801540374756, "step": 1170 }, { "epoch": 0.36, "learning_rate": 3.4754426581513866e-06, "logits/chosen": -0.3299122750759125, "logits/rejected": -0.33067744970321655, "logps/chosen": -450.20074462890625, "logps/rejected": -456.9153747558594, "loss": 0.4929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.382310390472412, "rewards/margins": 1.0834969282150269, "rewards/rejected": -3.4658074378967285, "step": 1180 }, { "epoch": 0.36, "learning_rate": 3.45044979299923e-06, "logits/chosen": -0.3264179527759552, "logits/rejected": -0.32756897807121277, "logps/chosen": -442.2974548339844, "logps/rejected": -449.1595764160156, "loss": 0.4977, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.263164520263672, "rewards/margins": 1.0983796119689941, "rewards/rejected": -3.361544370651245, "step": 1190 }, { "epoch": 0.36, "learning_rate": 3.425345388349787e-06, "logits/chosen": -0.31463193893432617, "logits/rejected": -0.31522423028945923, "logps/chosen": -442.2705078125, "logps/rejected": -452.19110107421875, "loss": 0.501, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.2270102500915527, "rewards/margins": 1.1489759683609009, "rewards/rejected": -3.3759865760803223, "step": 1200 }, { "epoch": 0.36, "eval_logits/chosen": -0.39124199748039246, "eval_logits/rejected": -0.3919140696525574, "eval_logps/chosen": -434.10162353515625, "eval_logps/rejected": -443.24261474609375, "eval_loss": 0.4862767159938812, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -2.1210429668426514, "eval_rewards/margins": 1.1053153276443481, "eval_rewards/rejected": -3.22635817527771, "eval_runtime": 375.0192, "eval_samples_per_second": 1.333, "eval_steps_per_second": 1.333, "step": 1200 }, { "epoch": 0.37, "learning_rate": 3.4001323903162476e-06, "logits/chosen": -0.32597848773002625, "logits/rejected": -0.32685333490371704, "logps/chosen": -435.82135009765625, "logps/rejected": -446.47552490234375, "loss": 0.4618, "rewards/accuracies": 0.78125, "rewards/chosen": -2.0256919860839844, "rewards/margins": 1.2204935550689697, "rewards/rejected": -3.246185302734375, "step": 1210 }, { "epoch": 0.37, "learning_rate": 3.3748137577557216e-06, "logits/chosen": -0.3275033235549927, "logits/rejected": -0.3280579149723053, "logps/chosen": -438.50384521484375, "logps/rejected": -447.7579040527344, "loss": 0.4531, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.154592514038086, "rewards/margins": 1.1412467956542969, "rewards/rejected": -3.295839309692383, "step": 1220 }, { "epoch": 0.37, "learning_rate": 3.3493924619219964e-06, "logits/chosen": -0.3302023112773895, "logits/rejected": -0.33196666836738586, "logps/chosen": -454.8751525878906, "logps/rejected": -466.814453125, "loss": 0.4865, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.2140915393829346, "rewards/margins": 1.0115987062454224, "rewards/rejected": -3.2256903648376465, "step": 1230 }, { "epoch": 0.38, "learning_rate": 3.3238714861168513e-06, "logits/chosen": -0.3286048173904419, "logits/rejected": -0.3293796181678772, "logps/chosen": -436.85308837890625, "logps/rejected": -445.0462951660156, "loss": 0.4905, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0171353816986084, "rewards/margins": 1.1139663457870483, "rewards/rejected": -3.1311020851135254, "step": 1240 }, { "epoch": 0.38, "learning_rate": 3.29825382533995e-06, "logits/chosen": -0.3311420679092407, "logits/rejected": -0.3327622711658478, "logps/chosen": -444.5484924316406, "logps/rejected": -455.69732666015625, "loss": 0.5066, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.358177661895752, "rewards/margins": 0.9705084562301636, "rewards/rejected": -3.328686237335205, "step": 1250 }, { "epoch": 0.38, "learning_rate": 3.272542485937369e-06, "logits/chosen": -0.33226504921913147, "logits/rejected": -0.3331693708896637, "logps/chosen": -434.11248779296875, "logps/rejected": -441.95428466796875, "loss": 0.4827, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.988227128982544, "rewards/margins": 1.1914219856262207, "rewards/rejected": -3.1796488761901855, "step": 1260 }, { "epoch": 0.39, "learning_rate": 3.2467404852487846e-06, "logits/chosen": -0.33789581060409546, "logits/rejected": -0.33837661147117615, "logps/chosen": -445.60009765625, "logps/rejected": -453.21380615234375, "loss": 0.4935, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.8867080211639404, "rewards/margins": 1.2431962490081787, "rewards/rejected": -3.1299045085906982, "step": 1270 }, { "epoch": 0.39, "learning_rate": 3.2208508512533777e-06, "logits/chosen": -0.3227623403072357, "logits/rejected": -0.3246156573295593, "logps/chosen": -447.2259826660156, "logps/rejected": -456.6937561035156, "loss": 0.4514, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.05145001411438, "rewards/margins": 1.0999400615692139, "rewards/rejected": -3.151390552520752, "step": 1280 }, { "epoch": 0.39, "learning_rate": 3.1948766222144863e-06, "logits/chosen": -0.32600507140159607, "logits/rejected": -0.3266277313232422, "logps/chosen": -434.4227600097656, "logps/rejected": -442.1160583496094, "loss": 0.5228, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.233891010284424, "rewards/margins": 0.8642382621765137, "rewards/rejected": -3.0981292724609375, "step": 1290 }, { "epoch": 0.39, "learning_rate": 3.168820846323053e-06, "logits/chosen": -0.3299737870693207, "logits/rejected": -0.3313831090927124, "logps/chosen": -434.67803955078125, "logps/rejected": -446.71417236328125, "loss": 0.4392, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.0030479431152344, "rewards/margins": 1.1440895795822144, "rewards/rejected": -3.147137403488159, "step": 1300 }, { "epoch": 0.4, "learning_rate": 3.142686581339902e-06, "logits/chosen": -0.32545098662376404, "logits/rejected": -0.32752394676208496, "logps/chosen": -435.9081115722656, "logps/rejected": -445.0193786621094, "loss": 0.5154, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0516555309295654, "rewards/margins": 1.0273317098617554, "rewards/rejected": -3.0789875984191895, "step": 1310 }, { "epoch": 0.4, "learning_rate": 3.1164768942369058e-06, "logits/chosen": -0.33717575669288635, "logits/rejected": -0.33777323365211487, "logps/chosen": -439.6886291503906, "logps/rejected": -450.8135681152344, "loss": 0.4056, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.7847926616668701, "rewards/margins": 1.3125979900360107, "rewards/rejected": -3.097390651702881, "step": 1320 }, { "epoch": 0.4, "learning_rate": 3.0901948608370503e-06, "logits/chosen": -0.3371260166168213, "logits/rejected": -0.33846548199653625, "logps/chosen": -436.64190673828125, "logps/rejected": -450.7660217285156, "loss": 0.4474, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.9579681158065796, "rewards/margins": 1.1995084285736084, "rewards/rejected": -3.1574764251708984, "step": 1330 }, { "epoch": 0.41, "learning_rate": 3.063843565453486e-06, "logits/chosen": -0.3233332931995392, "logits/rejected": -0.3235628008842468, "logps/chosen": -441.6625061035156, "logps/rejected": -450.6896057128906, "loss": 0.4454, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0109899044036865, "rewards/margins": 1.2036950588226318, "rewards/rejected": -3.2146849632263184, "step": 1340 }, { "epoch": 0.41, "learning_rate": 3.0374261005275606e-06, "logits/chosen": -0.32744866609573364, "logits/rejected": -0.32873040437698364, "logps/chosen": -438.97955322265625, "logps/rejected": -452.37255859375, "loss": 0.4277, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.850262999534607, "rewards/margins": 1.45210862159729, "rewards/rejected": -3.3023715019226074, "step": 1350 }, { "epoch": 0.41, "learning_rate": 3.0109455662659126e-06, "logits/chosen": -0.33421364426612854, "logits/rejected": -0.33508172631263733, "logps/chosen": -438.8184509277344, "logps/rejected": -447.74267578125, "loss": 0.469, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3082900047302246, "rewards/margins": 1.0502725839614868, "rewards/rejected": -3.358562469482422, "step": 1360 }, { "epoch": 0.42, "learning_rate": 2.984405070276646e-06, "logits/chosen": -0.3377315402030945, "logits/rejected": -0.3380245268344879, "logps/chosen": -440.62689208984375, "logps/rejected": -448.7757873535156, "loss": 0.4497, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.239384174346924, "rewards/margins": 1.1150033473968506, "rewards/rejected": -3.3543879985809326, "step": 1370 }, { "epoch": 0.42, "learning_rate": 2.9578077272046407e-06, "logits/chosen": -0.3324066698551178, "logits/rejected": -0.3327699303627014, "logps/chosen": -445.11651611328125, "logps/rejected": -452.57135009765625, "loss": 0.4627, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.3683180809020996, "rewards/margins": 1.2064650058746338, "rewards/rejected": -3.5747828483581543, "step": 1380 }, { "epoch": 0.42, "learning_rate": 2.931156658366032e-06, "logits/chosen": -0.33288371562957764, "logits/rejected": -0.33407607674598694, "logps/chosen": -438.28363037109375, "logps/rejected": -449.0726623535156, "loss": 0.4609, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.4992074966430664, "rewards/margins": 1.1167179346084595, "rewards/rejected": -3.6159253120422363, "step": 1390 }, { "epoch": 0.43, "learning_rate": 2.9044549913819125e-06, "logits/chosen": -0.33773019909858704, "logits/rejected": -0.3393145203590393, "logps/chosen": -441.80511474609375, "logps/rejected": -450.90997314453125, "loss": 0.421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.251107692718506, "rewards/margins": 1.231013536453247, "rewards/rejected": -3.482121706008911, "step": 1400 }, { "epoch": 0.43, "eval_logits/chosen": -0.401915043592453, "eval_logits/rejected": -0.4025632441043854, "eval_logps/chosen": -436.2534484863281, "eval_logps/rejected": -445.6542053222656, "eval_loss": 0.4834233820438385, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -2.336226463317871, "eval_rewards/margins": 1.131289005279541, "eval_rewards/rejected": -3.4675159454345703, "eval_runtime": 373.3095, "eval_samples_per_second": 1.339, "eval_steps_per_second": 1.339, "step": 1400 }, { "epoch": 0.43, "learning_rate": 2.877705859811292e-06, "logits/chosen": -0.32963141798973083, "logits/rejected": -0.32958561182022095, "logps/chosen": -441.468017578125, "logps/rejected": -452.41632080078125, "loss": 0.4867, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.3174846172332764, "rewards/margins": 1.2222645282745361, "rewards/rejected": -3.5397496223449707, "step": 1410 }, { "epoch": 0.43, "learning_rate": 2.850912402783361e-06, "logits/chosen": -0.33581605553627014, "logits/rejected": -0.3373740315437317, "logps/chosen": -443.38507080078125, "logps/rejected": -455.705810546875, "loss": 0.4821, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.5340211391448975, "rewards/margins": 1.059066653251648, "rewards/rejected": -3.593087673187256, "step": 1420 }, { "epoch": 0.43, "learning_rate": 2.8240777646290973e-06, "logits/chosen": -0.3432762026786804, "logits/rejected": -0.3442252576351166, "logps/chosen": -455.3641662597656, "logps/rejected": -465.02032470703125, "loss": 0.4363, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.3768112659454346, "rewards/margins": 1.3034956455230713, "rewards/rejected": -3.680307388305664, "step": 1430 }, { "epoch": 0.44, "learning_rate": 2.7972050945122666e-06, "logits/chosen": -0.3318456709384918, "logits/rejected": -0.33274808526039124, "logps/chosen": -442.74029541015625, "logps/rejected": -453.32745361328125, "loss": 0.4564, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.556647300720215, "rewards/margins": 1.2236577272415161, "rewards/rejected": -3.7803051471710205, "step": 1440 }, { "epoch": 0.44, "learning_rate": 2.7702975460598545e-06, "logits/chosen": -0.33731141686439514, "logits/rejected": -0.33812469244003296, "logps/chosen": -445.42596435546875, "logps/rejected": -457.1685485839844, "loss": 0.4487, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.542117118835449, "rewards/margins": 1.1616876125335693, "rewards/rejected": -3.7038047313690186, "step": 1450 }, { "epoch": 0.44, "learning_rate": 2.7433582769919752e-06, "logits/chosen": -0.3384588360786438, "logits/rejected": -0.33992061018943787, "logps/chosen": -448.994873046875, "logps/rejected": -456.4767150878906, "loss": 0.5548, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -2.7594006061553955, "rewards/margins": 0.9322620630264282, "rewards/rejected": -3.691662549972534, "step": 1460 }, { "epoch": 0.45, "learning_rate": 2.716390448751294e-06, "logits/chosen": -0.34274882078170776, "logits/rejected": -0.34329262375831604, "logps/chosen": -450.77972412109375, "logps/rejected": -461.76239013671875, "loss": 0.4976, "rewards/accuracies": 0.71875, "rewards/chosen": -2.5453426837921143, "rewards/margins": 1.080673336982727, "rewards/rejected": -3.6260154247283936, "step": 1470 }, { "epoch": 0.45, "learning_rate": 2.6893972261320265e-06, "logits/chosen": -0.3363896608352661, "logits/rejected": -0.33778852224349976, "logps/chosen": -442.7216796875, "logps/rejected": -453.9684143066406, "loss": 0.4628, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.584522008895874, "rewards/margins": 1.2380025386810303, "rewards/rejected": -3.8225245475769043, "step": 1480 }, { "epoch": 0.45, "learning_rate": 2.6623817769085268e-06, "logits/chosen": -0.3299495577812195, "logits/rejected": -0.3310778737068176, "logps/chosen": -438.0104064941406, "logps/rejected": -450.68572998046875, "loss": 0.4308, "rewards/accuracies": 0.75, "rewards/chosen": -2.4091532230377197, "rewards/margins": 1.258310079574585, "rewards/rejected": -3.6674628257751465, "step": 1490 }, { "epoch": 0.46, "learning_rate": 2.6353472714635443e-06, "logits/chosen": -0.3383990526199341, "logits/rejected": -0.33991554379463196, "logps/chosen": -453.71038818359375, "logps/rejected": -466.170166015625, "loss": 0.4603, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7626547813415527, "rewards/margins": 1.127687692642212, "rewards/rejected": -3.8903422355651855, "step": 1500 }, { "epoch": 0.46, "learning_rate": 2.6082968824161558e-06, "logits/chosen": -0.3404627740383148, "logits/rejected": -0.3412095606327057, "logps/chosen": -446.44281005859375, "logps/rejected": -454.9615783691406, "loss": 0.4887, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.626864194869995, "rewards/margins": 1.2109944820404053, "rewards/rejected": -3.8378589153289795, "step": 1510 }, { "epoch": 0.46, "learning_rate": 2.5812337842494517e-06, "logits/chosen": -0.3334888815879822, "logits/rejected": -0.334361732006073, "logps/chosen": -437.97979736328125, "logps/rejected": -449.66864013671875, "loss": 0.4395, "rewards/accuracies": 0.78125, "rewards/chosen": -2.6739344596862793, "rewards/margins": 1.2408138513565063, "rewards/rejected": -3.914747953414917, "step": 1520 }, { "epoch": 0.46, "learning_rate": 2.554161152937994e-06, "logits/chosen": -0.34664058685302734, "logits/rejected": -0.34752577543258667, "logps/chosen": -452.38983154296875, "logps/rejected": -458.98046875, "loss": 0.46, "rewards/accuracies": 0.78125, "rewards/chosen": -2.5840182304382324, "rewards/margins": 1.3410053253173828, "rewards/rejected": -3.9250235557556152, "step": 1530 }, { "epoch": 0.47, "learning_rate": 2.5270821655750997e-06, "logits/chosen": -0.3402210772037506, "logits/rejected": -0.3408128619194031, "logps/chosen": -452.06658935546875, "logps/rejected": -465.1114807128906, "loss": 0.383, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -2.483328342437744, "rewards/margins": 1.4499397277832031, "rewards/rejected": -3.9332680702209473, "step": 1540 }, { "epoch": 0.47, "learning_rate": 2.5e-06, "logits/chosen": -0.33848652243614197, "logits/rejected": -0.3391149640083313, "logps/chosen": -447.24407958984375, "logps/rejected": -456.50933837890625, "loss": 0.4384, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.932450294494629, "rewards/margins": 1.1602147817611694, "rewards/rejected": -4.09266471862793, "step": 1550 }, { "epoch": 0.47, "learning_rate": 2.4729178344249007e-06, "logits/chosen": -0.34805721044540405, "logits/rejected": -0.34990328550338745, "logps/chosen": -457.77520751953125, "logps/rejected": -467.7879943847656, "loss": 0.4306, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.852219343185425, "rewards/margins": 1.3265211582183838, "rewards/rejected": -4.178740501403809, "step": 1560 }, { "epoch": 0.48, "learning_rate": 2.4458388470620066e-06, "logits/chosen": -0.34960517287254333, "logits/rejected": -0.35107699036598206, "logps/chosen": -457.14569091796875, "logps/rejected": -467.239990234375, "loss": 0.4444, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.756500720977783, "rewards/margins": 1.3199396133422852, "rewards/rejected": -4.07643985748291, "step": 1570 }, { "epoch": 0.48, "learning_rate": 2.418766215750549e-06, "logits/chosen": -0.3384454548358917, "logits/rejected": -0.3394390642642975, "logps/chosen": -455.9664001464844, "logps/rejected": -467.4884338378906, "loss": 0.4289, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.917130708694458, "rewards/margins": 1.3165969848632812, "rewards/rejected": -4.23372745513916, "step": 1580 }, { "epoch": 0.48, "learning_rate": 2.3917031175838447e-06, "logits/chosen": -0.33930128812789917, "logits/rejected": -0.33957165479660034, "logps/chosen": -452.30548095703125, "logps/rejected": -467.23614501953125, "loss": 0.4339, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8669447898864746, "rewards/margins": 1.3872116804122925, "rewards/rejected": -4.254156589508057, "step": 1590 }, { "epoch": 0.49, "learning_rate": 2.3646527285364565e-06, "logits/chosen": -0.33700358867645264, "logits/rejected": -0.33825331926345825, "logps/chosen": -451.98272705078125, "logps/rejected": -461.351318359375, "loss": 0.4821, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.0442519187927246, "rewards/margins": 1.0982847213745117, "rewards/rejected": -4.1425371170043945, "step": 1600 }, { "epoch": 0.49, "eval_logits/chosen": -0.41185611486434937, "eval_logits/rejected": -0.41246527433395386, "eval_logps/chosen": -441.00274658203125, "eval_logps/rejected": -451.21136474609375, "eval_loss": 0.48274433612823486, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -2.811156749725342, "eval_rewards/margins": 1.2120723724365234, "eval_rewards/rejected": -4.023228645324707, "eval_runtime": 376.6555, "eval_samples_per_second": 1.327, "eval_steps_per_second": 1.327, "step": 1600 }, { "epoch": 0.49, "learning_rate": 2.3376182230914728e-06, "logits/chosen": -0.35231637954711914, "logits/rejected": -0.3524485230445862, "logps/chosen": -450.71600341796875, "logps/rejected": -459.95623779296875, "loss": 0.4562, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.791792869567871, "rewards/margins": 1.3087048530578613, "rewards/rejected": -4.100497245788574, "step": 1610 }, { "epoch": 0.49, "learning_rate": 2.3106027738679743e-06, "logits/chosen": -0.3403882086277008, "logits/rejected": -0.34152495861053467, "logps/chosen": -453.09197998046875, "logps/rejected": -461.7265625, "loss": 0.5492, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.994368076324463, "rewards/margins": 0.9577304124832153, "rewards/rejected": -3.9520981311798096, "step": 1620 }, { "epoch": 0.5, "learning_rate": 2.2836095512487063e-06, "logits/chosen": -0.34236225485801697, "logits/rejected": -0.3437976539134979, "logps/chosen": -448.03765869140625, "logps/rejected": -458.0298767089844, "loss": 0.4769, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.8009488582611084, "rewards/margins": 1.1950900554656982, "rewards/rejected": -3.9960389137268066, "step": 1630 }, { "epoch": 0.5, "learning_rate": 2.256641723008026e-06, "logits/chosen": -0.3453958332538605, "logits/rejected": -0.34628570079803467, "logps/chosen": -452.4602966308594, "logps/rejected": -464.2635192871094, "loss": 0.4904, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.8692307472229004, "rewards/margins": 1.1883941888809204, "rewards/rejected": -4.057624816894531, "step": 1640 }, { "epoch": 0.5, "learning_rate": 2.2297024539401463e-06, "logits/chosen": -0.3422110974788666, "logits/rejected": -0.34265169501304626, "logps/chosen": -459.0148010253906, "logps/rejected": -469.46038818359375, "loss": 0.4726, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.0340380668640137, "rewards/margins": 1.1149537563323975, "rewards/rejected": -4.148991584777832, "step": 1650 }, { "epoch": 0.5, "learning_rate": 2.2027949054877342e-06, "logits/chosen": -0.34315139055252075, "logits/rejected": -0.3437284529209137, "logps/chosen": -448.80657958984375, "logps/rejected": -458.0669860839844, "loss": 0.5145, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.8165078163146973, "rewards/margins": 1.120755672454834, "rewards/rejected": -3.9372634887695312, "step": 1660 }, { "epoch": 0.51, "learning_rate": 2.175922235370904e-06, "logits/chosen": -0.34890785813331604, "logits/rejected": -0.34955543279647827, "logps/chosen": -448.3866271972656, "logps/rejected": -457.5038146972656, "loss": 0.4845, "rewards/accuracies": 0.75, "rewards/chosen": -2.5519251823425293, "rewards/margins": 1.266904592514038, "rewards/rejected": -3.8188300132751465, "step": 1670 }, { "epoch": 0.51, "learning_rate": 2.1490875972166394e-06, "logits/chosen": -0.3498338460922241, "logits/rejected": -0.35048046708106995, "logps/chosen": -449.01849365234375, "logps/rejected": -459.8980407714844, "loss": 0.3836, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -2.5593769550323486, "rewards/margins": 1.4853286743164062, "rewards/rejected": -4.044705390930176, "step": 1680 }, { "epoch": 0.51, "learning_rate": 2.1222941401887087e-06, "logits/chosen": -0.3391914367675781, "logits/rejected": -0.3401142954826355, "logps/chosen": -437.19488525390625, "logps/rejected": -449.09820556640625, "loss": 0.4638, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.642850875854492, "rewards/margins": 1.1874374151229858, "rewards/rejected": -3.8302879333496094, "step": 1690 }, { "epoch": 0.52, "learning_rate": 2.0955450086180883e-06, "logits/chosen": -0.3401223123073578, "logits/rejected": -0.3409723937511444, "logps/chosen": -453.819580078125, "logps/rejected": -463.77117919921875, "loss": 0.4747, "rewards/accuracies": 0.75, "rewards/chosen": -2.682774066925049, "rewards/margins": 1.2849785089492798, "rewards/rejected": -3.9677529335021973, "step": 1700 }, { "epoch": 0.52, "learning_rate": 2.0688433416339694e-06, "logits/chosen": -0.3425321877002716, "logits/rejected": -0.3435406982898712, "logps/chosen": -441.6337890625, "logps/rejected": -454.7735290527344, "loss": 0.4359, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.900296688079834, "rewards/margins": 1.1836225986480713, "rewards/rejected": -4.083919525146484, "step": 1710 }, { "epoch": 0.52, "learning_rate": 2.0421922727953597e-06, "logits/chosen": -0.3457149863243103, "logits/rejected": -0.3468255400657654, "logps/chosen": -449.11700439453125, "logps/rejected": -461.40020751953125, "loss": 0.4626, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.759221315383911, "rewards/margins": 1.2033522129058838, "rewards/rejected": -3.962573528289795, "step": 1720 }, { "epoch": 0.53, "learning_rate": 2.0155949297233542e-06, "logits/chosen": -0.3487555980682373, "logits/rejected": -0.34981435537338257, "logps/chosen": -461.87481689453125, "logps/rejected": -473.541015625, "loss": 0.4555, "rewards/accuracies": 0.75, "rewards/chosen": -2.762120008468628, "rewards/margins": 1.2758208513259888, "rewards/rejected": -4.037940979003906, "step": 1730 }, { "epoch": 0.53, "learning_rate": 1.9890544337340882e-06, "logits/chosen": -0.3474620282649994, "logits/rejected": -0.34911760687828064, "logps/chosen": -446.1351623535156, "logps/rejected": -461.01678466796875, "loss": 0.4426, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.9118289947509766, "rewards/margins": 1.271337866783142, "rewards/rejected": -4.183166980743408, "step": 1740 }, { "epoch": 0.53, "learning_rate": 1.96257389947244e-06, "logits/chosen": -0.34583669900894165, "logits/rejected": -0.3470597565174103, "logps/chosen": -445.00054931640625, "logps/rejected": -457.888671875, "loss": 0.4487, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.823984146118164, "rewards/margins": 1.3737802505493164, "rewards/rejected": -4.1977643966674805, "step": 1750 }, { "epoch": 0.53, "learning_rate": 1.936156434546515e-06, "logits/chosen": -0.3472025990486145, "logits/rejected": -0.3478149473667145, "logps/chosen": -450.0955505371094, "logps/rejected": -459.27166748046875, "loss": 0.5015, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.8728580474853516, "rewards/margins": 1.2571897506713867, "rewards/rejected": -4.130047798156738, "step": 1760 }, { "epoch": 0.54, "learning_rate": 1.90980513916295e-06, "logits/chosen": -0.3443449139595032, "logits/rejected": -0.3453408479690552, "logps/chosen": -450.039306640625, "logps/rejected": -456.46392822265625, "loss": 0.4463, "rewards/accuracies": 0.78125, "rewards/chosen": -2.8687210083007812, "rewards/margins": 1.327781081199646, "rewards/rejected": -4.196502208709717, "step": 1770 }, { "epoch": 0.54, "learning_rate": 1.8835231057630955e-06, "logits/chosen": -0.34365350008010864, "logits/rejected": -0.34461337327957153, "logps/chosen": -454.1045837402344, "logps/rejected": -468.08251953125, "loss": 0.3981, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.681962490081787, "rewards/margins": 1.4462287425994873, "rewards/rejected": -4.128190517425537, "step": 1780 }, { "epoch": 0.54, "learning_rate": 1.8573134186600978e-06, "logits/chosen": -0.3493928909301758, "logits/rejected": -0.35027194023132324, "logps/chosen": -447.32666015625, "logps/rejected": -458.9419860839844, "loss": 0.4397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6235809326171875, "rewards/margins": 1.4347044229507446, "rewards/rejected": -4.058285236358643, "step": 1790 }, { "epoch": 0.55, "learning_rate": 1.8311791536769485e-06, "logits/chosen": -0.346055805683136, "logits/rejected": -0.3475271463394165, "logps/chosen": -442.3778381347656, "logps/rejected": -458.1031188964844, "loss": 0.3935, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -2.6405744552612305, "rewards/margins": 1.582415223121643, "rewards/rejected": -4.222989559173584, "step": 1800 }, { "epoch": 0.55, "eval_logits/chosen": -0.41737592220306396, "eval_logits/rejected": -0.4179980754852295, "eval_logps/chosen": -440.4134826660156, "eval_logps/rejected": -450.7027893066406, "eval_loss": 0.47837841510772705, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -2.7522289752960205, "eval_rewards/margins": 1.220139503479004, "eval_rewards/rejected": -3.9723684787750244, "eval_runtime": 351.6535, "eval_samples_per_second": 1.422, "eval_steps_per_second": 1.422, "step": 1800 }, { "epoch": 0.55, "learning_rate": 1.805123377785515e-06, "logits/chosen": -0.3527616858482361, "logits/rejected": -0.3528694212436676, "logps/chosen": -444.4476623535156, "logps/rejected": -453.0213317871094, "loss": 0.4432, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.709862232208252, "rewards/margins": 1.3173949718475342, "rewards/rejected": -4.027257442474365, "step": 1810 }, { "epoch": 0.55, "learning_rate": 1.7791491487466234e-06, "logits/chosen": -0.3477206528186798, "logits/rejected": -0.34793621301651, "logps/chosen": -444.4949645996094, "logps/rejected": -456.147705078125, "loss": 0.4933, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.9446983337402344, "rewards/margins": 1.1086232662200928, "rewards/rejected": -4.053321361541748, "step": 1820 }, { "epoch": 0.56, "learning_rate": 1.7532595147512167e-06, "logits/chosen": -0.34836429357528687, "logits/rejected": -0.34931057691574097, "logps/chosen": -448.5811462402344, "logps/rejected": -460.9894104003906, "loss": 0.4243, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.7561140060424805, "rewards/margins": 1.3816736936569214, "rewards/rejected": -4.137787818908691, "step": 1830 }, { "epoch": 0.56, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -0.359462171792984, "logits/rejected": -0.36063042283058167, "logps/chosen": -448.9486389160156, "logps/rejected": -458.26776123046875, "loss": 0.4759, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.6954994201660156, "rewards/margins": 1.2426683902740479, "rewards/rejected": -3.9381680488586426, "step": 1840 }, { "epoch": 0.56, "learning_rate": 1.7017461746600506e-06, "logits/chosen": -0.3540958762168884, "logits/rejected": -0.3554149866104126, "logps/chosen": -442.2723083496094, "logps/rejected": -452.81951904296875, "loss": 0.479, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.743239402770996, "rewards/margins": 1.2077919244766235, "rewards/rejected": -3.95103120803833, "step": 1850 }, { "epoch": 0.56, "learning_rate": 1.6761285138831493e-06, "logits/chosen": -0.3558579981327057, "logits/rejected": -0.35607069730758667, "logps/chosen": -448.01458740234375, "logps/rejected": -458.3499450683594, "loss": 0.4367, "rewards/accuracies": 0.78125, "rewards/chosen": -2.706444025039673, "rewards/margins": 1.3273353576660156, "rewards/rejected": -4.033779144287109, "step": 1860 }, { "epoch": 0.57, "learning_rate": 1.6506075380780043e-06, "logits/chosen": -0.343932569026947, "logits/rejected": -0.3449569046497345, "logps/chosen": -449.41534423828125, "logps/rejected": -461.0784606933594, "loss": 0.4612, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.849799633026123, "rewards/margins": 1.247933030128479, "rewards/rejected": -4.0977325439453125, "step": 1870 }, { "epoch": 0.57, "learning_rate": 1.625186242244279e-06, "logits/chosen": -0.351362407207489, "logits/rejected": -0.35285985469818115, "logps/chosen": -442.25335693359375, "logps/rejected": -452.58526611328125, "loss": 0.4487, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.7264816761016846, "rewards/margins": 1.3034883737564087, "rewards/rejected": -4.029970169067383, "step": 1880 }, { "epoch": 0.57, "learning_rate": 1.5998676096837534e-06, "logits/chosen": -0.35466188192367554, "logits/rejected": -0.35623863339424133, "logps/chosen": -455.30859375, "logps/rejected": -466.81817626953125, "loss": 0.4525, "rewards/accuracies": 0.78125, "rewards/chosen": -2.76641845703125, "rewards/margins": 1.3434927463531494, "rewards/rejected": -4.1099114418029785, "step": 1890 }, { "epoch": 0.58, "learning_rate": 1.574654611650214e-06, "logits/chosen": -0.353823721408844, "logits/rejected": -0.3546674847602844, "logps/chosen": -448.30615234375, "logps/rejected": -462.4393005371094, "loss": 0.4049, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.676255226135254, "rewards/margins": 1.421419382095337, "rewards/rejected": -4.097674369812012, "step": 1900 }, { "epoch": 0.58, "learning_rate": 1.54955020700077e-06, "logits/chosen": -0.35255804657936096, "logits/rejected": -0.35378915071487427, "logps/chosen": -442.2880859375, "logps/rejected": -454.832763671875, "loss": 0.4771, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.8402438163757324, "rewards/margins": 1.2248531579971313, "rewards/rejected": -4.065096855163574, "step": 1910 }, { "epoch": 0.58, "learning_rate": 1.5245573418486136e-06, "logits/chosen": -0.35058295726776123, "logits/rejected": -0.3520324230194092, "logps/chosen": -451.47265625, "logps/rejected": -462.79791259765625, "loss": 0.4615, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.648658037185669, "rewards/margins": 1.3944532871246338, "rewards/rejected": -4.043111324310303, "step": 1920 }, { "epoch": 0.59, "learning_rate": 1.4996789492172836e-06, "logits/chosen": -0.35444819927215576, "logits/rejected": -0.35484084486961365, "logps/chosen": -447.3772888183594, "logps/rejected": -457.873291015625, "loss": 0.4392, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.671096086502075, "rewards/margins": 1.3712053298950195, "rewards/rejected": -4.042301654815674, "step": 1930 }, { "epoch": 0.59, "learning_rate": 1.4749179486964599e-06, "logits/chosen": -0.3643060028553009, "logits/rejected": -0.3653911054134369, "logps/chosen": -452.032470703125, "logps/rejected": -464.42193603515625, "loss": 0.4286, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.5244593620300293, "rewards/margins": 1.475843071937561, "rewards/rejected": -4.000302314758301, "step": 1940 }, { "epoch": 0.59, "learning_rate": 1.4502772460993387e-06, "logits/chosen": -0.35049787163734436, "logits/rejected": -0.3510446846485138, "logps/chosen": -448.87518310546875, "logps/rejected": -457.3994140625, "loss": 0.491, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.827812910079956, "rewards/margins": 1.2428455352783203, "rewards/rejected": -4.0706586837768555, "step": 1950 }, { "epoch": 0.6, "learning_rate": 1.4257597331216211e-06, "logits/chosen": -0.3531518578529358, "logits/rejected": -0.3538290858268738, "logps/chosen": -456.27691650390625, "logps/rejected": -466.98687744140625, "loss": 0.4657, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.0542407035827637, "rewards/margins": 1.164147138595581, "rewards/rejected": -4.218388080596924, "step": 1960 }, { "epoch": 0.6, "learning_rate": 1.4013682870021594e-06, "logits/chosen": -0.35849729180336, "logits/rejected": -0.3595832884311676, "logps/chosen": -447.4246520996094, "logps/rejected": -460.0209045410156, "loss": 0.3725, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -2.7747585773468018, "rewards/margins": 1.452606439590454, "rewards/rejected": -4.227365016937256, "step": 1970 }, { "epoch": 0.6, "learning_rate": 1.3771057701853034e-06, "logits/chosen": -0.35135719180107117, "logits/rejected": -0.3521498739719391, "logps/chosen": -455.69549560546875, "logps/rejected": -467.37591552734375, "loss": 0.4899, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.884308338165283, "rewards/margins": 1.336925983428955, "rewards/rejected": -4.221234321594238, "step": 1980 }, { "epoch": 0.6, "learning_rate": 1.352975029984974e-06, "logits/chosen": -0.3514239192008972, "logits/rejected": -0.35260799527168274, "logps/chosen": -441.4317932128906, "logps/rejected": -454.58251953125, "loss": 0.4829, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.8820691108703613, "rewards/margins": 1.2067675590515137, "rewards/rejected": -4.088836669921875, "step": 1990 }, { "epoch": 0.61, "learning_rate": 1.328978898250525e-06, "logits/chosen": -0.3527238070964813, "logits/rejected": -0.3534066379070282, "logps/chosen": -452.95989990234375, "logps/rejected": -464.698486328125, "loss": 0.4476, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.7672972679138184, "rewards/margins": 1.3264329433441162, "rewards/rejected": -4.093730926513672, "step": 2000 }, { "epoch": 0.61, "eval_logits/chosen": -0.42317765951156616, "eval_logits/rejected": -0.42379918694496155, "eval_logps/chosen": -441.13116455078125, "eval_logps/rejected": -451.5594177246094, "eval_loss": 0.4796808958053589, "eval_rewards/accuracies": 0.7559999823570251, "eval_rewards/chosen": -2.823995590209961, "eval_rewards/margins": 1.234041452407837, "eval_rewards/rejected": -4.058037281036377, "eval_runtime": 351.7707, "eval_samples_per_second": 1.421, "eval_steps_per_second": 1.421, "step": 2000 }, { "epoch": 0.61, "learning_rate": 1.305120191034409e-06, "logits/chosen": -0.34321507811546326, "logits/rejected": -0.343815416097641, "logps/chosen": -443.4376525878906, "logps/rejected": -452.2301330566406, "loss": 0.4223, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.8989710807800293, "rewards/margins": 1.322284460067749, "rewards/rejected": -4.221255302429199, "step": 2010 }, { "epoch": 0.61, "learning_rate": 1.2814017082617025e-06, "logits/chosen": -0.3508697748184204, "logits/rejected": -0.35247209668159485, "logps/chosen": -444.38641357421875, "logps/rejected": -456.51153564453125, "loss": 0.4284, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.6340365409851074, "rewards/margins": 1.4381511211395264, "rewards/rejected": -4.072187900543213, "step": 2020 }, { "epoch": 0.62, "learning_rate": 1.2578262334015201e-06, "logits/chosen": -0.34914684295654297, "logits/rejected": -0.35076671838760376, "logps/chosen": -441.771728515625, "logps/rejected": -457.03057861328125, "loss": 0.4234, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -2.6355042457580566, "rewards/margins": 1.4987059831619263, "rewards/rejected": -4.134210109710693, "step": 2030 }, { "epoch": 0.62, "learning_rate": 1.234396533140365e-06, "logits/chosen": -0.3611491024494171, "logits/rejected": -0.3617832660675049, "logps/chosen": -454.31951904296875, "logps/rejected": -467.4290466308594, "loss": 0.435, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.7407212257385254, "rewards/margins": 1.4744737148284912, "rewards/rejected": -4.215195178985596, "step": 2040 }, { "epoch": 0.62, "learning_rate": 1.2111153570574454e-06, "logits/chosen": -0.35015982389450073, "logits/rejected": -0.35119912028312683, "logps/chosen": -446.706787109375, "logps/rejected": -461.330810546875, "loss": 0.4095, "rewards/accuracies": 0.78125, "rewards/chosen": -2.59765625, "rewards/margins": 1.5183773040771484, "rewards/rejected": -4.116034030914307, "step": 2050 }, { "epoch": 0.63, "learning_rate": 1.187985437301999e-06, "logits/chosen": -0.35530123114585876, "logits/rejected": -0.35578909516334534, "logps/chosen": -438.29974365234375, "logps/rejected": -452.89483642578125, "loss": 0.4416, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.729788303375244, "rewards/margins": 1.507673978805542, "rewards/rejected": -4.237462043762207, "step": 2060 }, { "epoch": 0.63, "learning_rate": 1.1650094882726599e-06, "logits/chosen": -0.36762434244155884, "logits/rejected": -0.36925989389419556, "logps/chosen": -455.1209411621094, "logps/rejected": -469.49920654296875, "loss": 0.4061, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.913037061691284, "rewards/margins": 1.4348491430282593, "rewards/rejected": -4.347886562347412, "step": 2070 }, { "epoch": 0.63, "learning_rate": 1.1421902062989178e-06, "logits/chosen": -0.3690846264362335, "logits/rejected": -0.3703765869140625, "logps/chosen": -451.37860107421875, "logps/rejected": -462.94293212890625, "loss": 0.4399, "rewards/accuracies": 0.78125, "rewards/chosen": -2.7418205738067627, "rewards/margins": 1.415470838546753, "rewards/rejected": -4.157290935516357, "step": 2080 }, { "epoch": 0.63, "learning_rate": 1.1195302693246879e-06, "logits/chosen": -0.34830474853515625, "logits/rejected": -0.34976112842559814, "logps/chosen": -447.49261474609375, "logps/rejected": -460.62994384765625, "loss": 0.4744, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.9276537895202637, "rewards/margins": 1.2483450174331665, "rewards/rejected": -4.175999164581299, "step": 2090 }, { "epoch": 0.64, "learning_rate": 1.0970323365940443e-06, "logits/chosen": -0.358784556388855, "logits/rejected": -0.35958269238471985, "logps/chosen": -449.94482421875, "logps/rejected": -461.4793395996094, "loss": 0.456, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.9514570236206055, "rewards/margins": 1.3440725803375244, "rewards/rejected": -4.295529365539551, "step": 2100 }, { "epoch": 0.64, "learning_rate": 1.0746990483391414e-06, "logits/chosen": -0.3496165871620178, "logits/rejected": -0.3507440388202667, "logps/chosen": -453.05755615234375, "logps/rejected": -464.02978515625, "loss": 0.429, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.848665237426758, "rewards/margins": 1.313674807548523, "rewards/rejected": -4.162339687347412, "step": 2110 }, { "epoch": 0.64, "learning_rate": 1.052533025470379e-06, "logits/chosen": -0.3463028073310852, "logits/rejected": -0.34690287709236145, "logps/chosen": -443.31134033203125, "logps/rejected": -455.8688049316406, "loss": 0.4229, "rewards/accuracies": 0.78125, "rewards/chosen": -2.8174338340759277, "rewards/margins": 1.4426196813583374, "rewards/rejected": -4.260054111480713, "step": 2120 }, { "epoch": 0.65, "learning_rate": 1.0305368692688175e-06, "logits/chosen": -0.3607487082481384, "logits/rejected": -0.36121565103530884, "logps/chosen": -459.01824951171875, "logps/rejected": -472.31884765625, "loss": 0.4488, "rewards/accuracies": 0.78125, "rewards/chosen": -2.872823715209961, "rewards/margins": 1.4165146350860596, "rewards/rejected": -4.2893385887146, "step": 2130 }, { "epoch": 0.65, "learning_rate": 1.0087131610809153e-06, "logits/chosen": -0.34994029998779297, "logits/rejected": -0.35072094202041626, "logps/chosen": -442.97589111328125, "logps/rejected": -453.756591796875, "loss": 0.555, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.1791205406188965, "rewards/margins": 1.0200657844543457, "rewards/rejected": -4.1991868019104, "step": 2140 }, { "epoch": 0.65, "learning_rate": 9.870644620155878e-07, "logits/chosen": -0.35871225595474243, "logits/rejected": -0.35941624641418457, "logps/chosen": -454.1318359375, "logps/rejected": -464.7295837402344, "loss": 0.4462, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.9613711833953857, "rewards/margins": 1.3269731998443604, "rewards/rejected": -4.288344383239746, "step": 2150 }, { "epoch": 0.66, "learning_rate": 9.655933126436565e-07, "logits/chosen": -0.3492319583892822, "logits/rejected": -0.3505721092224121, "logps/chosen": -444.28515625, "logps/rejected": -456.6302185058594, "loss": 0.4471, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.847562313079834, "rewards/margins": 1.3418700695037842, "rewards/rejected": -4.1894330978393555, "step": 2160 }, { "epoch": 0.66, "learning_rate": 9.443022326996984e-07, "logits/chosen": -0.354257732629776, "logits/rejected": -0.35464176535606384, "logps/chosen": -444.35089111328125, "logps/rejected": -453.5, "loss": 0.4514, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.9952127933502197, "rewards/margins": 1.3154346942901611, "rewards/rejected": -4.310647487640381, "step": 2170 }, { "epoch": 0.66, "learning_rate": 9.231937207863459e-07, "logits/chosen": -0.35797202587127686, "logits/rejected": -0.3591151833534241, "logps/chosen": -446.80487060546875, "logps/rejected": -460.2659606933594, "loss": 0.4346, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.878164768218994, "rewards/margins": 1.2440316677093506, "rewards/rejected": -4.122197151184082, "step": 2180 }, { "epoch": 0.67, "learning_rate": 9.022702540810607e-07, "logits/chosen": -0.3597440719604492, "logits/rejected": -0.3606324791908264, "logps/chosen": -450.4046325683594, "logps/rejected": -460.8863830566406, "loss": 0.4151, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.835644483566284, "rewards/margins": 1.3725159168243408, "rewards/rejected": -4.208160400390625, "step": 2190 }, { "epoch": 0.67, "learning_rate": 8.815342880454312e-07, "logits/chosen": -0.3541966378688812, "logits/rejected": -0.35494524240493774, "logps/chosen": -455.19146728515625, "logps/rejected": -470.131103515625, "loss": 0.4702, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.059727430343628, "rewards/margins": 1.261040449142456, "rewards/rejected": -4.320767879486084, "step": 2200 }, { "epoch": 0.67, "eval_logits/chosen": -0.4240322411060333, "eval_logits/rejected": -0.4246600270271301, "eval_logps/chosen": -441.68072509765625, "eval_logps/rejected": -452.262451171875, "eval_loss": 0.4791676104068756, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -2.878952980041504, "eval_rewards/margins": 1.2493829727172852, "eval_rewards/rejected": -4.128335475921631, "eval_runtime": 351.6609, "eval_samples_per_second": 1.422, "eval_steps_per_second": 1.422, "step": 2200 }, { "epoch": 0.67, "learning_rate": 8.609882561370101e-07, "logits/chosen": -0.3556322455406189, "logits/rejected": -0.35619792342185974, "logps/chosen": -446.03204345703125, "logps/rejected": -455.453125, "loss": 0.4476, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.709972381591797, "rewards/margins": 1.3839571475982666, "rewards/rejected": -4.093929290771484, "step": 2210 }, { "epoch": 0.67, "learning_rate": 8.406345695237394e-07, "logits/chosen": -0.3541732430458069, "logits/rejected": -0.35552269220352173, "logps/chosen": -444.166015625, "logps/rejected": -460.39617919921875, "loss": 0.3845, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -2.691920757293701, "rewards/margins": 1.684851884841919, "rewards/rejected": -4.376772880554199, "step": 2220 }, { "epoch": 0.68, "learning_rate": 8.20475616800985e-07, "logits/chosen": -0.35582807660102844, "logits/rejected": -0.35650044679641724, "logps/chosen": -449.7290954589844, "logps/rejected": -458.966064453125, "loss": 0.498, "rewards/accuracies": 0.71875, "rewards/chosen": -3.0267415046691895, "rewards/margins": 1.181206464767456, "rewards/rejected": -4.207947254180908, "step": 2230 }, { "epoch": 0.68, "learning_rate": 8.005137637112303e-07, "logits/chosen": -0.35746604204177856, "logits/rejected": -0.35817286372184753, "logps/chosen": -450.47308349609375, "logps/rejected": -463.4517517089844, "loss": 0.4951, "rewards/accuracies": 0.71875, "rewards/chosen": -2.940412759780884, "rewards/margins": 1.3010506629943848, "rewards/rejected": -4.2414631843566895, "step": 2240 }, { "epoch": 0.68, "learning_rate": 7.807513528664415e-07, "logits/chosen": -0.3562454581260681, "logits/rejected": -0.3569663166999817, "logps/chosen": -449.81768798828125, "logps/rejected": -462.03125, "loss": 0.4975, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -3.0241804122924805, "rewards/margins": 1.1527836322784424, "rewards/rejected": -4.176963806152344, "step": 2250 }, { "epoch": 0.69, "learning_rate": 7.611907034731538e-07, "logits/chosen": -0.35374173521995544, "logits/rejected": -0.3544319272041321, "logps/chosen": -452.8089904785156, "logps/rejected": -466.8789978027344, "loss": 0.4872, "rewards/accuracies": 0.71875, "rewards/chosen": -3.0933475494384766, "rewards/margins": 1.278378963470459, "rewards/rejected": -4.371726989746094, "step": 2260 }, { "epoch": 0.69, "learning_rate": 7.418341110603e-07, "logits/chosen": -0.3625703454017639, "logits/rejected": -0.363391637802124, "logps/chosen": -461.72442626953125, "logps/rejected": -472.3929138183594, "loss": 0.4361, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.796329975128174, "rewards/margins": 1.4400124549865723, "rewards/rejected": -4.236342430114746, "step": 2270 }, { "epoch": 0.69, "learning_rate": 7.226838472098239e-07, "logits/chosen": -0.35118603706359863, "logits/rejected": -0.35229939222335815, "logps/chosen": -450.23895263671875, "logps/rejected": -462.094482421875, "loss": 0.4608, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.8915536403656006, "rewards/margins": 1.3143730163574219, "rewards/rejected": -4.205926418304443, "step": 2280 }, { "epoch": 0.7, "learning_rate": 7.037421592900942e-07, "logits/chosen": -0.3532702326774597, "logits/rejected": -0.3544442057609558, "logps/chosen": -444.9677734375, "logps/rejected": -458.2537536621094, "loss": 0.4259, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8905608654022217, "rewards/margins": 1.462416410446167, "rewards/rejected": -4.352977275848389, "step": 2290 }, { "epoch": 0.7, "learning_rate": 6.850112701921735e-07, "logits/chosen": -0.35222965478897095, "logits/rejected": -0.3528757095336914, "logps/chosen": -441.19976806640625, "logps/rejected": -455.4878845214844, "loss": 0.4063, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.7645294666290283, "rewards/margins": 1.4231407642364502, "rewards/rejected": -4.1876702308654785, "step": 2300 }, { "epoch": 0.7, "learning_rate": 6.664933780689445e-07, "logits/chosen": -0.3582982122898102, "logits/rejected": -0.3593185842037201, "logps/chosen": -450.44549560546875, "logps/rejected": -464.33905029296875, "loss": 0.4102, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.714564800262451, "rewards/margins": 1.4921871423721313, "rewards/rejected": -4.206751823425293, "step": 2310 }, { "epoch": 0.7, "learning_rate": 6.481906560771525e-07, "logits/chosen": -0.357990562915802, "logits/rejected": -0.3587570786476135, "logps/chosen": -441.71746826171875, "logps/rejected": -452.60870361328125, "loss": 0.4988, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9209952354431152, "rewards/margins": 1.2236100435256958, "rewards/rejected": -4.1446051597595215, "step": 2320 }, { "epoch": 0.71, "learning_rate": 6.301052521223736e-07, "logits/chosen": -0.3549385070800781, "logits/rejected": -0.3562348484992981, "logps/chosen": -450.3941955566406, "logps/rejected": -462.034912109375, "loss": 0.4629, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -3.1530508995056152, "rewards/margins": 1.2050797939300537, "rewards/rejected": -4.358130931854248, "step": 2330 }, { "epoch": 0.71, "learning_rate": 6.122392886069486e-07, "logits/chosen": -0.3575456738471985, "logits/rejected": -0.3584723174571991, "logps/chosen": -456.48797607421875, "logps/rejected": -469.8323669433594, "loss": 0.403, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.066274881362915, "rewards/margins": 1.3775126934051514, "rewards/rejected": -4.443788051605225, "step": 2340 }, { "epoch": 0.71, "learning_rate": 5.945948621809092e-07, "logits/chosen": -0.34499675035476685, "logits/rejected": -0.34601226449012756, "logps/chosen": -444.13818359375, "logps/rejected": -458.27978515625, "loss": 0.3784, "rewards/accuracies": 0.8125, "rewards/chosen": -2.885612726211548, "rewards/margins": 1.6283729076385498, "rewards/rejected": -4.513985633850098, "step": 2350 }, { "epoch": 0.72, "learning_rate": 5.771740434959278e-07, "logits/chosen": -0.36106568574905396, "logits/rejected": -0.3616113066673279, "logps/chosen": -451.7525329589844, "logps/rejected": -462.39361572265625, "loss": 0.4455, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.004162549972534, "rewards/margins": 1.2321292161941528, "rewards/rejected": -4.236291408538818, "step": 2360 }, { "epoch": 0.72, "learning_rate": 5.599788769623174e-07, "logits/chosen": -0.3459396958351135, "logits/rejected": -0.3463771939277649, "logps/chosen": -451.74462890625, "logps/rejected": -460.52532958984375, "loss": 0.442, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.0312728881835938, "rewards/margins": 1.2485148906707764, "rewards/rejected": -4.279788017272949, "step": 2370 }, { "epoch": 0.72, "learning_rate": 5.430113805091111e-07, "logits/chosen": -0.34979885816574097, "logits/rejected": -0.3506646156311035, "logps/chosen": -452.90667724609375, "logps/rejected": -459.8733825683594, "loss": 0.4529, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.2027690410614014, "rewards/margins": 1.2168538570404053, "rewards/rejected": -4.419622898101807, "step": 2380 }, { "epoch": 0.73, "learning_rate": 5.262735453472459e-07, "logits/chosen": -0.3504520058631897, "logits/rejected": -0.3512795567512512, "logps/chosen": -448.52685546875, "logps/rejected": -459.87359619140625, "loss": 0.3957, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.8900694847106934, "rewards/margins": 1.5135910511016846, "rewards/rejected": -4.403660774230957, "step": 2390 }, { "epoch": 0.73, "learning_rate": 5.097673357358906e-07, "logits/chosen": -0.36047258973121643, "logits/rejected": -0.36156997084617615, "logps/chosen": -451.36767578125, "logps/rejected": -462.75225830078125, "loss": 0.4152, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.962690830230713, "rewards/margins": 1.4245904684066772, "rewards/rejected": -4.3872809410095215, "step": 2400 }, { "epoch": 0.73, "eval_logits/chosen": -0.4258207082748413, "eval_logits/rejected": -0.4264317452907562, "eval_logps/chosen": -443.40802001953125, "eval_logps/rejected": -454.0955810546875, "eval_loss": 0.4785875976085663, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -3.0516843795776367, "eval_rewards/margins": 1.2599674463272095, "eval_rewards/rejected": -4.311651706695557, "eval_runtime": 351.6671, "eval_samples_per_second": 1.422, "eval_steps_per_second": 1.422, "step": 2400 }, { "epoch": 0.73, "learning_rate": 4.934946887519279e-07, "logits/chosen": -0.36616581678390503, "logits/rejected": -0.36695989966392517, "logps/chosen": -457.62567138671875, "logps/rejected": -470.66485595703125, "loss": 0.4125, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.906996011734009, "rewards/margins": 1.469089150428772, "rewards/rejected": -4.37608528137207, "step": 2410 }, { "epoch": 0.74, "learning_rate": 4.774575140626317e-07, "logits/chosen": -0.35779887437820435, "logits/rejected": -0.3586946129798889, "logps/chosen": -451.4176330566406, "logps/rejected": -464.3094787597656, "loss": 0.4281, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.945284605026245, "rewards/margins": 1.3972845077514648, "rewards/rejected": -4.342568874359131, "step": 2420 }, { "epoch": 0.74, "learning_rate": 4.6165769370155516e-07, "logits/chosen": -0.36210596561431885, "logits/rejected": -0.36279112100601196, "logps/chosen": -451.7628479003906, "logps/rejected": -464.893798828125, "loss": 0.4782, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.2397968769073486, "rewards/margins": 1.2520904541015625, "rewards/rejected": -4.49188756942749, "step": 2430 }, { "epoch": 0.74, "learning_rate": 4.4609708184767177e-07, "logits/chosen": -0.3466174006462097, "logits/rejected": -0.3471986651420593, "logps/chosen": -448.8627014160156, "logps/rejected": -458.1527404785156, "loss": 0.4647, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.91853404045105, "rewards/margins": 1.3560994863510132, "rewards/rejected": -4.274633884429932, "step": 2440 }, { "epoch": 0.74, "learning_rate": 4.307775046077739e-07, "logits/chosen": -0.3524012863636017, "logits/rejected": -0.3537690043449402, "logps/chosen": -445.4092712402344, "logps/rejected": -460.19635009765625, "loss": 0.4618, "rewards/accuracies": 0.75, "rewards/chosen": -3.2233848571777344, "rewards/margins": 1.2467783689498901, "rewards/rejected": -4.470162391662598, "step": 2450 }, { "epoch": 0.75, "learning_rate": 4.1570075980217503e-07, "logits/chosen": -0.3559108376502991, "logits/rejected": -0.35668981075286865, "logps/chosen": -449.45330810546875, "logps/rejected": -457.7796325683594, "loss": 0.4718, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.394498348236084, "rewards/margins": 1.175183892250061, "rewards/rejected": -4.5696821212768555, "step": 2460 }, { "epoch": 0.75, "learning_rate": 4.008686167537243e-07, "logits/chosen": -0.36145132780075073, "logits/rejected": -0.362403005361557, "logps/chosen": -455.7185974121094, "logps/rejected": -467.92828369140625, "loss": 0.427, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -3.0464015007019043, "rewards/margins": 1.3699265718460083, "rewards/rejected": -4.416327953338623, "step": 2470 }, { "epoch": 0.75, "learning_rate": 3.862828160801707e-07, "logits/chosen": -0.3624842166900635, "logits/rejected": -0.36328238248825073, "logps/chosen": -455.2030334472656, "logps/rejected": -468.92608642578125, "loss": 0.468, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -3.084407329559326, "rewards/margins": 1.2791146039962769, "rewards/rejected": -4.363522529602051, "step": 2480 }, { "epoch": 0.76, "learning_rate": 3.7194506948989405e-07, "logits/chosen": -0.3563145697116852, "logits/rejected": -0.3578342795372009, "logps/chosen": -448.5079650878906, "logps/rejected": -462.24542236328125, "loss": 0.3916, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -2.866283655166626, "rewards/margins": 1.522825002670288, "rewards/rejected": -4.389109134674072, "step": 2490 }, { "epoch": 0.76, "learning_rate": 3.578570595810274e-07, "logits/chosen": -0.35796427726745605, "logits/rejected": -0.3588128089904785, "logps/chosen": -454.2952575683594, "logps/rejected": -462.9483337402344, "loss": 0.4564, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.1886661052703857, "rewards/margins": 1.3093502521514893, "rewards/rejected": -4.498016357421875, "step": 2500 }, { "epoch": 0.76, "learning_rate": 3.4402043964399527e-07, "logits/chosen": -0.35277941823005676, "logits/rejected": -0.35380321741104126, "logps/chosen": -441.46600341796875, "logps/rejected": -452.00537109375, "loss": 0.4007, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -3.036705493927002, "rewards/margins": 1.4018588066101074, "rewards/rejected": -4.438564777374268, "step": 2510 }, { "epoch": 0.77, "learning_rate": 3.304368334674965e-07, "logits/chosen": -0.3567604124546051, "logits/rejected": -0.35805758833885193, "logps/chosen": -449.0523376464844, "logps/rejected": -461.69842529296875, "loss": 0.4191, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.938197612762451, "rewards/margins": 1.5301718711853027, "rewards/rejected": -4.468369483947754, "step": 2520 }, { "epoch": 0.77, "learning_rate": 3.1710783514794256e-07, "logits/chosen": -0.35164931416511536, "logits/rejected": -0.3529738187789917, "logps/chosen": -449.84173583984375, "logps/rejected": -464.1533203125, "loss": 0.5458, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -3.2846596240997314, "rewards/margins": 1.069946527481079, "rewards/rejected": -4.354605674743652, "step": 2530 }, { "epoch": 0.77, "learning_rate": 3.040350089023844e-07, "logits/chosen": -0.3580131232738495, "logits/rejected": -0.35896363854408264, "logps/chosen": -460.78790283203125, "logps/rejected": -474.05987548828125, "loss": 0.4396, "rewards/accuracies": 0.78125, "rewards/chosen": -3.1543052196502686, "rewards/margins": 1.453919768333435, "rewards/rejected": -4.6082258224487305, "step": 2540 }, { "epoch": 0.77, "learning_rate": 2.9121988888494297e-07, "logits/chosen": -0.35557836294174194, "logits/rejected": -0.356197327375412, "logps/chosen": -454.5060119628906, "logps/rejected": -467.5367736816406, "loss": 0.3968, "rewards/accuracies": 0.8125, "rewards/chosen": -2.987016201019287, "rewards/margins": 1.4960193634033203, "rewards/rejected": -4.483035564422607, "step": 2550 }, { "epoch": 0.78, "learning_rate": 2.786639790067719e-07, "logits/chosen": -0.35686007142066956, "logits/rejected": -0.3575289249420166, "logps/chosen": -457.2361755371094, "logps/rejected": -470.49212646484375, "loss": 0.4509, "rewards/accuracies": 0.78125, "rewards/chosen": -3.249783754348755, "rewards/margins": 1.2371891736984253, "rewards/rejected": -4.486972808837891, "step": 2560 }, { "epoch": 0.78, "learning_rate": 2.6636875275956567e-07, "logits/chosen": -0.3554794192314148, "logits/rejected": -0.35618001222610474, "logps/chosen": -455.373291015625, "logps/rejected": -466.55389404296875, "loss": 0.5174, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -3.4816794395446777, "rewards/margins": 1.009413480758667, "rewards/rejected": -4.491092681884766, "step": 2570 }, { "epoch": 0.78, "learning_rate": 2.543356530426394e-07, "logits/chosen": -0.34936192631721497, "logits/rejected": -0.3497045040130615, "logps/chosen": -451.0462951660156, "logps/rejected": -464.62109375, "loss": 0.4859, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.2196598052978516, "rewards/margins": 1.3027180433273315, "rewards/rejected": -4.522377967834473, "step": 2580 }, { "epoch": 0.79, "learning_rate": 2.425660919935954e-07, "logits/chosen": -0.35678738355636597, "logits/rejected": -0.35775676369667053, "logps/chosen": -452.04925537109375, "logps/rejected": -463.97869873046875, "loss": 0.4253, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.8534128665924072, "rewards/margins": 1.3880208730697632, "rewards/rejected": -4.241434097290039, "step": 2590 }, { "epoch": 0.79, "learning_rate": 2.3106145082260777e-07, "logits/chosen": -0.35490182042121887, "logits/rejected": -0.35594433546066284, "logps/chosen": -456.057373046875, "logps/rejected": -470.40350341796875, "loss": 0.4502, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.0541415214538574, "rewards/margins": 1.4110925197601318, "rewards/rejected": -4.46523380279541, "step": 2600 }, { "epoch": 0.79, "eval_logits/chosen": -0.4264547824859619, "eval_logits/rejected": -0.4270709156990051, "eval_logps/chosen": -443.8349914550781, "eval_logps/rejected": -454.5430603027344, "eval_loss": 0.48084381222724915, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -3.0943799018859863, "eval_rewards/margins": 1.2620201110839844, "eval_rewards/rejected": -4.356400489807129, "eval_runtime": 351.5894, "eval_samples_per_second": 1.422, "eval_steps_per_second": 1.422, "step": 2600 }, { "epoch": 0.79, "learning_rate": 2.1982307965032563e-07, "logits/chosen": -0.3585938513278961, "logits/rejected": -0.3597787618637085, "logps/chosen": -453.99884033203125, "logps/rejected": -462.98272705078125, "loss": 0.5579, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.515160322189331, "rewards/margins": 0.9192056655883789, "rewards/rejected": -4.434365749359131, "step": 2610 }, { "epoch": 0.8, "learning_rate": 2.0885229734943501e-07, "logits/chosen": -0.35792115330696106, "logits/rejected": -0.35949331521987915, "logps/chosen": -441.6431579589844, "logps/rejected": -454.21160888671875, "loss": 0.4968, "rewards/accuracies": 0.75, "rewards/chosen": -3.2611217498779297, "rewards/margins": 1.2790337800979614, "rewards/rejected": -4.540155410766602, "step": 2620 }, { "epoch": 0.8, "learning_rate": 1.9815039138988135e-07, "logits/chosen": -0.3631977438926697, "logits/rejected": -0.3638666272163391, "logps/chosen": -448.5018005371094, "logps/rejected": -460.1982421875, "loss": 0.452, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.071455478668213, "rewards/margins": 1.4410284757614136, "rewards/rejected": -4.512484073638916, "step": 2630 }, { "epoch": 0.8, "learning_rate": 1.8771861768777794e-07, "logits/chosen": -0.3509594798088074, "logits/rejected": -0.35208243131637573, "logps/chosen": -450.60308837890625, "logps/rejected": -464.2266540527344, "loss": 0.4278, "rewards/accuracies": 0.78125, "rewards/chosen": -3.225553512573242, "rewards/margins": 1.3294174671173096, "rewards/rejected": -4.554970741271973, "step": 2640 }, { "epoch": 0.8, "learning_rate": 1.7755820045802146e-07, "logits/chosen": -0.35590630769729614, "logits/rejected": -0.35736554861068726, "logps/chosen": -455.400390625, "logps/rejected": -465.2867126464844, "loss": 0.4158, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.942516803741455, "rewards/margins": 1.4671036005020142, "rewards/rejected": -4.409620761871338, "step": 2650 }, { "epoch": 0.81, "learning_rate": 1.67670332070623e-07, "logits/chosen": -0.3521929383277893, "logits/rejected": -0.3526236116886139, "logps/chosen": -455.163330078125, "logps/rejected": -469.2591857910156, "loss": 0.4457, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.1400699615478516, "rewards/margins": 1.289953589439392, "rewards/rejected": -4.430023193359375, "step": 2660 }, { "epoch": 0.81, "learning_rate": 1.580561729107777e-07, "logits/chosen": -0.35622936487197876, "logits/rejected": -0.356993168592453, "logps/chosen": -455.1328125, "logps/rejected": -465.6949157714844, "loss": 0.4489, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.2420265674591064, "rewards/margins": 1.3224232196807861, "rewards/rejected": -4.564449310302734, "step": 2670 }, { "epoch": 0.81, "learning_rate": 1.487168512426901e-07, "logits/chosen": -0.36213189363479614, "logits/rejected": -0.3628009557723999, "logps/chosen": -453.6480407714844, "logps/rejected": -465.2872619628906, "loss": 0.4185, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -3.116283416748047, "rewards/margins": 1.3114349842071533, "rewards/rejected": -4.427718162536621, "step": 2680 }, { "epoch": 0.82, "learning_rate": 1.3965346307716676e-07, "logits/chosen": -0.3530941605567932, "logits/rejected": -0.35421401262283325, "logps/chosen": -451.10894775390625, "logps/rejected": -465.1979064941406, "loss": 0.376, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -2.9081177711486816, "rewards/margins": 1.643689751625061, "rewards/rejected": -4.551807403564453, "step": 2690 }, { "epoch": 0.82, "learning_rate": 1.3086707204299415e-07, "logits/chosen": -0.36071377992630005, "logits/rejected": -0.3618861138820648, "logps/chosen": -448.95355224609375, "logps/rejected": -460.8838806152344, "loss": 0.4524, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.284519672393799, "rewards/margins": 1.33005690574646, "rewards/rejected": -4.6145758628845215, "step": 2700 }, { "epoch": 0.82, "learning_rate": 1.223587092621162e-07, "logits/chosen": -0.3580467998981476, "logits/rejected": -0.35923272371292114, "logps/chosen": -451.82769775390625, "logps/rejected": -463.8099670410156, "loss": 0.4238, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -3.3439979553222656, "rewards/margins": 1.2987263202667236, "rewards/rejected": -4.64272403717041, "step": 2710 }, { "epoch": 0.83, "learning_rate": 1.1412937322862971e-07, "logits/chosen": -0.3629991412162781, "logits/rejected": -0.3639989495277405, "logps/chosen": -448.5044860839844, "logps/rejected": -460.817138671875, "loss": 0.4102, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -3.0257716178894043, "rewards/margins": 1.448880910873413, "rewards/rejected": -4.4746527671813965, "step": 2720 }, { "epoch": 0.83, "learning_rate": 1.0618002969160546e-07, "logits/chosen": -0.3608396053314209, "logits/rejected": -0.3618479371070862, "logps/chosen": -453.93499755859375, "logps/rejected": -466.81640625, "loss": 0.4187, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1413166522979736, "rewards/margins": 1.3508026599884033, "rewards/rejected": -4.492118835449219, "step": 2730 }, { "epoch": 0.83, "learning_rate": 9.851161154175337e-08, "logits/chosen": -0.3562917113304138, "logits/rejected": -0.35710564255714417, "logps/chosen": -451.28076171875, "logps/rejected": -461.2808532714844, "loss": 0.5024, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -3.143902540206909, "rewards/margins": 1.1941773891448975, "rewards/rejected": -4.338079929351807, "step": 2740 }, { "epoch": 0.84, "learning_rate": 9.112501870194273e-08, "logits/chosen": -0.3589875102043152, "logits/rejected": -0.35990768671035767, "logps/chosen": -452.32000732421875, "logps/rejected": -461.66033935546875, "loss": 0.5337, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -3.512810468673706, "rewards/margins": 0.9673913717269897, "rewards/rejected": -4.4802021980285645, "step": 2750 }, { "epoch": 0.84, "learning_rate": 8.402111802159413e-08, "logits/chosen": -0.3585359454154968, "logits/rejected": -0.35975727438926697, "logps/chosen": -454.8050842285156, "logps/rejected": -465.12615966796875, "loss": 0.4486, "rewards/accuracies": 0.78125, "rewards/chosen": -3.2963638305664062, "rewards/margins": 1.2402369976043701, "rewards/rejected": -4.5366010665893555, "step": 2760 }, { "epoch": 0.84, "learning_rate": 7.720074317494913e-08, "logits/chosen": -0.36562293767929077, "logits/rejected": -0.3664829134941101, "logps/chosen": -457.26068115234375, "logps/rejected": -470.1167907714844, "loss": 0.4503, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.0565972328186035, "rewards/margins": 1.4593775272369385, "rewards/rejected": -4.515974521636963, "step": 2770 }, { "epoch": 0.84, "learning_rate": 7.06646945632361e-08, "logits/chosen": -0.3597029447555542, "logits/rejected": -0.3601227402687073, "logps/chosen": -461.0421447753906, "logps/rejected": -469.86175537109375, "loss": 0.512, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -3.246166944503784, "rewards/margins": 1.1497961282730103, "rewards/rejected": -4.395963191986084, "step": 2780 }, { "epoch": 0.85, "learning_rate": 6.441373922073946e-08, "logits/chosen": -0.359005331993103, "logits/rejected": -0.35974326729774475, "logps/chosen": -455.99908447265625, "logps/rejected": -466.95440673828125, "loss": 0.4367, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.224595546722412, "rewards/margins": 1.361697793006897, "rewards/rejected": -4.5862932205200195, "step": 2790 }, { "epoch": 0.85, "learning_rate": 5.844861072478336e-08, "logits/chosen": -0.3530232608318329, "logits/rejected": -0.3545222580432892, "logps/chosen": -443.6240234375, "logps/rejected": -458.322998046875, "loss": 0.4834, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.3614017963409424, "rewards/margins": 1.2071417570114136, "rewards/rejected": -4.568543434143066, "step": 2800 }, { "epoch": 0.85, "eval_logits/chosen": -0.4272295832633972, "eval_logits/rejected": -0.42783358693122864, "eval_logps/chosen": -444.2228088378906, "eval_logps/rejected": -454.95098876953125, "eval_loss": 0.48089736700057983, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -3.1331627368927, "eval_rewards/margins": 1.2640310525894165, "eval_rewards/rejected": -4.3971943855285645, "eval_runtime": 351.6656, "eval_samples_per_second": 1.422, "eval_steps_per_second": 1.422, "step": 2800 }, { "epoch": 0.85, "learning_rate": 5.2770009109645306e-08, "logits/chosen": -0.36214134097099304, "logits/rejected": -0.36288636922836304, "logps/chosen": -454.91839599609375, "logps/rejected": -466.05224609375, "loss": 0.4296, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.1463279724121094, "rewards/margins": 1.363966703414917, "rewards/rejected": -4.5102949142456055, "step": 2810 }, { "epoch": 0.86, "learning_rate": 4.7378600784402095e-08, "logits/chosen": -0.3552590310573578, "logits/rejected": -0.35652121901512146, "logps/chosen": -455.6435546875, "logps/rejected": -465.54693603515625, "loss": 0.4669, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.2327640056610107, "rewards/margins": 1.2310270071029663, "rewards/rejected": -4.463791370391846, "step": 2820 }, { "epoch": 0.86, "learning_rate": 4.22750184547252e-08, "logits/chosen": -0.3599388301372528, "logits/rejected": -0.3607821762561798, "logps/chosen": -456.6576232910156, "logps/rejected": -469.9142150878906, "loss": 0.4199, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.1602883338928223, "rewards/margins": 1.4565389156341553, "rewards/rejected": -4.616827487945557, "step": 2830 }, { "epoch": 0.86, "learning_rate": 3.745986104862903e-08, "logits/chosen": -0.35964518785476685, "logits/rejected": -0.360365092754364, "logps/chosen": -455.8336486816406, "logps/rejected": -467.90948486328125, "loss": 0.4152, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.874156951904297, "rewards/margins": 1.5480505228042603, "rewards/rejected": -4.422207832336426, "step": 2840 }, { "epoch": 0.87, "learning_rate": 3.293369364618465e-08, "logits/chosen": -0.3647812604904175, "logits/rejected": -0.3658196032047272, "logps/chosen": -449.73138427734375, "logps/rejected": -462.46942138671875, "loss": 0.4729, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -3.295436382293701, "rewards/margins": 1.252638816833496, "rewards/rejected": -4.5480756759643555, "step": 2850 }, { "epoch": 0.87, "learning_rate": 2.869704741320478e-08, "logits/chosen": -0.35672903060913086, "logits/rejected": -0.3576185703277588, "logps/chosen": -449.70294189453125, "logps/rejected": -459.93096923828125, "loss": 0.4951, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -3.523907423019409, "rewards/margins": 1.1444907188415527, "rewards/rejected": -4.668398380279541, "step": 2860 }, { "epoch": 0.87, "learning_rate": 2.4750419538908667e-08, "logits/chosen": -0.3534146547317505, "logits/rejected": -0.35466113686561584, "logps/chosen": -452.890869140625, "logps/rejected": -464.16510009765625, "loss": 0.4477, "rewards/accuracies": 0.78125, "rewards/chosen": -3.2099146842956543, "rewards/margins": 1.3813788890838623, "rewards/rejected": -4.591293811798096, "step": 2870 }, { "epoch": 0.87, "learning_rate": 2.1094273177576508e-08, "logits/chosen": -0.36183369159698486, "logits/rejected": -0.36180374026298523, "logps/chosen": -455.7822265625, "logps/rejected": -465.14801025390625, "loss": 0.4747, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -3.090353488922119, "rewards/margins": 1.3213683366775513, "rewards/rejected": -4.411721706390381, "step": 2880 }, { "epoch": 0.88, "learning_rate": 1.7729037394193792e-08, "logits/chosen": -0.3579171299934387, "logits/rejected": -0.35931870341300964, "logps/chosen": -450.9007263183594, "logps/rejected": -464.6581115722656, "loss": 0.4626, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -3.0921690464019775, "rewards/margins": 1.4891610145568848, "rewards/rejected": -4.581330299377441, "step": 2890 }, { "epoch": 0.88, "learning_rate": 1.4655107114101008e-08, "logits/chosen": -0.36245545744895935, "logits/rejected": -0.36358946561813354, "logps/chosen": -452.5899353027344, "logps/rejected": -467.2527770996094, "loss": 0.464, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.9442224502563477, "rewards/margins": 1.4334999322891235, "rewards/rejected": -4.377722263336182, "step": 2900 }, { "epoch": 0.88, "learning_rate": 1.1872843076645157e-08, "logits/chosen": -0.35802754759788513, "logits/rejected": -0.35865747928619385, "logps/chosen": -454.47662353515625, "logps/rejected": -465.0662536621094, "loss": 0.3877, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.96606707572937, "rewards/margins": 1.5276943445205688, "rewards/rejected": -4.4937615394592285, "step": 2910 }, { "epoch": 0.89, "learning_rate": 9.382571792846962e-09, "logits/chosen": -0.3509235084056854, "logits/rejected": -0.3516360819339752, "logps/chosen": -443.75469970703125, "logps/rejected": -453.1048278808594, "loss": 0.4472, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.1273512840270996, "rewards/margins": 1.3490197658538818, "rewards/rejected": -4.476370811462402, "step": 2920 }, { "epoch": 0.89, "learning_rate": 7.1845855070828975e-09, "logits/chosen": -0.3624979555606842, "logits/rejected": -0.36296314001083374, "logps/chosen": -450.0325622558594, "logps/rejected": -459.88916015625, "loss": 0.4578, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.340954303741455, "rewards/margins": 1.343732237815857, "rewards/rejected": -4.684686660766602, "step": 2930 }, { "epoch": 0.89, "learning_rate": 5.279142162789019e-09, "logits/chosen": -0.35505902767181396, "logits/rejected": -0.35619235038757324, "logps/chosen": -451.9505920410156, "logps/rejected": -465.6192321777344, "loss": 0.4539, "rewards/accuracies": 0.78125, "rewards/chosen": -3.4748854637145996, "rewards/margins": 1.2840955257415771, "rewards/rejected": -4.758981227874756, "step": 2940 }, { "epoch": 0.9, "learning_rate": 3.666465372190453e-09, "logits/chosen": -0.356467604637146, "logits/rejected": -0.357626736164093, "logps/chosen": -452.7481384277344, "logps/rejected": -465.7762145996094, "loss": 0.472, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -3.293835401535034, "rewards/margins": 1.261348009109497, "rewards/rejected": -4.555183410644531, "step": 2950 }, { "epoch": 0.9, "learning_rate": 2.34674439005822e-09, "logits/chosen": -0.3525004982948303, "logits/rejected": -0.35348066687583923, "logps/chosen": -450.6170959472656, "logps/rejected": -462.77069091796875, "loss": 0.3976, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -3.295048952102661, "rewards/margins": 1.4584187269210815, "rewards/rejected": -4.753467559814453, "step": 2960 }, { "epoch": 0.9, "learning_rate": 1.3201340915011685e-09, "logits/chosen": -0.35318654775619507, "logits/rejected": -0.35393238067626953, "logps/chosen": -453.31707763671875, "logps/rejected": -463.7138671875, "loss": 0.4291, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.284226655960083, "rewards/margins": 1.3613402843475342, "rewards/rejected": -4.645566463470459, "step": 2970 }, { "epoch": 0.91, "learning_rate": 5.86754953789681e-10, "logits/chosen": -0.35480597615242004, "logits/rejected": -0.35523343086242676, "logps/chosen": -449.10235595703125, "logps/rejected": -461.8785095214844, "loss": 0.4941, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.270163059234619, "rewards/margins": 1.221695065498352, "rewards/rejected": -4.491857528686523, "step": 2980 }, { "epoch": 0.91, "learning_rate": 1.4669304221726077e-10, "logits/chosen": -0.3551548421382904, "logits/rejected": -0.3556649386882782, "logps/chosen": -456.6444396972656, "logps/rejected": -467.2582092285156, "loss": 0.4289, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.225339412689209, "rewards/margins": 1.3719263076782227, "rewards/rejected": -4.597265243530273, "step": 2990 }, { "epoch": 0.91, "learning_rate": 0.0, "logits/chosen": -0.35613125562667847, "logits/rejected": -0.3575323522090912, "logps/chosen": -453.228759765625, "logps/rejected": -467.6603088378906, "loss": 0.416, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -3.2920143604278564, "rewards/margins": 1.416282296180725, "rewards/rejected": -4.708296775817871, "step": 3000 }, { "epoch": 0.91, "eval_logits/chosen": -0.42728757858276367, "eval_logits/rejected": -0.42789557576179504, "eval_logps/chosen": -444.3138427734375, "eval_logps/rejected": -455.0481262207031, "eval_loss": 0.47960197925567627, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -3.1422641277313232, "eval_rewards/margins": 1.264641523361206, "eval_rewards/rejected": -4.406905174255371, "eval_runtime": 351.5662, "eval_samples_per_second": 1.422, "eval_steps_per_second": 1.422, "step": 3000 } ], "logging_steps": 10, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }