{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9111617312072893,
  "eval_steps": 200,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5.000000000000001e-07,
      "logits/chosen": -0.300163596868515,
      "logits/rejected": -0.3011459410190582,
      "logps/chosen": -418.81268310546875,
      "logps/rejected": -421.69482421875,
      "loss": 0.6923,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": 0.008436297997832298,
      "rewards/margins": 0.001967963995411992,
      "rewards/rejected": 0.006468335632234812,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.0000000000000002e-06,
      "logits/chosen": -0.31174224615097046,
      "logits/rejected": -0.3135172724723816,
      "logps/chosen": -428.8531799316406,
      "logps/rejected": -427.205810546875,
      "loss": 0.6951,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": 0.0215766541659832,
      "rewards/margins": -0.0034640885423868895,
      "rewards/rejected": 0.02504074200987816,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.5e-06,
      "logits/chosen": -0.2996385097503662,
      "logits/rejected": -0.30060532689094543,
      "logps/chosen": -416.20086669921875,
      "logps/rejected": -412.4971618652344,
      "loss": 0.6924,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.030052989721298218,
      "rewards/margins": 0.0019294738303869963,
      "rewards/rejected": 0.028123509138822556,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0000000000000003e-06,
      "logits/chosen": -0.3022615313529968,
      "logits/rejected": -0.3025739789009094,
      "logps/chosen": -426.9918518066406,
      "logps/rejected": -423.1588439941406,
      "loss": 0.692,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.02528352662920952,
      "rewards/margins": 0.002774887252599001,
      "rewards/rejected": 0.022508641704916954,
      "step": 40
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5e-06,
      "logits/chosen": -0.30438098311424255,
      "logits/rejected": -0.30549854040145874,
      "logps/chosen": -421.03363037109375,
      "logps/rejected": -421.8212890625,
      "loss": 0.6898,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.03513988479971886,
      "rewards/margins": 0.007109012454748154,
      "rewards/rejected": 0.028030872344970703,
      "step": 50
    },
    {
      "epoch": 0.02,
      "learning_rate": 3e-06,
      "logits/chosen": -0.30687031149864197,
      "logits/rejected": -0.3071025013923645,
      "logps/chosen": -417.4591369628906,
      "logps/rejected": -417.7974548339844,
      "loss": 0.6931,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.014512499794363976,
      "rewards/margins": 0.00045255664736032486,
      "rewards/rejected": 0.014059944078326225,
      "step": 60
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5e-06,
      "logits/chosen": -0.30733975768089294,
      "logits/rejected": -0.3082950711250305,
      "logps/chosen": -422.18487548828125,
      "logps/rejected": -422.29052734375,
      "loss": 0.6887,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.06789219379425049,
      "rewards/margins": 0.00933685339987278,
      "rewards/rejected": 0.05855534225702286,
      "step": 70
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.000000000000001e-06,
      "logits/chosen": -0.309120774269104,
      "logits/rejected": -0.3103254437446594,
      "logps/chosen": -424.8710021972656,
      "logps/rejected": -423.9234924316406,
      "loss": 0.6875,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.0613434836268425,
      "rewards/margins": 0.011882667429745197,
      "rewards/rejected": 0.04946080967783928,
      "step": 80
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.5e-06,
      "logits/chosen": -0.3092747628688812,
      "logits/rejected": -0.3102528750896454,
      "logps/chosen": -417.56097412109375,
      "logps/rejected": -420.48541259765625,
      "loss": 0.6882,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.1002880111336708,
      "rewards/margins": 0.010797671973705292,
      "rewards/rejected": 0.08949033915996552,
      "step": 90
    },
    {
      "epoch": 0.03,
      "learning_rate": 5e-06,
      "logits/chosen": -0.3046155571937561,
      "logits/rejected": -0.3053414225578308,
      "logps/chosen": -417.95501708984375,
      "logps/rejected": -416.2376403808594,
      "loss": 0.6748,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.1339004933834076,
      "rewards/margins": 0.03802730515599251,
      "rewards/rejected": 0.09587319195270538,
      "step": 100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.999853306957783e-06,
      "logits/chosen": -0.3040740489959717,
      "logits/rejected": -0.30468136072158813,
      "logps/chosen": -416.46527099609375,
      "logps/rejected": -415.51568603515625,
      "loss": 0.6714,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.13915565609931946,
      "rewards/margins": 0.045606400817632675,
      "rewards/rejected": 0.09354925900697708,
      "step": 110
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.99941324504621e-06,
      "logits/chosen": -0.3062252104282379,
      "logits/rejected": -0.30699923634529114,
      "logps/chosen": -423.4345703125,
      "logps/rejected": -421.33477783203125,
      "loss": 0.6681,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.25515347719192505,
      "rewards/margins": 0.05361776426434517,
      "rewards/rejected": 0.20153570175170898,
      "step": 120
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.998679865908499e-06,
      "logits/chosen": -0.3025161623954773,
      "logits/rejected": -0.30388832092285156,
      "logps/chosen": -421.076416015625,
      "logps/rejected": -419.70428466796875,
      "loss": 0.6432,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.36134886741638184,
      "rewards/margins": 0.10863993316888809,
      "rewards/rejected": 0.25270897150039673,
      "step": 130
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9976532556099425e-06,
      "logits/chosen": -0.29753798246383667,
      "logits/rejected": -0.2986024022102356,
      "logps/chosen": -423.3164978027344,
      "logps/rejected": -420.72918701171875,
      "loss": 0.632,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.45896610617637634,
      "rewards/margins": 0.13753186166286469,
      "rewards/rejected": 0.32143422961235046,
      "step": 140
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.99633353462781e-06,
      "logits/chosen": -0.300027072429657,
      "logits/rejected": -0.3015795648097992,
      "logps/chosen": -413.91973876953125,
      "logps/rejected": -415.4903869628906,
      "loss": 0.6428,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": 0.47876471281051636,
      "rewards/margins": 0.11648330837488174,
      "rewards/rejected": 0.3622814118862152,
      "step": 150
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994720857837211e-06,
      "logits/chosen": -0.3021107316017151,
      "logits/rejected": -0.30334895849227905,
      "logps/chosen": -419.66571044921875,
      "logps/rejected": -420.95068359375,
      "loss": 0.623,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.5215948820114136,
      "rewards/margins": 0.1642296016216278,
      "rewards/rejected": 0.35736531019210815,
      "step": 160
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992815414492917e-06,
      "logits/chosen": -0.29045212268829346,
      "logits/rejected": -0.29103735089302063,
      "logps/chosen": -411.07635498046875,
      "logps/rejected": -411.93463134765625,
      "loss": 0.6303,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.5044211149215698,
      "rewards/margins": 0.16038301587104797,
      "rewards/rejected": 0.34403812885284424,
      "step": 170
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.990617428207153e-06,
      "logits/chosen": -0.29839888215065,
      "logits/rejected": -0.29893797636032104,
      "logps/chosen": -430.1136169433594,
      "logps/rejected": -428.3583984375,
      "loss": 0.6029,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.5385109782218933,
      "rewards/margins": 0.22802197933197021,
      "rewards/rejected": 0.3104889690876007,
      "step": 180
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988127156923355e-06,
      "logits/chosen": -0.2956782281398773,
      "logits/rejected": -0.2963833212852478,
      "logps/chosen": -415.17071533203125,
      "logps/rejected": -414.58148193359375,
      "loss": 0.6078,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.4606494903564453,
      "rewards/margins": 0.22775804996490479,
      "rewards/rejected": 0.23289147019386292,
      "step": 190
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.985344892885899e-06,
      "logits/chosen": -0.29678258299827576,
      "logits/rejected": -0.2977609634399414,
      "logps/chosen": -416.76275634765625,
      "logps/rejected": -419.5223693847656,
      "loss": 0.5821,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.44212013483047485,
      "rewards/margins": 0.29724568128585815,
      "rewards/rejected": 0.1448744386434555,
      "step": 200
    },
    {
      "epoch": 0.06,
      "eval_logits/chosen": -0.35281771421432495,
      "eval_logits/rejected": -0.35360345244407654,
      "eval_logps/chosen": -408.5499267578125,
      "eval_logps/rejected": -409.8388977050781,
      "eval_loss": 0.5728641152381897,
      "eval_rewards/accuracies": 0.7260000109672546,
      "eval_rewards/chosen": 0.43412691354751587,
      "eval_rewards/margins": 0.3201069235801697,
      "eval_rewards/rejected": 0.11402001231908798,
      "eval_runtime": 351.7745,
      "eval_samples_per_second": 1.421,
      "eval_steps_per_second": 1.421,
      "step": 200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9822709626058065e-06,
      "logits/chosen": -0.29128286242485046,
      "logits/rejected": -0.2920396327972412,
      "logps/chosen": -416.55322265625,
      "logps/rejected": -417.397216796875,
      "loss": 0.5743,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.42414647340774536,
      "rewards/margins": 0.3180859684944153,
      "rewards/rejected": 0.1060604602098465,
      "step": 210
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.978905726822424e-06,
      "logits/chosen": -0.29205116629600525,
      "logits/rejected": -0.2932327687740326,
      "logps/chosen": -429.031005859375,
      "logps/rejected": -432.4542541503906,
      "loss": 0.5944,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.34927603602409363,
      "rewards/margins": 0.2745763659477234,
      "rewards/rejected": 0.07469968497753143,
      "step": 220
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.975249580461092e-06,
      "logits/chosen": -0.29278379678726196,
      "logits/rejected": -0.29318395256996155,
      "logps/chosen": -415.50640869140625,
      "logps/rejected": -414.65631103515625,
      "loss": 0.6108,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.20576027035713196,
      "rewards/margins": 0.24422487616539001,
      "rewards/rejected": -0.038464583456516266,
      "step": 230
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.971302952586796e-06,
      "logits/chosen": -0.2884067893028259,
      "logits/rejected": -0.2890322208404541,
      "logps/chosen": -411.9427795410156,
      "logps/rejected": -418.1693420410156,
      "loss": 0.553,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.25125259160995483,
      "rewards/margins": 0.39369240403175354,
      "rewards/rejected": -0.1424398422241211,
      "step": 240
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.967066306353816e-06,
      "logits/chosen": -0.28915414214134216,
      "logits/rejected": -0.29073747992515564,
      "logps/chosen": -417.0771484375,
      "logps/rejected": -419.65380859375,
      "loss": 0.5598,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.25535959005355835,
      "rewards/margins": 0.40900731086730957,
      "rewards/rejected": -0.15364770591259003,
      "step": 250
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.962540138951371e-06,
      "logits/chosen": -0.2950271964073181,
      "logits/rejected": -0.29611852765083313,
      "logps/chosen": -420.79681396484375,
      "logps/rejected": -425.1570739746094,
      "loss": 0.5278,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.24652545154094696,
      "rewards/margins": 0.48342761397361755,
      "rewards/rejected": -0.2369021624326706,
      "step": 260
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.957724981545276e-06,
      "logits/chosen": -0.28752994537353516,
      "logits/rejected": -0.2876993417739868,
      "logps/chosen": -413.72808837890625,
      "logps/rejected": -418.2240295410156,
      "loss": 0.5369,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.144112229347229,
      "rewards/margins": 0.48878079652786255,
      "rewards/rejected": -0.34466850757598877,
      "step": 270
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.952621399215598e-06,
      "logits/chosen": -0.29713207483291626,
      "logits/rejected": -0.29806575179100037,
      "logps/chosen": -420.4150390625,
      "logps/rejected": -428.95513916015625,
      "loss": 0.5325,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.011465489864349365,
      "rewards/margins": 0.47427234053611755,
      "rewards/rejected": -0.4857378602027893,
      "step": 280
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.947229990890356e-06,
      "logits/chosen": -0.285542756319046,
      "logits/rejected": -0.28633180260658264,
      "logps/chosen": -420.0926208496094,
      "logps/rejected": -423.4457092285156,
      "loss": 0.5193,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.08504172414541245,
      "rewards/margins": 0.5871935486793518,
      "rewards/rejected": -0.6722352504730225,
      "step": 290
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.941551389275217e-06,
      "logits/chosen": -0.2842163145542145,
      "logits/rejected": -0.28539806604385376,
      "logps/chosen": -421.17822265625,
      "logps/rejected": -424.78387451171875,
      "loss": 0.5631,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.19560113549232483,
      "rewards/margins": 0.5197954177856445,
      "rewards/rejected": -0.715396523475647,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.935586260779261e-06,
      "logits/chosen": -0.2907197177410126,
      "logits/rejected": -0.29180362820625305,
      "logps/chosen": -427.5953063964844,
      "logps/rejected": -431.76788330078125,
      "loss": 0.5331,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.4097444415092468,
      "rewards/margins": 0.5406568646430969,
      "rewards/rejected": -0.9504014253616333,
      "step": 310
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.929335305436764e-06,
      "logits/chosen": -0.2902284264564514,
      "logits/rejected": -0.2910650670528412,
      "logps/chosen": -427.05621337890625,
      "logps/rejected": -427.26904296875,
      "loss": 0.5694,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.4887164533138275,
      "rewards/margins": 0.4573966860771179,
      "rewards/rejected": -0.9461132287979126,
      "step": 320
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.922799256825052e-06,
      "logits/chosen": -0.30178460478782654,
      "logits/rejected": -0.3031577467918396,
      "logps/chosen": -432.64544677734375,
      "logps/rejected": -437.355712890625,
      "loss": 0.5759,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.5430983304977417,
      "rewards/margins": 0.5063012838363647,
      "rewards/rejected": -1.0493996143341064,
      "step": 330
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.915978881978407e-06,
      "logits/chosen": -0.2879001498222351,
      "logits/rejected": -0.28882110118865967,
      "logps/chosen": -418.3189392089844,
      "logps/rejected": -420.14349365234375,
      "loss": 0.5114,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.39648348093032837,
      "rewards/margins": 0.6386500000953674,
      "rewards/rejected": -1.0351333618164062,
      "step": 340
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.908874981298058e-06,
      "logits/chosen": -0.29214486479759216,
      "logits/rejected": -0.29305440187454224,
      "logps/chosen": -421.3182678222656,
      "logps/rejected": -427.06317138671875,
      "loss": 0.5628,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.5688936114311218,
      "rewards/margins": 0.5135782957077026,
      "rewards/rejected": -1.0824719667434692,
      "step": 350
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.901488388458247e-06,
      "logits/chosen": -0.2956882119178772,
      "logits/rejected": -0.29717716574668884,
      "logps/chosen": -429.40850830078125,
      "logps/rejected": -432.0194396972656,
      "loss": 0.5326,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.5142576098442078,
      "rewards/margins": 0.5949846506118774,
      "rewards/rejected": -1.1092422008514404,
      "step": 360
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.893819970308394e-06,
      "logits/chosen": -0.29191336035728455,
      "logits/rejected": -0.2928611636161804,
      "logps/chosen": -432.4073181152344,
      "logps/rejected": -437.53472900390625,
      "loss": 0.5255,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.5648801922798157,
      "rewards/margins": 0.5746434926986694,
      "rewards/rejected": -1.1395236253738403,
      "step": 370
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.885870626771371e-06,
      "logits/chosen": -0.2915678322315216,
      "logits/rejected": -0.2924065887928009,
      "logps/chosen": -421.0965881347656,
      "logps/rejected": -425.9581604003906,
      "loss": 0.5565,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.6075869798660278,
      "rewards/margins": 0.576026201248169,
      "rewards/rejected": -1.1836131811141968,
      "step": 380
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": -0.29022809863090515,
      "logits/rejected": -0.2918907701969147,
      "logps/chosen": -422.2085876464844,
      "logps/rejected": -425.4307556152344,
      "loss": 0.5346,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.6413823962211609,
      "rewards/margins": 0.6153510808944702,
      "rewards/rejected": -1.2567334175109863,
      "step": 390
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.869132927957007e-06,
      "logits/chosen": -0.2912658751010895,
      "logits/rejected": -0.292255163192749,
      "logps/chosen": -424.4219665527344,
      "logps/rejected": -430.76885986328125,
      "loss": 0.53,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.7030640840530396,
      "rewards/margins": 0.6264012455940247,
      "rewards/rejected": -1.329465389251709,
      "step": 400
    },
    {
      "epoch": 0.12,
      "eval_logits/chosen": -0.3515583574771881,
      "eval_logits/rejected": -0.35239377617836,
      "eval_logps/chosen": -419.6265563964844,
      "eval_logps/rejected": -424.9375,
      "eval_loss": 0.5038847327232361,
      "eval_rewards/accuracies": 0.7379999756813049,
      "eval_rewards/chosen": -0.6735388040542603,
      "eval_rewards/margins": 0.7223072648048401,
      "eval_rewards/rejected": -1.3958461284637451,
      "eval_runtime": 375.1774,
      "eval_samples_per_second": 1.333,
      "eval_steps_per_second": 1.333,
      "step": 400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.860346536922834e-06,
      "logits/chosen": -0.29377973079681396,
      "logits/rejected": -0.294566810131073,
      "logps/chosen": -429.86907958984375,
      "logps/rejected": -432.5889587402344,
      "loss": 0.529,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.7517430782318115,
      "rewards/margins": 0.6350258588790894,
      "rewards/rejected": -1.3867689371109009,
      "step": 410
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.85128314875731e-06,
      "logits/chosen": -0.2876330316066742,
      "logits/rejected": -0.2890221178531647,
      "logps/chosen": -433.5904846191406,
      "logps/rejected": -438.02886962890625,
      "loss": 0.5174,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.7357751131057739,
      "rewards/margins": 0.6541243195533752,
      "rewards/rejected": -1.389899492263794,
      "step": 420
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.841943827089223e-06,
      "logits/chosen": -0.30073267221450806,
      "logits/rejected": -0.3028663098812103,
      "logps/chosen": -438.89056396484375,
      "logps/rejected": -444.29443359375,
      "loss": 0.5427,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.7469267845153809,
      "rewards/margins": 0.6645030379295349,
      "rewards/rejected": -1.411429762840271,
      "step": 430
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.832329667929378e-06,
      "logits/chosen": -0.30401021242141724,
      "logits/rejected": -0.305408775806427,
      "logps/chosen": -436.4923400878906,
      "logps/rejected": -443.785400390625,
      "loss": 0.4856,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.7162739634513855,
      "rewards/margins": 0.7617406845092773,
      "rewards/rejected": -1.478014588356018,
      "step": 440
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.822441799541979e-06,
      "logits/chosen": -0.29748016595840454,
      "logits/rejected": -0.2987380027770996,
      "logps/chosen": -432.2513122558594,
      "logps/rejected": -439.78741455078125,
      "loss": 0.5138,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.8289654850959778,
      "rewards/margins": 0.7209790349006653,
      "rewards/rejected": -1.549944519996643,
      "step": 450
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.812281382312222e-06,
      "logits/chosen": -0.28938063979148865,
      "logits/rejected": -0.2903631031513214,
      "logps/chosen": -421.52337646484375,
      "logps/rejected": -426.65142822265625,
      "loss": 0.4934,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.7328917980194092,
      "rewards/margins": 0.7723864316940308,
      "rewards/rejected": -1.5052781105041504,
      "step": 460
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.801849608610119e-06,
      "logits/chosen": -0.2995319366455078,
      "logits/rejected": -0.3008275330066681,
      "logps/chosen": -437.45916748046875,
      "logps/rejected": -443.75799560546875,
      "loss": 0.4984,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.9069220423698425,
      "rewards/margins": 0.7756569981575012,
      "rewards/rejected": -1.6825790405273438,
      "step": 470
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.7911477026505656e-06,
      "logits/chosen": -0.2930867373943329,
      "logits/rejected": -0.2938670516014099,
      "logps/chosen": -436.32305908203125,
      "logps/rejected": -439.0968322753906,
      "loss": 0.4882,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.054971694946289,
      "rewards/margins": 0.8004587292671204,
      "rewards/rejected": -1.8554306030273438,
      "step": 480
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.780176920349675e-06,
      "logits/chosen": -0.2880414128303528,
      "logits/rejected": -0.2893609404563904,
      "logps/chosen": -426.8358459472656,
      "logps/rejected": -432.79248046875,
      "loss": 0.5123,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.1646369695663452,
      "rewards/margins": 0.739470362663269,
      "rewards/rejected": -1.9041073322296143,
      "step": 490
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.7689385491773934e-06,
      "logits/chosen": -0.3000113070011139,
      "logits/rejected": -0.3008071780204773,
      "logps/chosen": -442.62860107421875,
      "logps/rejected": -446.14825439453125,
      "loss": 0.5871,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.3722599744796753,
      "rewards/margins": 0.6036561131477356,
      "rewards/rejected": -1.9759161472320557,
      "step": 500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.7574339080064046e-06,
      "logits/chosen": -0.2956729829311371,
      "logits/rejected": -0.29699647426605225,
      "logps/chosen": -432.17486572265625,
      "logps/rejected": -441.1890563964844,
      "loss": 0.4989,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.2666178941726685,
      "rewards/margins": 0.7352741956710815,
      "rewards/rejected": -2.001891851425171,
      "step": 510
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.745664346957362e-06,
      "logits/chosen": -0.29319706559181213,
      "logits/rejected": -0.2932819724082947,
      "logps/chosen": -441.1473083496094,
      "logps/rejected": -443.6536560058594,
      "loss": 0.5431,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.1980129480361938,
      "rewards/margins": 0.7274158000946045,
      "rewards/rejected": -1.9254287481307983,
      "step": 520
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.733631247240435e-06,
      "logits/chosen": -0.28386861085891724,
      "logits/rejected": -0.28545230627059937,
      "logps/chosen": -424.7322692871094,
      "logps/rejected": -432.74920654296875,
      "loss": 0.5172,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.3047645092010498,
      "rewards/margins": 0.7416442632675171,
      "rewards/rejected": -2.0464088916778564,
      "step": 530
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.721336020993228e-06,
      "logits/chosen": -0.29582637548446655,
      "logits/rejected": -0.2965632379055023,
      "logps/chosen": -428.98992919921875,
      "logps/rejected": -436.71533203125,
      "loss": 0.5223,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.232280969619751,
      "rewards/margins": 0.7531365752220154,
      "rewards/rejected": -1.9854176044464111,
      "step": 540
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.708780111115058e-06,
      "logits/chosen": -0.3022860884666443,
      "logits/rejected": -0.303489625453949,
      "logps/chosen": -434.28936767578125,
      "logps/rejected": -439.01043701171875,
      "loss": 0.506,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.1110032796859741,
      "rewards/margins": 0.7986260652542114,
      "rewards/rejected": -1.909629225730896,
      "step": 550
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.6959649910976165e-06,
      "logits/chosen": -0.3028009533882141,
      "logits/rejected": -0.3035816550254822,
      "logps/chosen": -433.6151428222656,
      "logps/rejected": -436.40045166015625,
      "loss": 0.5109,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.0793737173080444,
      "rewards/margins": 0.753380537033081,
      "rewards/rejected": -1.832754373550415,
      "step": 560
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.682892164852057e-06,
      "logits/chosen": -0.29320716857910156,
      "logits/rejected": -0.29399818181991577,
      "logps/chosen": -428.3548889160156,
      "logps/rejected": -433.96124267578125,
      "loss": 0.5566,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.1920106410980225,
      "rewards/margins": 0.6818917989730835,
      "rewards/rejected": -1.8739025592803955,
      "step": 570
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.669563166532504e-06,
      "logits/chosen": -0.29630088806152344,
      "logits/rejected": -0.2984740138053894,
      "logps/chosen": -428.59405517578125,
      "logps/rejected": -439.8580017089844,
      "loss": 0.5099,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.0762312412261963,
      "rewards/margins": 0.8075464963912964,
      "rewards/rejected": -1.8837776184082031,
      "step": 580
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.655979560356006e-06,
      "logits/chosen": -0.299476683139801,
      "logits/rejected": -0.30079394578933716,
      "logps/chosen": -437.24359130859375,
      "logps/rejected": -444.2662048339844,
      "loss": 0.4679,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -1.0544074773788452,
      "rewards/margins": 0.8957304954528809,
      "rewards/rejected": -1.9501378536224365,
      "step": 590
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.642142940418973e-06,
      "logits/chosen": -0.3016494810581207,
      "logits/rejected": -0.3028479218482971,
      "logps/chosen": -428.2562561035156,
      "logps/rejected": -436.1544494628906,
      "loss": 0.4446,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.1558116674423218,
      "rewards/margins": 0.9864055514335632,
      "rewards/rejected": -2.1422171592712402,
      "step": 600
    },
    {
      "epoch": 0.18,
      "eval_logits/chosen": -0.3611030876636505,
      "eval_logits/rejected": -0.36189064383506775,
      "eval_logps/chosen": -425.2828674316406,
      "eval_logps/rejected": -432.32147216796875,
      "eval_loss": 0.4912301301956177,
      "eval_rewards/accuracies": 0.75,
      "eval_rewards/chosen": -1.239168405532837,
      "eval_rewards/margins": 0.895074725151062,
      "eval_rewards/rejected": -2.1342432498931885,
      "eval_runtime": 376.2893,
      "eval_samples_per_second": 1.329,
      "eval_steps_per_second": 1.329,
      "step": 600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.6280549305101065e-06,
      "logits/chosen": -0.30701732635498047,
      "logits/rejected": -0.30861714482307434,
      "logps/chosen": -430.90643310546875,
      "logps/rejected": -437.9549255371094,
      "loss": 0.545,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.4101296663284302,
      "rewards/margins": 0.7359235286712646,
      "rewards/rejected": -2.1460530757904053,
      "step": 610
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.61371718391983e-06,
      "logits/chosen": -0.30552786588668823,
      "logits/rejected": -0.30662640929222107,
      "logps/chosen": -432.50506591796875,
      "logps/rejected": -443.53216552734375,
      "loss": 0.486,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.181477427482605,
      "rewards/margins": 0.9552658796310425,
      "rewards/rejected": -2.1367435455322266,
      "step": 620
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.599131383246277e-06,
      "logits/chosen": -0.308699369430542,
      "logits/rejected": -0.308963418006897,
      "logps/chosen": -443.76837158203125,
      "logps/rejected": -452.234130859375,
      "loss": 0.5178,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.5818192958831787,
      "rewards/margins": 0.70851069688797,
      "rewards/rejected": -2.290329933166504,
      "step": 630
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.584299240197826e-06,
      "logits/chosen": -0.29901835322380066,
      "logits/rejected": -0.2997357249259949,
      "logps/chosen": -437.3292541503906,
      "logps/rejected": -438.70013427734375,
      "loss": 0.4941,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.4868736267089844,
      "rewards/margins": 0.9190858602523804,
      "rewards/rejected": -2.405959129333496,
      "step": 640
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.569222495392227e-06,
      "logits/chosen": -0.30075928568840027,
      "logits/rejected": -0.30218517780303955,
      "logps/chosen": -437.5245056152344,
      "logps/rejected": -447.72271728515625,
      "loss": 0.4425,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -1.6097447872161865,
      "rewards/margins": 1.0334211587905884,
      "rewards/rejected": -2.6431655883789062,
      "step": 650
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.553902918152329e-06,
      "logits/chosen": -0.3034583628177643,
      "logits/rejected": -0.3045238256454468,
      "logps/chosen": -439.45159912109375,
      "logps/rejected": -448.080322265625,
      "loss": 0.4796,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.7247978448867798,
      "rewards/margins": 1.0205966234207153,
      "rewards/rejected": -2.745394229888916,
      "step": 660
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.5383423062984455e-06,
| "logits/chosen": -0.3042409420013428, | |
| "logits/rejected": -0.3053613603115082, | |
| "logps/chosen": -432.8832092285156, | |
| "logps/rejected": -440.6971130371094, | |
| "loss": 0.468, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.8063684701919556, | |
| "rewards/margins": 0.9467433094978333, | |
| "rewards/rejected": -2.7531113624572754, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.522542485937369e-06, | |
| "logits/chosen": -0.2990413308143616, | |
| "logits/rejected": -0.3002299666404724, | |
| "logps/chosen": -435.41754150390625, | |
| "logps/rejected": -442.31201171875, | |
| "loss": 0.4606, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.9637155532836914, | |
| "rewards/margins": 0.984288215637207, | |
| "rewards/rejected": -2.9480037689208984, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.5065053112480725e-06, | |
| "logits/chosen": -0.3054850697517395, | |
| "logits/rejected": -0.3073977530002594, | |
| "logps/chosen": -433.15771484375, | |
| "logps/rejected": -440.9410095214844, | |
| "loss": 0.4933, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.1777331829071045, | |
| "rewards/margins": 0.8640511631965637, | |
| "rewards/rejected": -3.0417845249176025, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.49023266426411e-06, | |
| "logits/chosen": -0.30031442642211914, | |
| "logits/rejected": -0.3014809787273407, | |
| "logps/chosen": -441.3443908691406, | |
| "logps/rejected": -447.56231689453125, | |
| "loss": 0.5213, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -2.0933678150177, | |
| "rewards/margins": 0.8899556994438171, | |
| "rewards/rejected": -2.983323574066162, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.473726454652755e-06, | |
| "logits/chosen": -0.2997979521751404, | |
| "logits/rejected": -0.30115145444869995, | |
| "logps/chosen": -440.00372314453125, | |
| "logps/rejected": -449.6446838378906, | |
| "loss": 0.4733, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.0547962188720703, | |
| "rewards/margins": 1.1024844646453857, | |
| "rewards/rejected": -3.157280445098877, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.45698861949089e-06, | |
| "logits/chosen": -0.3066961169242859, | |
| "logits/rejected": -0.3076573610305786, | |
| "logps/chosen": -442.42303466796875, | |
| "logps/rejected": -448.47686767578125, | |
| "loss": 0.5236, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.1251652240753174, | |
| "rewards/margins": 0.8965371251106262, | |
| "rewards/rejected": -3.021702289581299, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.440021123037683e-06, | |
| "logits/chosen": -0.29265230894088745, | |
| "logits/rejected": -0.29371362924575806, | |
| "logps/chosen": -441.66900634765625, | |
| "logps/rejected": -450.8470153808594, | |
| "loss": 0.5327, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -2.108212947845459, | |
| "rewards/margins": 0.8388462066650391, | |
| "rewards/rejected": -2.947059154510498, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.422825956504073e-06, | |
| "logits/chosen": -0.3069104254245758, | |
| "logits/rejected": -0.3083550035953522, | |
| "logps/chosen": -449.7119140625, | |
| "logps/rejected": -459.4678649902344, | |
| "loss": 0.5117, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.1724143028259277, | |
| "rewards/margins": 0.8879534602165222, | |
| "rewards/rejected": -3.0603675842285156, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.4054051378190915e-06, | |
| "logits/chosen": -0.30406031012535095, | |
| "logits/rejected": -0.30475375056266785, | |
| "logps/chosen": -447.04022216796875, | |
| "logps/rejected": -452.49658203125, | |
| "loss": 0.493, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.098881483078003, | |
| "rewards/margins": 0.9250394105911255, | |
| "rewards/rejected": -3.023920774459839, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.387760711393052e-06, | |
| "logits/chosen": -0.3125828206539154, | |
| "logits/rejected": -0.3135472536087036, | |
| "logps/chosen": -441.21337890625, | |
| "logps/rejected": -446.6187438964844, | |
| "loss": 0.5226, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.1020660400390625, | |
| "rewards/margins": 0.8715343475341797, | |
| "rewards/rejected": -2.973600387573242, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.369894747877627e-06, | |
| "logits/chosen": -0.30844077467918396, | |
| "logits/rejected": -0.3093765676021576, | |
| "logps/chosen": -439.68060302734375, | |
| "logps/rejected": -447.6014709472656, | |
| "loss": 0.4748, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.9754493236541748, | |
| "rewards/margins": 1.023809552192688, | |
| "rewards/rejected": -2.9992587566375732, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.3518093439228484e-06, | |
| "logits/chosen": -0.309563547372818, | |
| "logits/rejected": -0.3104109764099121, | |
| "logps/chosen": -442.0809631347656, | |
| "logps/rejected": -449.5039978027344, | |
| "loss": 0.4696, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.0284624099731445, | |
| "rewards/margins": 0.8842188119888306, | |
| "rewards/rejected": -2.9126813411712646, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.333506621931056e-06, | |
| "logits/chosen": -0.3095022737979889, | |
| "logits/rejected": -0.3111112713813782, | |
| "logps/chosen": -441.48736572265625, | |
| "logps/rejected": -452.59466552734375, | |
| "loss": 0.4302, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.7804081439971924, | |
| "rewards/margins": 1.1726500988006592, | |
| "rewards/rejected": -2.9530580043792725, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.3149887298078275e-06, | |
| "logits/chosen": -0.3100133538246155, | |
| "logits/rejected": -0.3110717535018921, | |
| "logps/chosen": -439.8687438964844, | |
| "logps/rejected": -447.7996520996094, | |
| "loss": 0.4705, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.8933576345443726, | |
| "rewards/margins": 1.0525233745574951, | |
| "rewards/rejected": -2.9458811283111572, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_logits/chosen": -0.3688412606716156, | |
| "eval_logits/rejected": -0.3696078956127167, | |
| "eval_logps/chosen": -432.4996643066406, | |
| "eval_logps/rejected": -440.57073974609375, | |
| "eval_loss": 0.4888974726200104, | |
| "eval_rewards/accuracies": 0.7599999904632568, | |
| "eval_rewards/chosen": -1.9608467817306519, | |
| "eval_rewards/margins": 0.9983222484588623, | |
| "eval_rewards/rejected": -2.9591689109802246, | |
| "eval_runtime": 376.2946, | |
| "eval_samples_per_second": 1.329, | |
| "eval_steps_per_second": 1.329, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.296257840709906e-06, | |
| "logits/chosen": -0.3060837686061859, | |
| "logits/rejected": -0.30729439854621887, | |
| "logps/chosen": -443.59765625, | |
| "logps/rejected": -454.3882751464844, | |
| "loss": 0.4934, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.0910544395446777, | |
| "rewards/margins": 0.964927077293396, | |
| "rewards/rejected": -3.0559818744659424, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.277316152790177e-06, | |
| "logits/chosen": -0.3090333938598633, | |
| "logits/rejected": -0.3097476363182068, | |
| "logps/chosen": -446.78564453125, | |
| "logps/rejected": -453.74859619140625, | |
| "loss": 0.5066, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.2516064643859863, | |
| "rewards/margins": 0.9105945825576782, | |
| "rewards/rejected": -3.162201404571533, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.2581658889397e-06, | |
| "logits/chosen": -0.2983805537223816, | |
| "logits/rejected": -0.29977601766586304, | |
| "logps/chosen": -434.3565979003906, | |
| "logps/rejected": -444.49542236328125, | |
| "loss": 0.4289, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -1.9821250438690186, | |
| "rewards/margins": 1.0745497941970825, | |
| "rewards/rejected": -3.0566749572753906, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.238809296526847e-06, | |
| "logits/chosen": -0.30951178073883057, | |
| "logits/rejected": -0.31038326025009155, | |
| "logps/chosen": -453.48419189453125, | |
| "logps/rejected": -461.54833984375, | |
| "loss": 0.523, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.3027613162994385, | |
| "rewards/margins": 0.8140772581100464, | |
| "rewards/rejected": -3.1168384552001953, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.219248647133559e-06, | |
| "logits/chosen": -0.3112717568874359, | |
| "logits/rejected": -0.3124113082885742, | |
| "logps/chosen": -437.2984313964844, | |
| "logps/rejected": -447.7862243652344, | |
| "loss": 0.4619, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.230332612991333, | |
| "rewards/margins": 1.0623276233673096, | |
| "rewards/rejected": -3.2926604747772217, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.19948623628877e-06, | |
| "logits/chosen": -0.3127744495868683, | |
| "logits/rejected": -0.31366902589797974, | |
| "logps/chosen": -451.15966796875, | |
| "logps/rejected": -458.08154296875, | |
| "loss": 0.5186, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.2668843269348145, | |
| "rewards/margins": 0.8956031799316406, | |
| "rewards/rejected": -3.162487268447876, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.179524383199016e-06, | |
| "logits/chosen": -0.30885085463523865, | |
| "logits/rejected": -0.3100178837776184, | |
| "logps/chosen": -445.05670166015625, | |
| "logps/rejected": -453.55328369140625, | |
| "loss": 0.4533, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.1777684688568115, | |
| "rewards/margins": 1.1419053077697754, | |
| "rewards/rejected": -3.319674253463745, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.159365430476262e-06, | |
| "logits/chosen": -0.30774661898612976, | |
| "logits/rejected": -0.3091534674167633, | |
| "logps/chosen": -445.9901428222656, | |
| "logps/rejected": -453.9535217285156, | |
| "loss": 0.4711, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.2285873889923096, | |
| "rewards/margins": 1.0858628749847412, | |
| "rewards/rejected": -3.31445050239563, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.139011743862991e-06, | |
| "logits/chosen": -0.31220975518226624, | |
| "logits/rejected": -0.31295710802078247, | |
| "logps/chosen": -437.8184509277344, | |
| "logps/rejected": -450.45611572265625, | |
| "loss": 0.4411, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.122331142425537, | |
| "rewards/margins": 1.2842220067977905, | |
| "rewards/rejected": -3.406553268432617, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.11846571195457e-06, | |
| "logits/chosen": -0.30749282240867615, | |
| "logits/rejected": -0.3092586398124695, | |
| "logps/chosen": -445.489013671875, | |
| "logps/rejected": -456.45361328125, | |
| "loss": 0.4331, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.122631788253784, | |
| "rewards/margins": 1.2757575511932373, | |
| "rewards/rejected": -3.3983893394470215, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.0977297459189405e-06, | |
| "logits/chosen": -0.31161195039749146, | |
| "logits/rejected": -0.3124944865703583, | |
| "logps/chosen": -448.9032287597656, | |
| "logps/rejected": -456.729248046875, | |
| "loss": 0.4549, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.3067939281463623, | |
| "rewards/margins": 1.165810227394104, | |
| "rewards/rejected": -3.472604274749756, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.076806279213656e-06, | |
| "logits/chosen": -0.311604380607605, | |
| "logits/rejected": -0.312518447637558, | |
| "logps/chosen": -438.07916259765625, | |
| "logps/rejected": -450.47381591796875, | |
| "loss": 0.4232, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.2351415157318115, | |
| "rewards/margins": 1.1904911994934082, | |
| "rewards/rejected": -3.425632953643799, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.055697767300302e-06, | |
| "logits/chosen": -0.3170091211795807, | |
| "logits/rejected": -0.31755563616752625, | |
| "logps/chosen": -442.83758544921875, | |
| "logps/rejected": -450.9444885253906, | |
| "loss": 0.5088, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.335662841796875, | |
| "rewards/margins": 1.0687111616134644, | |
| "rewards/rejected": -3.40437388420105, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.034406687356344e-06, | |
| "logits/chosen": -0.3176030218601227, | |
| "logits/rejected": -0.31867748498916626, | |
| "logps/chosen": -438.16229248046875, | |
| "logps/rejected": -446.01806640625, | |
| "loss": 0.5146, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.501446008682251, | |
| "rewards/margins": 0.903441309928894, | |
| "rewards/rejected": -3.4048874378204346, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.012935537984414e-06, | |
| "logits/chosen": -0.31417202949523926, | |
| "logits/rejected": -0.3148192763328552, | |
| "logps/chosen": -435.503173828125, | |
| "logps/rejected": -444.60400390625, | |
| "loss": 0.5049, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.4111835956573486, | |
| "rewards/margins": 0.8773403167724609, | |
| "rewards/rejected": -3.2885234355926514, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.991286838919086e-06, | |
| "logits/chosen": -0.30995315313339233, | |
| "logits/rejected": -0.31148335337638855, | |
| "logps/chosen": -440.8172912597656, | |
| "logps/rejected": -452.94268798828125, | |
| "loss": 0.4584, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.2748212814331055, | |
| "rewards/margins": 1.0933376550674438, | |
| "rewards/rejected": -3.3681588172912598, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.969463130731183e-06, | |
| "logits/chosen": -0.3108167052268982, | |
| "logits/rejected": -0.31207841634750366, | |
| "logps/chosen": -443.76409912109375, | |
| "logps/rejected": -456.50286865234375, | |
| "loss": 0.4063, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.237427234649658, | |
| "rewards/margins": 1.2730433940887451, | |
| "rewards/rejected": -3.5104706287384033, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.947466974529622e-06, | |
| "logits/chosen": -0.3074961304664612, | |
| "logits/rejected": -0.30913347005844116, | |
| "logps/chosen": -451.47320556640625, | |
| "logps/rejected": -461.1958923339844, | |
| "loss": 0.4688, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.525216817855835, | |
| "rewards/margins": 1.2734287977218628, | |
| "rewards/rejected": -3.798645496368408, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.925300951660859e-06, | |
| "logits/chosen": -0.3098825216293335, | |
| "logits/rejected": -0.3106127381324768, | |
| "logps/chosen": -449.3988342285156, | |
| "logps/rejected": -455.8897399902344, | |
| "loss": 0.4974, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.6619412899017334, | |
| "rewards/margins": 1.0037035942077637, | |
| "rewards/rejected": -3.665644884109497, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.9029676634059565e-06, | |
| "logits/chosen": -0.31196895241737366, | |
| "logits/rejected": -0.3131485879421234, | |
| "logps/chosen": -451.7205505371094, | |
| "logps/rejected": -461.85467529296875, | |
| "loss": 0.4296, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.469062328338623, | |
| "rewards/margins": 1.1974780559539795, | |
| "rewards/rejected": -3.6665406227111816, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_logits/chosen": -0.3799600601196289, | |
| "eval_logits/rejected": -0.3806193768978119, | |
| "eval_logps/chosen": -436.8405456542969, | |
| "eval_logps/rejected": -445.942626953125, | |
| "eval_loss": 0.48261019587516785, | |
| "eval_rewards/accuracies": 0.7459999918937683, | |
| "eval_rewards/chosen": -2.3949320316314697, | |
| "eval_rewards/margins": 1.1014209985733032, | |
| "eval_rewards/rejected": -3.4963533878326416, | |
| "eval_runtime": 377.1489, | |
| "eval_samples_per_second": 1.326, | |
| "eval_steps_per_second": 1.326, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.880469730675311e-06, | |
| "logits/chosen": -0.31937772035598755, | |
| "logits/rejected": -0.3201027512550354, | |
| "logps/chosen": -444.93267822265625, | |
| "logps/rejected": -454.3338317871094, | |
| "loss": 0.4744, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.4238359928131104, | |
| "rewards/margins": 1.1197197437286377, | |
| "rewards/rejected": -3.543555736541748, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.857809793701082e-06, | |
| "logits/chosen": -0.3155730664730072, | |
| "logits/rejected": -0.31668931245803833, | |
| "logps/chosen": -447.9942932128906, | |
| "logps/rejected": -458.11199951171875, | |
| "loss": 0.4398, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.2663698196411133, | |
| "rewards/margins": 1.3081058263778687, | |
| "rewards/rejected": -3.5744757652282715, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.834990511727341e-06, | |
| "logits/chosen": -0.3186780512332916, | |
| "logits/rejected": -0.32040825486183167, | |
| "logps/chosen": -445.6949768066406, | |
| "logps/rejected": -458.57244873046875, | |
| "loss": 0.4537, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.343160629272461, | |
| "rewards/margins": 1.2446506023406982, | |
| "rewards/rejected": -3.587811231613159, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.812014562698002e-06, | |
| "logits/chosen": -0.320089191198349, | |
| "logits/rejected": -0.3210357427597046, | |
| "logps/chosen": -441.82354736328125, | |
| "logps/rejected": -449.4639587402344, | |
| "loss": 0.5402, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.470724582672119, | |
| "rewards/margins": 0.9423478841781616, | |
| "rewards/rejected": -3.4130725860595703, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.788884642942555e-06, | |
| "logits/chosen": -0.32223668694496155, | |
| "logits/rejected": -0.32441529631614685, | |
| "logps/chosen": -444.36328125, | |
| "logps/rejected": -457.4508361816406, | |
| "loss": 0.4432, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.3148508071899414, | |
| "rewards/margins": 1.2026770114898682, | |
| "rewards/rejected": -3.5175278186798096, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.765603466859635e-06, | |
| "logits/chosen": -0.31094425916671753, | |
| "logits/rejected": -0.3124980330467224, | |
| "logps/chosen": -439.57025146484375, | |
| "logps/rejected": -453.1656799316406, | |
| "loss": 0.4585, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.423621654510498, | |
| "rewards/margins": 1.1653788089752197, | |
| "rewards/rejected": -3.589000701904297, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.7421737665984807e-06, | |
| "logits/chosen": -0.32444941997528076, | |
| "logits/rejected": -0.3258149325847626, | |
| "logps/chosen": -444.17742919921875, | |
| "logps/rejected": -454.7518005371094, | |
| "loss": 0.485, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.465573310852051, | |
| "rewards/margins": 1.1133558750152588, | |
| "rewards/rejected": -3.5789291858673096, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.7185982917382986e-06, | |
| "logits/chosen": -0.32046034932136536, | |
| "logits/rejected": -0.3209912180900574, | |
| "logps/chosen": -449.0337829589844, | |
| "logps/rejected": -456.1290588378906, | |
| "loss": 0.5036, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.421260356903076, | |
| "rewards/margins": 1.0578956604003906, | |
| "rewards/rejected": -3.479156017303467, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.6948798089655913e-06, | |
| "logits/chosen": -0.3232346177101135, | |
| "logits/rejected": -0.3241461217403412, | |
| "logps/chosen": -448.1339416503906, | |
| "logps/rejected": -455.69970703125, | |
| "loss": 0.4664, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.2551751136779785, | |
| "rewards/margins": 1.1089966297149658, | |
| "rewards/rejected": -3.3641715049743652, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.671021101749476e-06, | |
| "logits/chosen": -0.3160512447357178, | |
| "logits/rejected": -0.3167613744735718, | |
| "logps/chosen": -434.97698974609375, | |
| "logps/rejected": -441.48065185546875, | |
| "loss": 0.4634, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.224177122116089, | |
| "rewards/margins": 1.158850908279419, | |
| "rewards/rejected": -3.383028507232666, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.6470249700150273e-06, | |
| "logits/chosen": -0.31829750537872314, | |
| "logits/rejected": -0.3188309669494629, | |
| "logps/chosen": -440.1014099121094, | |
| "logps/rejected": -448.751220703125, | |
| "loss": 0.4287, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.001333713531494, | |
| "rewards/margins": 1.3394745588302612, | |
| "rewards/rejected": -3.340808153152466, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.6228942298146985e-06, | |
| "logits/chosen": -0.31696969270706177, | |
| "logits/rejected": -0.3185669183731079, | |
| "logps/chosen": -436.61090087890625, | |
| "logps/rejected": -446.8427734375, | |
| "loss": 0.4086, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.968629240989685, | |
| "rewards/margins": 1.365235686302185, | |
| "rewards/rejected": -3.33386492729187, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.598631712997841e-06, | |
| "logits/chosen": -0.3232669234275818, | |
| "logits/rejected": -0.32362625002861023, | |
| "logps/chosen": -445.9930114746094, | |
| "logps/rejected": -456.0194396972656, | |
| "loss": 0.4797, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.1778838634490967, | |
| "rewards/margins": 1.1961848735809326, | |
| "rewards/rejected": -3.3740687370300293, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.5742402668783797e-06, | |
| "logits/chosen": -0.31457391381263733, | |
| "logits/rejected": -0.31524404883384705, | |
| "logps/chosen": -434.63885498046875, | |
| "logps/rejected": -445.6629943847656, | |
| "loss": 0.4942, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.235532283782959, | |
| "rewards/margins": 1.1538090705871582, | |
| "rewards/rejected": -3.389340877532959, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.549722753900662e-06, | |
| "logits/chosen": -0.3312085270881653, | |
| "logits/rejected": -0.33145731687545776, | |
| "logps/chosen": -451.101806640625, | |
| "logps/rejected": -457.88909912109375, | |
| "loss": 0.5859, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -2.525597095489502, | |
| "rewards/margins": 0.7067753672599792, | |
| "rewards/rejected": -3.232372283935547, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.5250820513035403e-06, | |
| "logits/chosen": -0.3225269615650177, | |
| "logits/rejected": -0.3232432007789612, | |
| "logps/chosen": -438.2666931152344, | |
| "logps/rejected": -450.084716796875, | |
| "loss": 0.4502, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.3596291542053223, | |
| "rewards/margins": 1.153564691543579, | |
| "rewards/rejected": -3.5131936073303223, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.500321050782717e-06, | |
| "logits/chosen": -0.3299608826637268, | |
| "logits/rejected": -0.33111685514450073, | |
| "logps/chosen": -435.506103515625, | |
| "logps/rejected": -449.41644287109375, | |
| "loss": 0.4587, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.255194664001465, | |
| "rewards/margins": 1.2196067571640015, | |
| "rewards/rejected": -3.474801540374756, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.4754426581513866e-06, | |
| "logits/chosen": -0.3299122750759125, | |
| "logits/rejected": -0.33067744970321655, | |
| "logps/chosen": -450.20074462890625, | |
| "logps/rejected": -456.9153747558594, | |
| "loss": 0.4929, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.382310390472412, | |
| "rewards/margins": 1.0834969282150269, | |
| "rewards/rejected": -3.4658074378967285, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.45044979299923e-06, | |
| "logits/chosen": -0.3264179527759552, | |
| "logits/rejected": -0.32756897807121277, | |
| "logps/chosen": -442.2974548339844, | |
| "logps/rejected": -449.1595764160156, | |
| "loss": 0.4977, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.263164520263672, | |
| "rewards/margins": 1.0983796119689941, | |
| "rewards/rejected": -3.361544370651245, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.425345388349787e-06, | |
| "logits/chosen": -0.31463193893432617, | |
| "logits/rejected": -0.31522423028945923, | |
| "logps/chosen": -442.2705078125, | |
| "logps/rejected": -452.19110107421875, | |
| "loss": 0.501, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.2270102500915527, | |
| "rewards/margins": 1.1489759683609009, | |
| "rewards/rejected": -3.3759865760803223, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_logits/chosen": -0.39124199748039246, | |
| "eval_logits/rejected": -0.3919140696525574, | |
| "eval_logps/chosen": -434.10162353515625, | |
| "eval_logps/rejected": -443.24261474609375, | |
| "eval_loss": 0.4862767159938812, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": -2.1210429668426514, | |
| "eval_rewards/margins": 1.1053153276443481, | |
| "eval_rewards/rejected": -3.22635817527771, | |
| "eval_runtime": 375.0192, | |
| "eval_samples_per_second": 1.333, | |
| "eval_steps_per_second": 1.333, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.4001323903162476e-06, | |
| "logits/chosen": -0.32597848773002625, | |
| "logits/rejected": -0.32685333490371704, | |
| "logps/chosen": -435.82135009765625, | |
| "logps/rejected": -446.47552490234375, | |
| "loss": 0.4618, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.0256919860839844, | |
| "rewards/margins": 1.2204935550689697, | |
| "rewards/rejected": -3.246185302734375, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.3748137577557216e-06, | |
| "logits/chosen": -0.3275033235549927, | |
| "logits/rejected": -0.3280579149723053, | |
| "logps/chosen": -438.50384521484375, | |
| "logps/rejected": -447.7579040527344, | |
| "loss": 0.4531, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.154592514038086, | |
| "rewards/margins": 1.1412467956542969, | |
| "rewards/rejected": -3.295839309692383, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.3493924619219964e-06, | |
| "logits/chosen": -0.3302023112773895, | |
| "logits/rejected": -0.33196666836738586, | |
| "logps/chosen": -454.8751525878906, | |
| "logps/rejected": -466.814453125, | |
| "loss": 0.4865, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.2140915393829346, | |
| "rewards/margins": 1.0115987062454224, | |
| "rewards/rejected": -3.2256903648376465, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.3238714861168513e-06, | |
| "logits/chosen": -0.3286048173904419, | |
| "logits/rejected": -0.3293796181678772, | |
| "logps/chosen": -436.85308837890625, | |
| "logps/rejected": -445.0462951660156, | |
| "loss": 0.4905, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.0171353816986084, | |
| "rewards/margins": 1.1139663457870483, | |
| "rewards/rejected": -3.1311020851135254, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.29825382533995e-06, | |
| "logits/chosen": -0.3311420679092407, | |
| "logits/rejected": -0.3327622711658478, | |
| "logps/chosen": -444.5484924316406, | |
| "logps/rejected": -455.69732666015625, | |
| "loss": 0.5066, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.358177661895752, | |
| "rewards/margins": 0.9705084562301636, | |
| "rewards/rejected": -3.328686237335205, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.272542485937369e-06, | |
| "logits/chosen": -0.33226504921913147, | |
| "logits/rejected": -0.3331693708896637, | |
| "logps/chosen": -434.11248779296875, | |
| "logps/rejected": -441.95428466796875, | |
| "loss": 0.4827, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.988227128982544, | |
| "rewards/margins": 1.1914219856262207, | |
| "rewards/rejected": -3.1796488761901855, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2467404852487846e-06, | |
| "logits/chosen": -0.33789581060409546, | |
| "logits/rejected": -0.33837661147117615, | |
| "logps/chosen": -445.60009765625, | |
| "logps/rejected": -453.21380615234375, | |
| "loss": 0.4935, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.8867080211639404, | |
| "rewards/margins": 1.2431962490081787, | |
| "rewards/rejected": -3.1299045085906982, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2208508512533777e-06, | |
| "logits/chosen": -0.3227623403072357, | |
| "logits/rejected": -0.3246156573295593, | |
| "logps/chosen": -447.2259826660156, | |
| "logps/rejected": -456.6937561035156, | |
| "loss": 0.4514, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.05145001411438, | |
| "rewards/margins": 1.0999400615692139, | |
| "rewards/rejected": -3.151390552520752, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.1948766222144863e-06, | |
| "logits/chosen": -0.32600507140159607, | |
| "logits/rejected": -0.3266277313232422, | |
| "logps/chosen": -434.4227600097656, | |
| "logps/rejected": -442.1160583496094, | |
| "loss": 0.5228, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.233891010284424, | |
| "rewards/margins": 0.8642382621765137, | |
| "rewards/rejected": -3.0981292724609375, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.168820846323053e-06, | |
| "logits/chosen": -0.3299737870693207, | |
| "logits/rejected": -0.3313831090927124, | |
| "logps/chosen": -434.67803955078125, | |
| "logps/rejected": -446.71417236328125, | |
| "loss": 0.4392, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.0030479431152344, | |
| "rewards/margins": 1.1440895795822144, | |
| "rewards/rejected": -3.147137403488159, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.142686581339902e-06, | |
| "logits/chosen": -0.32545098662376404, | |
| "logits/rejected": -0.32752394676208496, | |
| "logps/chosen": -435.9081115722656, | |
| "logps/rejected": -445.0193786621094, | |
| "loss": 0.5154, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.0516555309295654, | |
| "rewards/margins": 1.0273317098617554, | |
| "rewards/rejected": -3.0789875984191895, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.1164768942369058e-06, | |
| "logits/chosen": -0.33717575669288635, | |
| "logits/rejected": -0.33777323365211487, | |
| "logps/chosen": -439.6886291503906, | |
| "logps/rejected": -450.8135681152344, | |
| "loss": 0.4056, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -1.7847926616668701, | |
| "rewards/margins": 1.3125979900360107, | |
| "rewards/rejected": -3.097390651702881, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0901948608370503e-06, | |
| "logits/chosen": -0.3371260166168213, | |
| "logits/rejected": -0.33846548199653625, | |
| "logps/chosen": -436.64190673828125, | |
| "logps/rejected": -450.7660217285156, | |
| "loss": 0.4474, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -1.9579681158065796, | |
| "rewards/margins": 1.1995084285736084, | |
| "rewards/rejected": -3.1574764251708984, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.063843565453486e-06, | |
| "logits/chosen": -0.3233332931995392, | |
| "logits/rejected": -0.3235628008842468, | |
| "logps/chosen": -441.6625061035156, | |
| "logps/rejected": -450.6896057128906, | |
| "loss": 0.4454, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.0109899044036865, | |
| "rewards/margins": 1.2036950588226318, | |
| "rewards/rejected": -3.2146849632263184, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.0374261005275606e-06, | |
| "logits/chosen": -0.32744866609573364, | |
| "logits/rejected": -0.32873040437698364, | |
| "logps/chosen": -438.97955322265625, | |
| "logps/rejected": -452.37255859375, | |
| "loss": 0.4277, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.850262999534607, | |
| "rewards/margins": 1.45210862159729, | |
| "rewards/rejected": -3.3023715019226074, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.0109455662659126e-06, | |
| "logits/chosen": -0.33421364426612854, | |
| "logits/rejected": -0.33508172631263733, | |
| "logps/chosen": -438.8184509277344, | |
| "logps/rejected": -447.74267578125, | |
| "loss": 0.469, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.3082900047302246, | |
| "rewards/margins": 1.0502725839614868, | |
| "rewards/rejected": -3.358562469482422, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.984405070276646e-06, | |
| "logits/chosen": -0.3377315402030945, | |
| "logits/rejected": -0.3380245268344879, | |
| "logps/chosen": -440.62689208984375, | |
| "logps/rejected": -448.7757873535156, | |
| "loss": 0.4497, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.239384174346924, | |
| "rewards/margins": 1.1150033473968506, | |
| "rewards/rejected": -3.3543879985809326, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9578077272046407e-06, | |
| "logits/chosen": -0.3324066698551178, | |
| "logits/rejected": -0.3327699303627014, | |
| "logps/chosen": -445.11651611328125, | |
| "logps/rejected": -452.57135009765625, | |
| "loss": 0.4627, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.3683180809020996, | |
| "rewards/margins": 1.2064650058746338, | |
| "rewards/rejected": -3.5747828483581543, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.931156658366032e-06, | |
| "logits/chosen": -0.33288371562957764, | |
| "logits/rejected": -0.33407607674598694, | |
| "logps/chosen": -438.28363037109375, | |
| "logps/rejected": -449.0726623535156, | |
| "loss": 0.4609, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.4992074966430664, | |
| "rewards/margins": 1.1167179346084595, | |
| "rewards/rejected": -3.6159253120422363, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.9044549913819125e-06, | |
| "logits/chosen": -0.33773019909858704, | |
| "logits/rejected": -0.3393145203590393, | |
| "logps/chosen": -441.80511474609375, | |
| "logps/rejected": -450.90997314453125, | |
| "loss": 0.421, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.251107692718506, | |
| "rewards/margins": 1.231013536453247, | |
| "rewards/rejected": -3.482121706008911, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_logits/chosen": -0.401915043592453, | |
| "eval_logits/rejected": -0.4025632441043854, | |
| "eval_logps/chosen": -436.2534484863281, | |
| "eval_logps/rejected": -445.6542053222656, | |
| "eval_loss": 0.4834233820438385, | |
| "eval_rewards/accuracies": 0.7580000162124634, | |
| "eval_rewards/chosen": -2.336226463317871, | |
| "eval_rewards/margins": 1.131289005279541, | |
| "eval_rewards/rejected": -3.4675159454345703, | |
| "eval_runtime": 373.3095, | |
| "eval_samples_per_second": 1.339, | |
| "eval_steps_per_second": 1.339, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.877705859811292e-06, | |
| "logits/chosen": -0.32963141798973083, | |
| "logits/rejected": -0.32958561182022095, | |
| "logps/chosen": -441.468017578125, | |
| "logps/rejected": -452.41632080078125, | |
| "loss": 0.4867, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.3174846172332764, | |
| "rewards/margins": 1.2222645282745361, | |
| "rewards/rejected": -3.5397496223449707, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.850912402783361e-06, | |
| "logits/chosen": -0.33581605553627014, | |
| "logits/rejected": -0.3373740315437317, | |
| "logps/chosen": -443.38507080078125, | |
| "logps/rejected": -455.705810546875, | |
| "loss": 0.4821, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.5340211391448975, | |
| "rewards/margins": 1.059066653251648, | |
| "rewards/rejected": -3.593087673187256, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8240777646290973e-06, | |
| "logits/chosen": -0.3432762026786804, | |
| "logits/rejected": -0.3442252576351166, | |
| "logps/chosen": -455.3641662597656, | |
| "logps/rejected": -465.02032470703125, | |
| "loss": 0.4363, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.3768112659454346, | |
| "rewards/margins": 1.3034956455230713, | |
| "rewards/rejected": -3.680307388305664, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7972050945122666e-06, | |
| "logits/chosen": -0.3318456709384918, | |
| "logits/rejected": -0.33274808526039124, | |
| "logps/chosen": -442.74029541015625, | |
| "logps/rejected": -453.32745361328125, | |
| "loss": 0.4564, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.556647300720215, | |
| "rewards/margins": 1.2236577272415161, | |
| "rewards/rejected": -3.7803051471710205, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7702975460598545e-06, | |
| "logits/chosen": -0.33731141686439514, | |
| "logits/rejected": -0.33812469244003296, | |
| "logps/chosen": -445.42596435546875, | |
| "logps/rejected": -457.1685485839844, | |
| "loss": 0.4487, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.542117118835449, | |
| "rewards/margins": 1.1616876125335693, | |
| "rewards/rejected": -3.7038047313690186, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7433582769919752e-06, | |
| "logits/chosen": -0.3384588360786438, | |
| "logits/rejected": -0.33992061018943787, | |
| "logps/chosen": -448.994873046875, | |
| "logps/rejected": -456.4767150878906, | |
| "loss": 0.5548, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.7594006061553955, | |
| "rewards/margins": 0.9322620630264282, | |
| "rewards/rejected": -3.691662549972534, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.716390448751294e-06, | |
| "logits/chosen": -0.34274882078170776, | |
| "logits/rejected": -0.34329262375831604, | |
| "logps/chosen": -450.77972412109375, | |
| "logps/rejected": -461.76239013671875, | |
| "loss": 0.4976, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.5453426837921143, | |
| "rewards/margins": 1.080673336982727, | |
| "rewards/rejected": -3.6260154247283936, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.6893972261320265e-06, | |
| "logits/chosen": -0.3363896608352661, | |
| "logits/rejected": -0.33778852224349976, | |
| "logps/chosen": -442.7216796875, | |
| "logps/rejected": -453.9684143066406, | |
| "loss": 0.4628, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.584522008895874, | |
| "rewards/margins": 1.2380025386810303, | |
| "rewards/rejected": -3.8225245475769043, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.6623817769085268e-06, | |
| "logits/chosen": -0.3299495577812195, | |
| "logits/rejected": -0.3310778737068176, | |
| "logps/chosen": -438.0104064941406, | |
| "logps/rejected": -450.68572998046875, | |
| "loss": 0.4308, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.4091532230377197, | |
| "rewards/margins": 1.258310079574585, | |
| "rewards/rejected": -3.6674628257751465, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6353472714635443e-06, | |
| "logits/chosen": -0.3383990526199341, | |
| "logits/rejected": -0.33991554379463196, | |
| "logps/chosen": -453.71038818359375, | |
| "logps/rejected": -466.170166015625, | |
| "loss": 0.4603, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.7626547813415527, | |
| "rewards/margins": 1.127687692642212, | |
| "rewards/rejected": -3.8903422355651855, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6082968824161558e-06, | |
| "logits/chosen": -0.3404627740383148, | |
| "logits/rejected": -0.3412095606327057, | |
| "logps/chosen": -446.44281005859375, | |
| "logps/rejected": -454.9615783691406, | |
| "loss": 0.4887, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.626864194869995, | |
| "rewards/margins": 1.2109944820404053, | |
| "rewards/rejected": -3.8378589153289795, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.5812337842494517e-06, | |
| "logits/chosen": -0.3334888815879822, | |
| "logits/rejected": -0.334361732006073, | |
| "logps/chosen": -437.97979736328125, | |
| "logps/rejected": -449.66864013671875, | |
| "loss": 0.4395, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.6739344596862793, | |
| "rewards/margins": 1.2408138513565063, | |
| "rewards/rejected": -3.914747953414917, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.554161152937994e-06, | |
| "logits/chosen": -0.34664058685302734, | |
| "logits/rejected": -0.34752577543258667, | |
| "logps/chosen": -452.38983154296875, | |
| "logps/rejected": -458.98046875, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.5840182304382324, | |
| "rewards/margins": 1.3410053253173828, | |
| "rewards/rejected": -3.9250235557556152, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.5270821655750997e-06, | |
| "logits/chosen": -0.3402210772037506, | |
| "logits/rejected": -0.3408128619194031, | |
| "logps/chosen": -452.06658935546875, | |
| "logps/rejected": -465.1114807128906, | |
| "loss": 0.383, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -2.483328342437744, | |
| "rewards/margins": 1.4499397277832031, | |
| "rewards/rejected": -3.9332680702209473, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.5e-06, | |
| "logits/chosen": -0.33848652243614197, | |
| "logits/rejected": -0.3391149640083313, | |
| "logps/chosen": -447.24407958984375, | |
| "logps/rejected": -456.50933837890625, | |
| "loss": 0.4384, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.932450294494629, | |
| "rewards/margins": 1.1602147817611694, | |
| "rewards/rejected": -4.09266471862793, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.4729178344249007e-06, | |
| "logits/chosen": -0.34805721044540405, | |
| "logits/rejected": -0.34990328550338745, | |
| "logps/chosen": -457.77520751953125, | |
| "logps/rejected": -467.7879943847656, | |
| "loss": 0.4306, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.852219343185425, | |
| "rewards/margins": 1.3265211582183838, | |
| "rewards/rejected": -4.178740501403809, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.4458388470620066e-06, | |
| "logits/chosen": -0.34960517287254333, | |
| "logits/rejected": -0.35107699036598206, | |
| "logps/chosen": -457.14569091796875, | |
| "logps/rejected": -467.239990234375, | |
| "loss": 0.4444, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.756500720977783, | |
| "rewards/margins": 1.3199396133422852, | |
| "rewards/rejected": -4.07643985748291, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.418766215750549e-06, | |
| "logits/chosen": -0.3384454548358917, | |
| "logits/rejected": -0.3394390642642975, | |
| "logps/chosen": -455.9664001464844, | |
| "logps/rejected": -467.4884338378906, | |
| "loss": 0.4289, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.917130708694458, | |
| "rewards/margins": 1.3165969848632812, | |
| "rewards/rejected": -4.23372745513916, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.3917031175838447e-06, | |
| "logits/chosen": -0.33930128812789917, | |
| "logits/rejected": -0.33957165479660034, | |
| "logps/chosen": -452.30548095703125, | |
| "logps/rejected": -467.23614501953125, | |
| "loss": 0.4339, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.8669447898864746, | |
| "rewards/margins": 1.3872116804122925, | |
| "rewards/rejected": -4.254156589508057, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.3646527285364565e-06, | |
| "logits/chosen": -0.33700358867645264, | |
| "logits/rejected": -0.33825331926345825, | |
| "logps/chosen": -451.98272705078125, | |
| "logps/rejected": -461.351318359375, | |
| "loss": 0.4821, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.0442519187927246, | |
| "rewards/margins": 1.0982847213745117, | |
| "rewards/rejected": -4.1425371170043945, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_logits/chosen": -0.41185611486434937, | |
| "eval_logits/rejected": -0.41246527433395386, | |
| "eval_logps/chosen": -441.00274658203125, | |
| "eval_logps/rejected": -451.21136474609375, | |
| "eval_loss": 0.48274433612823486, | |
| "eval_rewards/accuracies": 0.7620000243186951, | |
| "eval_rewards/chosen": -2.811156749725342, | |
| "eval_rewards/margins": 1.2120723724365234, | |
| "eval_rewards/rejected": -4.023228645324707, | |
| "eval_runtime": 376.6555, | |
| "eval_samples_per_second": 1.327, | |
| "eval_steps_per_second": 1.327, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.3376182230914728e-06, | |
| "logits/chosen": -0.35231637954711914, | |
| "logits/rejected": -0.3524485230445862, | |
| "logps/chosen": -450.71600341796875, | |
| "logps/rejected": -459.95623779296875, | |
| "loss": 0.4562, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.791792869567871, | |
| "rewards/margins": 1.3087048530578613, | |
| "rewards/rejected": -4.100497245788574, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.3106027738679743e-06, | |
| "logits/chosen": -0.3403882086277008, | |
| "logits/rejected": -0.34152495861053467, | |
| "logps/chosen": -453.09197998046875, | |
| "logps/rejected": -461.7265625, | |
| "loss": 0.5492, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.994368076324463, | |
| "rewards/margins": 0.9577304124832153, | |
| "rewards/rejected": -3.9520981311798096, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.2836095512487063e-06, | |
| "logits/chosen": -0.34236225485801697, | |
| "logits/rejected": -0.3437976539134979, | |
| "logps/chosen": -448.03765869140625, | |
| "logps/rejected": -458.0298767089844, | |
| "loss": 0.4769, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.8009488582611084, | |
| "rewards/margins": 1.1950900554656982, | |
| "rewards/rejected": -3.9960389137268066, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.256641723008026e-06, | |
| "logits/chosen": -0.3453958332538605, | |
| "logits/rejected": -0.34628570079803467, | |
| "logps/chosen": -452.4602966308594, | |
| "logps/rejected": -464.2635192871094, | |
| "loss": 0.4904, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.8692307472229004, | |
| "rewards/margins": 1.1883941888809204, | |
| "rewards/rejected": -4.057624816894531, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.2297024539401463e-06, | |
| "logits/chosen": -0.3422110974788666, | |
| "logits/rejected": -0.34265169501304626, | |
| "logps/chosen": -459.0148010253906, | |
| "logps/rejected": -469.46038818359375, | |
| "loss": 0.4726, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.0340380668640137, | |
| "rewards/margins": 1.1149537563323975, | |
| "rewards/rejected": -4.148991584777832, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.2027949054877342e-06, | |
| "logits/chosen": -0.34315139055252075, | |
| "logits/rejected": -0.3437284529209137, | |
| "logps/chosen": -448.80657958984375, | |
| "logps/rejected": -458.0669860839844, | |
| "loss": 0.5145, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.8165078163146973, | |
| "rewards/margins": 1.120755672454834, | |
| "rewards/rejected": -3.9372634887695312, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.175922235370904e-06, | |
| "logits/chosen": -0.34890785813331604, | |
| "logits/rejected": -0.34955543279647827, | |
| "logps/chosen": -448.3866271972656, | |
| "logps/rejected": -457.5038146972656, | |
| "loss": 0.4845, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.5519251823425293, | |
| "rewards/margins": 1.266904592514038, | |
| "rewards/rejected": -3.8188300132751465, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.1490875972166394e-06, | |
| "logits/chosen": -0.3498338460922241, | |
| "logits/rejected": -0.35048046708106995, | |
| "logps/chosen": -449.01849365234375, | |
| "logps/rejected": -459.8980407714844, | |
| "loss": 0.3836, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": -2.5593769550323486, | |
| "rewards/margins": 1.4853286743164062, | |
| "rewards/rejected": -4.044705390930176, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.1222941401887087e-06, | |
| "logits/chosen": -0.3391914367675781, | |
| "logits/rejected": -0.3401142954826355, | |
| "logps/chosen": -437.19488525390625, | |
| "logps/rejected": -449.09820556640625, | |
| "loss": 0.4638, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.642850875854492, | |
| "rewards/margins": 1.1874374151229858, | |
| "rewards/rejected": -3.8302879333496094, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.0955450086180883e-06, | |
| "logits/chosen": -0.3401223123073578, | |
| "logits/rejected": -0.3409723937511444, | |
| "logps/chosen": -453.819580078125, | |
| "logps/rejected": -463.77117919921875, | |
| "loss": 0.4747, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.682774066925049, | |
| "rewards/margins": 1.2849785089492798, | |
| "rewards/rejected": -3.9677529335021973, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.0688433416339694e-06, | |
| "logits/chosen": -0.3425321877002716, | |
| "logits/rejected": -0.3435406982898712, | |
| "logps/chosen": -441.6337890625, | |
| "logps/rejected": -454.7735290527344, | |
| "loss": 0.4359, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.900296688079834, | |
| "rewards/margins": 1.1836225986480713, | |
| "rewards/rejected": -4.083919525146484, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.0421922727953597e-06, | |
| "logits/chosen": -0.3457149863243103, | |
| "logits/rejected": -0.3468255400657654, | |
| "logps/chosen": -449.11700439453125, | |
| "logps/rejected": -461.40020751953125, | |
| "loss": 0.4626, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.759221315383911, | |
| "rewards/margins": 1.2033522129058838, | |
| "rewards/rejected": -3.962573528289795, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.0155949297233542e-06, | |
| "logits/chosen": -0.3487555980682373, | |
| "logits/rejected": -0.34981435537338257, | |
| "logps/chosen": -461.87481689453125, | |
| "logps/rejected": -473.541015625, | |
| "loss": 0.4555, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.762120008468628, | |
| "rewards/margins": 1.2758208513259888, | |
| "rewards/rejected": -4.037940979003906, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.9890544337340882e-06, | |
| "logits/chosen": -0.3474620282649994, | |
| "logits/rejected": -0.34911760687828064, | |
| "logps/chosen": -446.1351623535156, | |
| "logps/rejected": -461.01678466796875, | |
| "loss": 0.4426, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.9118289947509766, | |
| "rewards/margins": 1.271337866783142, | |
| "rewards/rejected": -4.183166980743408, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.96257389947244e-06, | |
| "logits/chosen": -0.34583669900894165, | |
| "logits/rejected": -0.3470597565174103, | |
| "logps/chosen": -445.00054931640625, | |
| "logps/rejected": -457.888671875, | |
| "loss": 0.4487, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.823984146118164, | |
| "rewards/margins": 1.3737802505493164, | |
| "rewards/rejected": -4.1977643966674805, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.936156434546515e-06, | |
| "logits/chosen": -0.3472025990486145, | |
| "logits/rejected": -0.3478149473667145, | |
| "logps/chosen": -450.0955505371094, | |
| "logps/rejected": -459.27166748046875, | |
| "loss": 0.5015, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.8728580474853516, | |
| "rewards/margins": 1.2571897506713867, | |
| "rewards/rejected": -4.130047798156738, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.90980513916295e-06, | |
| "logits/chosen": -0.3443449139595032, | |
| "logits/rejected": -0.3453408479690552, | |
| "logps/chosen": -450.039306640625, | |
| "logps/rejected": -456.46392822265625, | |
| "loss": 0.4463, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8687210083007812, | |
| "rewards/margins": 1.327781081199646, | |
| "rewards/rejected": -4.196502208709717, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.8835231057630955e-06, | |
| "logits/chosen": -0.34365350008010864, | |
| "logits/rejected": -0.34461337327957153, | |
| "logps/chosen": -454.1045837402344, | |
| "logps/rejected": -468.08251953125, | |
| "loss": 0.3981, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.681962490081787, | |
| "rewards/margins": 1.4462287425994873, | |
| "rewards/rejected": -4.128190517425537, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.8573134186600978e-06, | |
| "logits/chosen": -0.3493928909301758, | |
| "logits/rejected": -0.35027194023132324, | |
| "logps/chosen": -447.32666015625, | |
| "logps/rejected": -458.9419860839844, | |
| "loss": 0.4397, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.6235809326171875, | |
| "rewards/margins": 1.4347044229507446, | |
| "rewards/rejected": -4.058285236358643, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.8311791536769485e-06, | |
| "logits/chosen": -0.346055805683136, | |
| "logits/rejected": -0.3475271463394165, | |
| "logps/chosen": -442.3778381347656, | |
| "logps/rejected": -458.1031188964844, | |
| "loss": 0.3935, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -2.6405744552612305, | |
| "rewards/margins": 1.582415223121643, | |
| "rewards/rejected": -4.222989559173584, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_logits/chosen": -0.41737592220306396, | |
| "eval_logits/rejected": -0.4179980754852295, | |
| "eval_logps/chosen": -440.4134826660156, | |
| "eval_logps/rejected": -450.7027893066406, | |
| "eval_loss": 0.47837841510772705, | |
| "eval_rewards/accuracies": 0.7599999904632568, | |
| "eval_rewards/chosen": -2.7522289752960205, | |
| "eval_rewards/margins": 1.220139503479004, | |
| "eval_rewards/rejected": -3.9723684787750244, | |
| "eval_runtime": 351.6535, | |
| "eval_samples_per_second": 1.422, | |
| "eval_steps_per_second": 1.422, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.805123377785515e-06, | |
| "logits/chosen": -0.3527616858482361, | |
| "logits/rejected": -0.3528694212436676, | |
| "logps/chosen": -444.4476623535156, | |
| "logps/rejected": -453.0213317871094, | |
| "loss": 0.4432, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.709862232208252, | |
| "rewards/margins": 1.3173949718475342, | |
| "rewards/rejected": -4.027257442474365, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.7791491487466234e-06, | |
| "logits/chosen": -0.3477206528186798, | |
| "logits/rejected": -0.34793621301651, | |
| "logps/chosen": -444.4949645996094, | |
| "logps/rejected": -456.147705078125, | |
| "loss": 0.4933, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.9446983337402344, | |
| "rewards/margins": 1.1086232662200928, | |
| "rewards/rejected": -4.053321361541748, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.7532595147512167e-06, | |
| "logits/chosen": -0.34836429357528687, | |
| "logits/rejected": -0.34931057691574097, | |
| "logps/chosen": -448.5811462402344, | |
| "logps/rejected": -460.9894104003906, | |
| "loss": 0.4243, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.7561140060424805, | |
| "rewards/margins": 1.3816736936569214, | |
| "rewards/rejected": -4.137787818908691, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.7274575140626318e-06, | |
| "logits/chosen": -0.359462171792984, | |
| "logits/rejected": -0.36063042283058167, | |
| "logps/chosen": -448.9486389160156, | |
| "logps/rejected": -458.26776123046875, | |
| "loss": 0.4759, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.6954994201660156, | |
| "rewards/margins": 1.2426683902740479, | |
| "rewards/rejected": -3.9381680488586426, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.7017461746600506e-06, | |
| "logits/chosen": -0.3540958762168884, | |
| "logits/rejected": -0.3554149866104126, | |
| "logps/chosen": -442.2723083496094, | |
| "logps/rejected": -452.81951904296875, | |
| "loss": 0.479, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.743239402770996, | |
| "rewards/margins": 1.2077919244766235, | |
| "rewards/rejected": -3.95103120803833, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.6761285138831493e-06, | |
| "logits/chosen": -0.3558579981327057, | |
| "logits/rejected": -0.35607069730758667, | |
| "logps/chosen": -448.01458740234375, | |
| "logps/rejected": -458.3499450683594, | |
| "loss": 0.4367, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.706444025039673, | |
| "rewards/margins": 1.3273353576660156, | |
| "rewards/rejected": -4.033779144287109, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.6506075380780043e-06, | |
| "logits/chosen": -0.343932569026947, | |
| "logits/rejected": -0.3449569046497345, | |
| "logps/chosen": -449.41534423828125, | |
| "logps/rejected": -461.0784606933594, | |
| "loss": 0.4612, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.849799633026123, | |
| "rewards/margins": 1.247933030128479, | |
| "rewards/rejected": -4.0977325439453125, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.625186242244279e-06, | |
| "logits/chosen": -0.351362407207489, | |
| "logits/rejected": -0.35285985469818115, | |
| "logps/chosen": -442.25335693359375, | |
| "logps/rejected": -452.58526611328125, | |
| "loss": 0.4487, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.7264816761016846, | |
| "rewards/margins": 1.3034883737564087, | |
| "rewards/rejected": -4.029970169067383, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.5998676096837534e-06, | |
| "logits/chosen": -0.35466188192367554, | |
| "logits/rejected": -0.35623863339424133, | |
| "logps/chosen": -455.30859375, | |
| "logps/rejected": -466.81817626953125, | |
| "loss": 0.4525, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.76641845703125, | |
| "rewards/margins": 1.3434927463531494, | |
| "rewards/rejected": -4.1099114418029785, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.574654611650214e-06, | |
| "logits/chosen": -0.353823721408844, | |
| "logits/rejected": -0.3546674847602844, | |
| "logps/chosen": -448.30615234375, | |
| "logps/rejected": -462.4393005371094, | |
| "loss": 0.4049, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.676255226135254, | |
| "rewards/margins": 1.421419382095337, | |
| "rewards/rejected": -4.097674369812012, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.54955020700077e-06, | |
| "logits/chosen": -0.35255804657936096, | |
| "logits/rejected": -0.35378915071487427, | |
| "logps/chosen": -442.2880859375, | |
| "logps/rejected": -454.832763671875, | |
| "loss": 0.4771, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.8402438163757324, | |
| "rewards/margins": 1.2248531579971313, | |
| "rewards/rejected": -4.065096855163574, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.5245573418486136e-06, | |
| "logits/chosen": -0.35058295726776123, | |
| "logits/rejected": -0.3520324230194092, | |
| "logps/chosen": -451.47265625, | |
| "logps/rejected": -462.79791259765625, | |
| "loss": 0.4615, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.648658037185669, | |
| "rewards/margins": 1.3944532871246338, | |
| "rewards/rejected": -4.043111324310303, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.4996789492172836e-06, | |
| "logits/chosen": -0.35444819927215576, | |
| "logits/rejected": -0.35484084486961365, | |
| "logps/chosen": -447.3772888183594, | |
| "logps/rejected": -457.873291015625, | |
| "loss": 0.4392, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.671096086502075, | |
| "rewards/margins": 1.3712053298950195, | |
| "rewards/rejected": -4.042301654815674, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.4749179486964599e-06, | |
| "logits/chosen": -0.3643060028553009, | |
| "logits/rejected": -0.3653911054134369, | |
| "logps/chosen": -452.032470703125, | |
| "logps/rejected": -464.42193603515625, | |
| "loss": 0.4286, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.5244593620300293, | |
| "rewards/margins": 1.475843071937561, | |
| "rewards/rejected": -4.000302314758301, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.4502772460993387e-06, | |
| "logits/chosen": -0.35049787163734436, | |
| "logits/rejected": -0.3510446846485138, | |
| "logps/chosen": -448.87518310546875, | |
| "logps/rejected": -457.3994140625, | |
| "loss": 0.491, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.827812910079956, | |
| "rewards/margins": 1.2428455352783203, | |
| "rewards/rejected": -4.0706586837768555, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.4257597331216211e-06, | |
| "logits/chosen": -0.3531518578529358, | |
| "logits/rejected": -0.3538290858268738, | |
| "logps/chosen": -456.27691650390625, | |
| "logps/rejected": -466.98687744140625, | |
| "loss": 0.4657, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.0542407035827637, | |
| "rewards/margins": 1.164147138595581, | |
| "rewards/rejected": -4.218388080596924, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.4013682870021594e-06, | |
| "logits/chosen": -0.35849729180336, | |
| "logits/rejected": -0.3595832884311676, | |
| "logps/chosen": -447.4246520996094, | |
| "logps/rejected": -460.0209045410156, | |
| "loss": 0.3725, | |
| "rewards/accuracies": 0.831250011920929, | |
| "rewards/chosen": -2.7747585773468018, | |
| "rewards/margins": 1.452606439590454, | |
| "rewards/rejected": -4.227365016937256, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.3771057701853034e-06, | |
| "logits/chosen": -0.35135719180107117, | |
| "logits/rejected": -0.3521498739719391, | |
| "logps/chosen": -455.69549560546875, | |
| "logps/rejected": -467.37591552734375, | |
| "loss": 0.4899, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.884308338165283, | |
| "rewards/margins": 1.336925983428955, | |
| "rewards/rejected": -4.221234321594238, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.352975029984974e-06, | |
| "logits/chosen": -0.3514239192008972, | |
| "logits/rejected": -0.35260799527168274, | |
| "logps/chosen": -441.4317932128906, | |
| "logps/rejected": -454.58251953125, | |
| "loss": 0.4829, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.8820691108703613, | |
| "rewards/margins": 1.2067675590515137, | |
| "rewards/rejected": -4.088836669921875, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.328978898250525e-06, | |
| "logits/chosen": -0.3527238070964813, | |
| "logits/rejected": -0.3534066379070282, | |
| "logps/chosen": -452.95989990234375, | |
| "logps/rejected": -464.698486328125, | |
| "loss": 0.4476, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.7672972679138184, | |
| "rewards/margins": 1.3264329433441162, | |
| "rewards/rejected": -4.093730926513672, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_logits/chosen": -0.42317765951156616, | |
| "eval_logits/rejected": -0.42379918694496155, | |
| "eval_logps/chosen": -441.13116455078125, | |
| "eval_logps/rejected": -451.5594177246094, | |
| "eval_loss": 0.4796808958053589, | |
| "eval_rewards/accuracies": 0.7559999823570251, | |
| "eval_rewards/chosen": -2.823995590209961, | |
| "eval_rewards/margins": 1.234041452407837, | |
| "eval_rewards/rejected": -4.058037281036377, | |
| "eval_runtime": 351.7707, | |
| "eval_samples_per_second": 1.421, | |
| "eval_steps_per_second": 1.421, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.305120191034409e-06, | |
| "logits/chosen": -0.34321507811546326, | |
| "logits/rejected": -0.343815416097641, | |
| "logps/chosen": -443.4376525878906, | |
| "logps/rejected": -452.2301330566406, | |
| "loss": 0.4223, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.8989710807800293, | |
| "rewards/margins": 1.322284460067749, | |
| "rewards/rejected": -4.221255302429199, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.2814017082617025e-06, | |
| "logits/chosen": -0.3508697748184204, | |
| "logits/rejected": -0.35247209668159485, | |
| "logps/chosen": -444.38641357421875, | |
| "logps/rejected": -456.51153564453125, | |
| "loss": 0.4284, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.6340365409851074, | |
| "rewards/margins": 1.4381511211395264, | |
| "rewards/rejected": -4.072187900543213, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.2578262334015201e-06, | |
| "logits/chosen": -0.34914684295654297, | |
| "logits/rejected": -0.35076671838760376, | |
| "logps/chosen": -441.771728515625, | |
| "logps/rejected": -457.03057861328125, | |
| "loss": 0.4234, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -2.6355042457580566, | |
| "rewards/margins": 1.4987059831619263, | |
| "rewards/rejected": -4.134210109710693, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.234396533140365e-06, | |
| "logits/chosen": -0.3611491024494171, | |
| "logits/rejected": -0.3617832660675049, | |
| "logps/chosen": -454.31951904296875, | |
| "logps/rejected": -467.4290466308594, | |
| "loss": 0.435, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.7407212257385254, | |
| "rewards/margins": 1.4744737148284912, | |
| "rewards/rejected": -4.215195178985596, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.2111153570574454e-06, | |
| "logits/chosen": -0.35015982389450073, | |
| "logits/rejected": -0.35119912028312683, | |
| "logps/chosen": -446.706787109375, | |
| "logps/rejected": -461.330810546875, | |
| "loss": 0.4095, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.59765625, | |
| "rewards/margins": 1.5183773040771484, | |
| "rewards/rejected": -4.116034030914307, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.187985437301999e-06, | |
| "logits/chosen": -0.35530123114585876, | |
| "logits/rejected": -0.35578909516334534, | |
| "logps/chosen": -438.29974365234375, | |
| "logps/rejected": -452.89483642578125, | |
| "loss": 0.4416, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.729788303375244, | |
| "rewards/margins": 1.507673978805542, | |
| "rewards/rejected": -4.237462043762207, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.1650094882726599e-06, | |
| "logits/chosen": -0.36762434244155884, | |
| "logits/rejected": -0.36925989389419556, | |
| "logps/chosen": -455.1209411621094, | |
| "logps/rejected": -469.49920654296875, | |
| "loss": 0.4061, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.913037061691284, | |
| "rewards/margins": 1.4348491430282593, | |
| "rewards/rejected": -4.347886562347412, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.1421902062989178e-06, | |
| "logits/chosen": -0.3690846264362335, | |
| "logits/rejected": -0.3703765869140625, | |
| "logps/chosen": -451.37860107421875, | |
| "logps/rejected": -462.94293212890625, | |
| "loss": 0.4399, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.7418205738067627, | |
| "rewards/margins": 1.415470838546753, | |
| "rewards/rejected": -4.157290935516357, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.1195302693246879e-06, | |
| "logits/chosen": -0.34830474853515625, | |
| "logits/rejected": -0.34976112842559814, | |
| "logps/chosen": -447.49261474609375, | |
| "logps/rejected": -460.62994384765625, | |
| "loss": 0.4744, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.9276537895202637, | |
| "rewards/margins": 1.2483450174331665, | |
| "rewards/rejected": -4.175999164581299, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.0970323365940443e-06, | |
| "logits/chosen": -0.358784556388855, | |
| "logits/rejected": -0.35958269238471985, | |
| "logps/chosen": -449.94482421875, | |
| "logps/rejected": -461.4793395996094, | |
| "loss": 0.456, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.9514570236206055, | |
| "rewards/margins": 1.3440725803375244, | |
| "rewards/rejected": -4.295529365539551, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.0746990483391414e-06, | |
| "logits/chosen": -0.3496165871620178, | |
| "logits/rejected": -0.3507440388202667, | |
| "logps/chosen": -453.05755615234375, | |
| "logps/rejected": -464.02978515625, | |
| "loss": 0.429, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.848665237426758, | |
| "rewards/margins": 1.313674807548523, | |
| "rewards/rejected": -4.162339687347412, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.052533025470379e-06, | |
| "logits/chosen": -0.3463028073310852, | |
| "logits/rejected": -0.34690287709236145, | |
| "logps/chosen": -443.31134033203125, | |
| "logps/rejected": -455.8688049316406, | |
| "loss": 0.4229, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8174338340759277, | |
| "rewards/margins": 1.4426196813583374, | |
| "rewards/rejected": -4.260054111480713, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.0305368692688175e-06, | |
| "logits/chosen": -0.3607487082481384, | |
| "logits/rejected": -0.36121565103530884, | |
| "logps/chosen": -459.01824951171875, | |
| "logps/rejected": -472.31884765625, | |
| "loss": 0.4488, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.872823715209961, | |
| "rewards/margins": 1.4165146350860596, | |
| "rewards/rejected": -4.2893385887146, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.0087131610809153e-06, | |
| "logits/chosen": -0.34994029998779297, | |
| "logits/rejected": -0.35072094202041626, | |
| "logps/chosen": -442.97589111328125, | |
| "logps/rejected": -453.756591796875, | |
| "loss": 0.555, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -3.1791205406188965, | |
| "rewards/margins": 1.0200657844543457, | |
| "rewards/rejected": -4.1991868019104, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 9.870644620155878e-07, | |
| "logits/chosen": -0.35871225595474243, | |
| "logits/rejected": -0.35941624641418457, | |
| "logps/chosen": -454.1318359375, | |
| "logps/rejected": -464.7295837402344, | |
| "loss": 0.4462, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.9613711833953857, | |
| "rewards/margins": 1.3269731998443604, | |
| "rewards/rejected": -4.288344383239746, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 9.655933126436565e-07, | |
| "logits/chosen": -0.3492319583892822, | |
| "logits/rejected": -0.3505721092224121, | |
| "logps/chosen": -444.28515625, | |
| "logps/rejected": -456.6302185058594, | |
| "loss": 0.4471, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.847562313079834, | |
| "rewards/margins": 1.3418700695037842, | |
| "rewards/rejected": -4.1894330978393555, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 9.443022326996984e-07, | |
| "logits/chosen": -0.354257732629776, | |
| "logits/rejected": -0.35464176535606384, | |
| "logps/chosen": -444.35089111328125, | |
| "logps/rejected": -453.5, | |
| "loss": 0.4514, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.9952127933502197, | |
| "rewards/margins": 1.3154346942901611, | |
| "rewards/rejected": -4.310647487640381, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 9.231937207863459e-07, | |
| "logits/chosen": -0.35797202587127686, | |
| "logits/rejected": -0.3591151833534241, | |
| "logps/chosen": -446.80487060546875, | |
| "logps/rejected": -460.2659606933594, | |
| "loss": 0.4346, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.878164768218994, | |
| "rewards/margins": 1.2440316677093506, | |
| "rewards/rejected": -4.122197151184082, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.022702540810607e-07, | |
| "logits/chosen": -0.3597440719604492, | |
| "logits/rejected": -0.3606324791908264, | |
| "logps/chosen": -450.4046325683594, | |
| "logps/rejected": -460.8863830566406, | |
| "loss": 0.4151, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.835644483566284, | |
| "rewards/margins": 1.3725159168243408, | |
| "rewards/rejected": -4.208160400390625, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 8.815342880454312e-07, | |
| "logits/chosen": -0.3541966378688812, | |
| "logits/rejected": -0.35494524240493774, | |
| "logps/chosen": -455.19146728515625, | |
| "logps/rejected": -470.131103515625, | |
| "loss": 0.4702, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -3.059727430343628, | |
| "rewards/margins": 1.261040449142456, | |
| "rewards/rejected": -4.320767879486084, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_logits/chosen": -0.4240322411060333, | |
| "eval_logits/rejected": -0.4246600270271301, | |
| "eval_logps/chosen": -441.68072509765625, | |
| "eval_logps/rejected": -452.262451171875, | |
| "eval_loss": 0.4791676104068756, | |
| "eval_rewards/accuracies": 0.7599999904632568, | |
| "eval_rewards/chosen": -2.878952980041504, | |
| "eval_rewards/margins": 1.2493829727172852, | |
| "eval_rewards/rejected": -4.128335475921631, | |
| "eval_runtime": 351.6609, | |
| "eval_samples_per_second": 1.422, | |
| "eval_steps_per_second": 1.422, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 8.609882561370101e-07, | |
| "logits/chosen": -0.3556322455406189, | |
| "logits/rejected": -0.35619792342185974, | |
| "logps/chosen": -446.03204345703125, | |
| "logps/rejected": -455.453125, | |
| "loss": 0.4476, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.709972381591797, | |
| "rewards/margins": 1.3839571475982666, | |
| "rewards/rejected": -4.093929290771484, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 8.406345695237394e-07, | |
| "logits/chosen": -0.3541732430458069, | |
| "logits/rejected": -0.35552269220352173, | |
| "logps/chosen": -444.166015625, | |
| "logps/rejected": -460.39617919921875, | |
| "loss": 0.3845, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -2.691920757293701, | |
| "rewards/margins": 1.684851884841919, | |
| "rewards/rejected": -4.376772880554199, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 8.20475616800985e-07, | |
| "logits/chosen": -0.35582807660102844, | |
| "logits/rejected": -0.35650044679641724, | |
| "logps/chosen": -449.7290954589844, | |
| "logps/rejected": -458.966064453125, | |
| "loss": 0.498, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.0267415046691895, | |
| "rewards/margins": 1.181206464767456, | |
| "rewards/rejected": -4.207947254180908, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 8.005137637112303e-07, | |
| "logits/chosen": -0.35746604204177856, | |
| "logits/rejected": -0.35817286372184753, | |
| "logps/chosen": -450.47308349609375, | |
| "logps/rejected": -463.4517517089844, | |
| "loss": 0.4951, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.940412759780884, | |
| "rewards/margins": 1.3010506629943848, | |
| "rewards/rejected": -4.2414631843566895, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 7.807513528664415e-07, | |
| "logits/chosen": -0.3562454581260681, | |
| "logits/rejected": -0.3569663166999817, | |
| "logps/chosen": -449.81768798828125, | |
| "logps/rejected": -462.03125, | |
| "loss": 0.4975, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -3.0241804122924805, | |
| "rewards/margins": 1.1527836322784424, | |
| "rewards/rejected": -4.176963806152344, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 7.611907034731538e-07, | |
| "logits/chosen": -0.35374173521995544, | |
| "logits/rejected": -0.3544319272041321, | |
| "logps/chosen": -452.8089904785156, | |
| "logps/rejected": -466.8789978027344, | |
| "loss": 0.4872, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.0933475494384766, | |
| "rewards/margins": 1.278378963470459, | |
| "rewards/rejected": -4.371726989746094, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 7.418341110603e-07, | |
| "logits/chosen": -0.3625703454017639, | |
| "logits/rejected": -0.363391637802124, | |
| "logps/chosen": -461.72442626953125, | |
| "logps/rejected": -472.3929138183594, | |
| "loss": 0.4361, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.796329975128174, | |
| "rewards/margins": 1.4400124549865723, | |
| "rewards/rejected": -4.236342430114746, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 7.226838472098239e-07, | |
| "logits/chosen": -0.35118603706359863, | |
| "logits/rejected": -0.35229939222335815, | |
| "logps/chosen": -450.23895263671875, | |
| "logps/rejected": -462.094482421875, | |
| "loss": 0.4608, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.8915536403656006, | |
| "rewards/margins": 1.3143730163574219, | |
| "rewards/rejected": -4.205926418304443, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 7.037421592900942e-07, | |
| "logits/chosen": -0.3532702326774597, | |
| "logits/rejected": -0.3544442057609558, | |
| "logps/chosen": -444.9677734375, | |
| "logps/rejected": -458.2537536621094, | |
| "loss": 0.4259, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.8905608654022217, | |
| "rewards/margins": 1.462416410446167, | |
| "rewards/rejected": -4.352977275848389, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 6.850112701921735e-07, | |
| "logits/chosen": -0.35222965478897095, | |
| "logits/rejected": -0.3528757095336914, | |
| "logps/chosen": -441.19976806640625, | |
| "logps/rejected": -455.4878845214844, | |
| "loss": 0.4063, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.7645294666290283, | |
| "rewards/margins": 1.4231407642364502, | |
| "rewards/rejected": -4.1876702308654785, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 6.664933780689445e-07, | |
| "logits/chosen": -0.3582982122898102, | |
| "logits/rejected": -0.3593185842037201, | |
| "logps/chosen": -450.44549560546875, | |
| "logps/rejected": -464.33905029296875, | |
| "loss": 0.4102, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.714564800262451, | |
| "rewards/margins": 1.4921871423721313, | |
| "rewards/rejected": -4.206751823425293, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 6.481906560771525e-07, | |
| "logits/chosen": -0.357990562915802, | |
| "logits/rejected": -0.3587570786476135, | |
| "logps/chosen": -441.71746826171875, | |
| "logps/rejected": -452.60870361328125, | |
| "loss": 0.4988, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.9209952354431152, | |
| "rewards/margins": 1.2236100435256958, | |
| "rewards/rejected": -4.1446051597595215, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 6.301052521223736e-07, | |
| "logits/chosen": -0.3549385070800781, | |
| "logits/rejected": -0.3562348484992981, | |
| "logps/chosen": -450.3941955566406, | |
| "logps/rejected": -462.034912109375, | |
| "loss": 0.4629, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -3.1530508995056152, | |
| "rewards/margins": 1.2050797939300537, | |
| "rewards/rejected": -4.358130931854248, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 6.122392886069486e-07, | |
| "logits/chosen": -0.3575456738471985, | |
| "logits/rejected": -0.3584723174571991, | |
| "logps/chosen": -456.48797607421875, | |
| "logps/rejected": -469.8323669433594, | |
| "loss": 0.403, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -3.066274881362915, | |
| "rewards/margins": 1.3775126934051514, | |
| "rewards/rejected": -4.443788051605225, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 5.945948621809092e-07, | |
| "logits/chosen": -0.34499675035476685, | |
| "logits/rejected": -0.34601226449012756, | |
| "logps/chosen": -444.13818359375, | |
| "logps/rejected": -458.27978515625, | |
| "loss": 0.3784, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.885612726211548, | |
| "rewards/margins": 1.6283729076385498, | |
| "rewards/rejected": -4.513985633850098, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.771740434959278e-07, | |
| "logits/chosen": -0.36106568574905396, | |
| "logits/rejected": -0.3616113066673279, | |
| "logps/chosen": -451.7525329589844, | |
| "logps/rejected": -462.39361572265625, | |
| "loss": 0.4455, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.004162549972534, | |
| "rewards/margins": 1.2321292161941528, | |
| "rewards/rejected": -4.236291408538818, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.599788769623174e-07, | |
| "logits/chosen": -0.3459396958351135, | |
| "logits/rejected": -0.3463771939277649, | |
| "logps/chosen": -451.74462890625, | |
| "logps/rejected": -460.52532958984375, | |
| "loss": 0.442, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -3.0312728881835938, | |
| "rewards/margins": 1.2485148906707764, | |
| "rewards/rejected": -4.279788017272949, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.430113805091111e-07, | |
| "logits/chosen": -0.34979885816574097, | |
| "logits/rejected": -0.3506646156311035, | |
| "logps/chosen": -452.90667724609375, | |
| "logps/rejected": -459.8733825683594, | |
| "loss": 0.4529, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -3.2027690410614014, | |
| "rewards/margins": 1.2168538570404053, | |
| "rewards/rejected": -4.419622898101807, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 5.262735453472459e-07, | |
| "logits/chosen": -0.3504520058631897, | |
| "logits/rejected": -0.3512795567512512, | |
| "logps/chosen": -448.52685546875, | |
| "logps/rejected": -459.87359619140625, | |
| "loss": 0.3957, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.8900694847106934, | |
| "rewards/margins": 1.5135910511016846, | |
| "rewards/rejected": -4.403660774230957, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 5.097673357358906e-07, | |
| "logits/chosen": -0.36047258973121643, | |
| "logits/rejected": -0.36156997084617615, | |
| "logps/chosen": -451.36767578125, | |
| "logps/rejected": -462.75225830078125, | |
| "loss": 0.4152, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.962690830230713, | |
| "rewards/margins": 1.4245904684066772, | |
| "rewards/rejected": -4.3872809410095215, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_logits/chosen": -0.4258207082748413, | |
| "eval_logits/rejected": -0.4264317452907562, | |
| "eval_logps/chosen": -443.40802001953125, | |
| "eval_logps/rejected": -454.0955810546875, | |
| "eval_loss": 0.4785875976085663, | |
| "eval_rewards/accuracies": 0.7599999904632568, | |
| "eval_rewards/chosen": -3.0516843795776367, | |
| "eval_rewards/margins": 1.2599674463272095, | |
| "eval_rewards/rejected": -4.311651706695557, | |
| "eval_runtime": 351.6671, | |
| "eval_samples_per_second": 1.422, | |
| "eval_steps_per_second": 1.422, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.934946887519279e-07, | |
| "logits/chosen": -0.36616581678390503, | |
| "logits/rejected": -0.36695989966392517, | |
| "logps/chosen": -457.62567138671875, | |
| "logps/rejected": -470.66485595703125, | |
| "loss": 0.4125, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.906996011734009, | |
| "rewards/margins": 1.469089150428772, | |
| "rewards/rejected": -4.37608528137207, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.774575140626317e-07, | |
| "logits/chosen": -0.35779887437820435, | |
| "logits/rejected": -0.3586946129798889, | |
| "logps/chosen": -451.4176330566406, | |
| "logps/rejected": -464.3094787597656, | |
| "loss": 0.4281, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.945284605026245, | |
| "rewards/margins": 1.3972845077514648, | |
| "rewards/rejected": -4.342568874359131, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.6165769370155516e-07, | |
| "logits/chosen": -0.36210596561431885, | |
| "logits/rejected": -0.36279112100601196, | |
| "logps/chosen": -451.7628479003906, | |
| "logps/rejected": -464.893798828125, | |
| "loss": 0.4782, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.2397968769073486, | |
| "rewards/margins": 1.2520904541015625, | |
| "rewards/rejected": -4.49188756942749, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.4609708184767177e-07, | |
| "logits/chosen": -0.3466174006462097, | |
| "logits/rejected": -0.3471986651420593, | |
| "logps/chosen": -448.8627014160156, | |
| "logps/rejected": -458.1527404785156, | |
| "loss": 0.4647, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.91853404045105, | |
| "rewards/margins": 1.3560994863510132, | |
| "rewards/rejected": -4.274633884429932, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.307775046077739e-07, | |
| "logits/chosen": -0.3524012863636017, | |
| "logits/rejected": -0.3537690043449402, | |
| "logps/chosen": -445.4092712402344, | |
| "logps/rejected": -460.19635009765625, | |
| "loss": 0.4618, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.2233848571777344, | |
| "rewards/margins": 1.2467783689498901, | |
| "rewards/rejected": -4.470162391662598, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1570075980217503e-07, | |
| "logits/chosen": -0.3559108376502991, | |
| "logits/rejected": -0.35668981075286865, | |
| "logps/chosen": -449.45330810546875, | |
| "logps/rejected": -457.7796325683594, | |
| "loss": 0.4718, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -3.394498348236084, | |
| "rewards/margins": 1.175183892250061, | |
| "rewards/rejected": -4.5696821212768555, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.008686167537243e-07, | |
| "logits/chosen": -0.36145132780075073, | |
| "logits/rejected": -0.362403005361557, | |
| "logps/chosen": -455.7185974121094, | |
| "logps/rejected": -467.92828369140625, | |
| "loss": 0.427, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -3.0464015007019043, | |
| "rewards/margins": 1.3699265718460083, | |
| "rewards/rejected": -4.416327953338623, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.862828160801707e-07, | |
| "logits/chosen": -0.3624842166900635, | |
| "logits/rejected": -0.36328238248825073, | |
| "logps/chosen": -455.2030334472656, | |
| "logps/rejected": -468.92608642578125, | |
| "loss": 0.468, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -3.084407329559326, | |
| "rewards/margins": 1.2791146039962769, | |
| "rewards/rejected": -4.363522529602051, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.7194506948989405e-07, | |
| "logits/chosen": -0.3563145697116852, | |
| "logits/rejected": -0.3578342795372009, | |
| "logps/chosen": -448.5079650878906, | |
| "logps/rejected": -462.24542236328125, | |
| "loss": 0.3916, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -2.866283655166626, | |
| "rewards/margins": 1.522825002670288, | |
| "rewards/rejected": -4.389109134674072, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.578570595810274e-07, | |
| "logits/chosen": -0.35796427726745605, | |
| "logits/rejected": -0.3588128089904785, | |
| "logps/chosen": -454.2952575683594, | |
| "logps/rejected": -462.9483337402344, | |
| "loss": 0.4564, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.1886661052703857, | |
| "rewards/margins": 1.3093502521514893, | |
| "rewards/rejected": -4.498016357421875, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.4402043964399527e-07, | |
| "logits/chosen": -0.35277941823005676, | |
| "logits/rejected": -0.35380321741104126, | |
| "logps/chosen": -441.46600341796875, | |
| "logps/rejected": -452.00537109375, | |
| "loss": 0.4007, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -3.036705493927002, | |
| "rewards/margins": 1.4018588066101074, | |
| "rewards/rejected": -4.438564777374268, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.304368334674965e-07, | |
| "logits/chosen": -0.3567604124546051, | |
| "logits/rejected": -0.35805758833885193, | |
| "logps/chosen": -449.0523376464844, | |
| "logps/rejected": -461.69842529296875, | |
| "loss": 0.4191, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.938197612762451, | |
| "rewards/margins": 1.5301718711853027, | |
| "rewards/rejected": -4.468369483947754, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.1710783514794256e-07, | |
| "logits/chosen": -0.35164931416511536, | |
| "logits/rejected": -0.3529738187789917, | |
| "logps/chosen": -449.84173583984375, | |
| "logps/rejected": -464.1533203125, | |
| "loss": 0.5458, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -3.2846596240997314, | |
| "rewards/margins": 1.069946527481079, | |
| "rewards/rejected": -4.354605674743652, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.040350089023844e-07, | |
| "logits/chosen": -0.3580131232738495, | |
| "logits/rejected": -0.35896363854408264, | |
| "logps/chosen": -460.78790283203125, | |
| "logps/rejected": -474.05987548828125, | |
| "loss": 0.4396, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.1543052196502686, | |
| "rewards/margins": 1.453919768333435, | |
| "rewards/rejected": -4.6082258224487305, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.9121988888494297e-07, | |
| "logits/chosen": -0.35557836294174194, | |
| "logits/rejected": -0.356197327375412, | |
| "logps/chosen": -454.5060119628906, | |
| "logps/rejected": -467.5367736816406, | |
| "loss": 0.3968, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.987016201019287, | |
| "rewards/margins": 1.4960193634033203, | |
| "rewards/rejected": -4.483035564422607, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.786639790067719e-07, | |
| "logits/chosen": -0.35686007142066956, | |
| "logits/rejected": -0.3575289249420166, | |
| "logps/chosen": -457.2361755371094, | |
| "logps/rejected": -470.49212646484375, | |
| "loss": 0.4509, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.249783754348755, | |
| "rewards/margins": 1.2371891736984253, | |
| "rewards/rejected": -4.486972808837891, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.6636875275956567e-07, | |
| "logits/chosen": -0.3554794192314148, | |
| "logits/rejected": -0.35618001222610474, | |
| "logps/chosen": -455.373291015625, | |
| "logps/rejected": -466.55389404296875, | |
| "loss": 0.5174, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -3.4816794395446777, | |
| "rewards/margins": 1.009413480758667, | |
| "rewards/rejected": -4.491092681884766, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.543356530426394e-07, | |
| "logits/chosen": -0.34936192631721497, | |
| "logits/rejected": -0.3497045040130615, | |
| "logps/chosen": -451.0462951660156, | |
| "logps/rejected": -464.62109375, | |
| "loss": 0.4859, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -3.2196598052978516, | |
| "rewards/margins": 1.3027180433273315, | |
| "rewards/rejected": -4.522377967834473, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.425660919935954e-07, | |
| "logits/chosen": -0.35678738355636597, | |
| "logits/rejected": -0.35775676369667053, | |
| "logps/chosen": -452.04925537109375, | |
| "logps/rejected": -463.97869873046875, | |
| "loss": 0.4253, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.8534128665924072, | |
| "rewards/margins": 1.3880208730697632, | |
| "rewards/rejected": -4.241434097290039, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.3106145082260777e-07, | |
| "logits/chosen": -0.35490182042121887, | |
| "logits/rejected": -0.35594433546066284, | |
| "logps/chosen": -456.057373046875, | |
| "logps/rejected": -470.40350341796875, | |
| "loss": 0.4502, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.0541415214538574, | |
| "rewards/margins": 1.4110925197601318, | |
| "rewards/rejected": -4.46523380279541, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_logits/chosen": -0.4264547824859619, | |
| "eval_logits/rejected": -0.4270709156990051, | |
| "eval_logps/chosen": -443.8349914550781, | |
| "eval_logps/rejected": -454.5430603027344, | |
| "eval_loss": 0.48084381222724915, | |
| "eval_rewards/accuracies": 0.7620000243186951, | |
| "eval_rewards/chosen": -3.0943799018859863, | |
| "eval_rewards/margins": 1.2620201110839844, | |
| "eval_rewards/rejected": -4.356400489807129, | |
| "eval_runtime": 351.5894, | |
| "eval_samples_per_second": 1.422, | |
| "eval_steps_per_second": 1.422, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.1982307965032563e-07, | |
| "logits/chosen": -0.3585938513278961, | |
| "logits/rejected": -0.3597787618637085, | |
| "logps/chosen": -453.99884033203125, | |
| "logps/rejected": -462.98272705078125, | |
| "loss": 0.5579, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -3.515160322189331, | |
| "rewards/margins": 0.9192056655883789, | |
| "rewards/rejected": -4.434365749359131, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.0885229734943501e-07, | |
| "logits/chosen": -0.35792115330696106, | |
| "logits/rejected": -0.35949331521987915, | |
| "logps/chosen": -441.6431579589844, | |
| "logps/rejected": -454.21160888671875, | |
| "loss": 0.4968, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.2611217498779297, | |
| "rewards/margins": 1.2790337800979614, | |
| "rewards/rejected": -4.540155410766602, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9815039138988135e-07, | |
| "logits/chosen": -0.3631977438926697, | |
| "logits/rejected": -0.3638666272163391, | |
| "logps/chosen": -448.5018005371094, | |
| "logps/rejected": -460.1982421875, | |
| "loss": 0.452, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.071455478668213, | |
| "rewards/margins": 1.4410284757614136, | |
| "rewards/rejected": -4.512484073638916, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.8771861768777794e-07, | |
| "logits/chosen": -0.3509594798088074, | |
| "logits/rejected": -0.35208243131637573, | |
| "logps/chosen": -450.60308837890625, | |
| "logps/rejected": -464.2266540527344, | |
| "loss": 0.4278, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.225553512573242, | |
| "rewards/margins": 1.3294174671173096, | |
| "rewards/rejected": -4.554970741271973, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.7755820045802146e-07, | |
| "logits/chosen": -0.35590630769729614, | |
| "logits/rejected": -0.35736554861068726, | |
| "logps/chosen": -455.400390625, | |
| "logps/rejected": -465.2867126464844, | |
| "loss": 0.4158, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.942516803741455, | |
| "rewards/margins": 1.4671036005020142, | |
| "rewards/rejected": -4.409620761871338, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.67670332070623e-07, | |
| "logits/chosen": -0.3521929383277893, | |
| "logits/rejected": -0.3526236116886139, | |
| "logps/chosen": -455.163330078125, | |
| "logps/rejected": -469.2591857910156, | |
| "loss": 0.4457, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -3.1400699615478516, | |
| "rewards/margins": 1.289953589439392, | |
| "rewards/rejected": -4.430023193359375, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.580561729107777e-07, | |
| "logits/chosen": -0.35622936487197876, | |
| "logits/rejected": -0.356993168592453, | |
| "logps/chosen": -455.1328125, | |
| "logps/rejected": -465.6949157714844, | |
| "loss": 0.4489, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.2420265674591064, | |
| "rewards/margins": 1.3224232196807861, | |
| "rewards/rejected": -4.564449310302734, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.487168512426901e-07, | |
| "logits/chosen": -0.36213189363479614, | |
| "logits/rejected": -0.3628009557723999, | |
| "logps/chosen": -453.6480407714844, | |
| "logps/rejected": -465.2872619628906, | |
| "loss": 0.4185, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -3.116283416748047, | |
| "rewards/margins": 1.3114349842071533, | |
| "rewards/rejected": -4.427718162536621, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.3965346307716676e-07, | |
| "logits/chosen": -0.3530941605567932, | |
| "logits/rejected": -0.35421401262283325, | |
| "logps/chosen": -451.10894775390625, | |
| "logps/rejected": -465.1979064941406, | |
| "loss": 0.376, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": -2.9081177711486816, | |
| "rewards/margins": 1.643689751625061, | |
| "rewards/rejected": -4.551807403564453, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.3086707204299415e-07, | |
| "logits/chosen": -0.36071377992630005, | |
| "logits/rejected": -0.3618861138820648, | |
| "logps/chosen": -448.95355224609375, | |
| "logps/rejected": -460.8838806152344, | |
| "loss": 0.4524, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.284519672393799, | |
| "rewards/margins": 1.33005690574646, | |
| "rewards/rejected": -4.6145758628845215, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.223587092621162e-07, | |
| "logits/chosen": -0.3580467998981476, | |
| "logits/rejected": -0.35923272371292114, | |
| "logps/chosen": -451.82769775390625, | |
| "logps/rejected": -463.8099670410156, | |
| "loss": 0.4238, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -3.3439979553222656, | |
| "rewards/margins": 1.2987263202667236, | |
| "rewards/rejected": -4.64272403717041, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.1412937322862971e-07, | |
| "logits/chosen": -0.3629991412162781, | |
| "logits/rejected": -0.3639989495277405, | |
| "logps/chosen": -448.5044860839844, | |
| "logps/rejected": -460.817138671875, | |
| "loss": 0.4102, | |
| "rewards/accuracies": 0.831250011920929, | |
| "rewards/chosen": -3.0257716178894043, | |
| "rewards/margins": 1.448880910873413, | |
| "rewards/rejected": -4.4746527671813965, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.0618002969160546e-07, | |
| "logits/chosen": -0.3608396053314209, | |
| "logits/rejected": -0.3618479371070862, | |
| "logps/chosen": -453.93499755859375, | |
| "logps/rejected": -466.81640625, | |
| "loss": 0.4187, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.1413166522979736, | |
| "rewards/margins": 1.3508026599884033, | |
| "rewards/rejected": -4.492118835449219, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.851161154175337e-08, | |
| "logits/chosen": -0.3562917113304138, | |
| "logits/rejected": -0.35710564255714417, | |
| "logps/chosen": -451.28076171875, | |
| "logps/rejected": -461.2808532714844, | |
| "loss": 0.5024, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -3.143902540206909, | |
| "rewards/margins": 1.1941773891448975, | |
| "rewards/rejected": -4.338079929351807, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 9.112501870194273e-08, | |
| "logits/chosen": -0.3589875102043152, | |
| "logits/rejected": -0.35990768671035767, | |
| "logps/chosen": -452.32000732421875, | |
| "logps/rejected": -461.66033935546875, | |
| "loss": 0.5337, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -3.512810468673706, | |
| "rewards/margins": 0.9673913717269897, | |
| "rewards/rejected": -4.4802021980285645, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.402111802159413e-08, | |
| "logits/chosen": -0.3585359454154968, | |
| "logits/rejected": -0.35975727438926697, | |
| "logps/chosen": -454.8050842285156, | |
| "logps/rejected": -465.12615966796875, | |
| "loss": 0.4486, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.2963638305664062, | |
| "rewards/margins": 1.2402369976043701, | |
| "rewards/rejected": -4.5366010665893555, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.720074317494913e-08, | |
| "logits/chosen": -0.36562293767929077, | |
| "logits/rejected": -0.3664829134941101, | |
| "logps/chosen": -457.26068115234375, | |
| "logps/rejected": -470.1167907714844, | |
| "loss": 0.4503, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.0565972328186035, | |
| "rewards/margins": 1.4593775272369385, | |
| "rewards/rejected": -4.515974521636963, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.06646945632361e-08, | |
| "logits/chosen": -0.3597029447555542, | |
| "logits/rejected": -0.3601227402687073, | |
| "logps/chosen": -461.0421447753906, | |
| "logps/rejected": -469.86175537109375, | |
| "loss": 0.512, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -3.246166944503784, | |
| "rewards/margins": 1.1497961282730103, | |
| "rewards/rejected": -4.395963191986084, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 6.441373922073946e-08, | |
| "logits/chosen": -0.359005331993103, | |
| "logits/rejected": -0.35974326729774475, | |
| "logps/chosen": -455.99908447265625, | |
| "logps/rejected": -466.95440673828125, | |
| "loss": 0.4367, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.224595546722412, | |
| "rewards/margins": 1.361697793006897, | |
| "rewards/rejected": -4.5862932205200195, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.844861072478336e-08, | |
| "logits/chosen": -0.3530232608318329, | |
| "logits/rejected": -0.3545222580432892, | |
| "logps/chosen": -443.6240234375, | |
| "logps/rejected": -458.322998046875, | |
| "loss": 0.4834, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.3614017963409424, | |
| "rewards/margins": 1.2071417570114136, | |
| "rewards/rejected": -4.568543434143066, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_logits/chosen": -0.4272295832633972, | |
| "eval_logits/rejected": -0.42783358693122864, | |
| "eval_logps/chosen": -444.2228088378906, | |
| "eval_logps/rejected": -454.95098876953125, | |
| "eval_loss": 0.48089736700057983, | |
| "eval_rewards/accuracies": 0.7599999904632568, | |
| "eval_rewards/chosen": -3.1331627368927, | |
| "eval_rewards/margins": 1.2640310525894165, | |
| "eval_rewards/rejected": -4.3971943855285645, | |
| "eval_runtime": 351.6656, | |
| "eval_samples_per_second": 1.422, | |
| "eval_steps_per_second": 1.422, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.2770009109645306e-08, | |
| "logits/chosen": -0.36214134097099304, | |
| "logits/rejected": -0.36288636922836304, | |
| "logps/chosen": -454.91839599609375, | |
| "logps/rejected": -466.05224609375, | |
| "loss": 0.4296, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.1463279724121094, | |
| "rewards/margins": 1.363966703414917, | |
| "rewards/rejected": -4.5102949142456055, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.7378600784402095e-08, | |
| "logits/chosen": -0.3552590310573578, | |
| "logits/rejected": -0.35652121901512146, | |
| "logps/chosen": -455.6435546875, | |
| "logps/rejected": -465.54693603515625, | |
| "loss": 0.4669, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.2327640056610107, | |
| "rewards/margins": 1.2310270071029663, | |
| "rewards/rejected": -4.463791370391846, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.22750184547252e-08, | |
| "logits/chosen": -0.3599388301372528, | |
| "logits/rejected": -0.3607821762561798, | |
| "logps/chosen": -456.6576232910156, | |
| "logps/rejected": -469.9142150878906, | |
| "loss": 0.4199, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.1602883338928223, | |
| "rewards/margins": 1.4565389156341553, | |
| "rewards/rejected": -4.616827487945557, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.745986104862903e-08, | |
| "logits/chosen": -0.35964518785476685, | |
| "logits/rejected": -0.360365092754364, | |
| "logps/chosen": -455.8336486816406, | |
| "logps/rejected": -467.90948486328125, | |
| "loss": 0.4152, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.874156951904297, | |
| "rewards/margins": 1.5480505228042603, | |
| "rewards/rejected": -4.422207832336426, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.293369364618465e-08, | |
| "logits/chosen": -0.3647812604904175, | |
| "logits/rejected": -0.3658196032047272, | |
| "logps/chosen": -449.73138427734375, | |
| "logps/rejected": -462.46942138671875, | |
| "loss": 0.4729, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -3.295436382293701, | |
| "rewards/margins": 1.252638816833496, | |
| "rewards/rejected": -4.5480756759643555, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.869704741320478e-08, | |
| "logits/chosen": -0.35672903060913086, | |
| "logits/rejected": -0.3576185703277588, | |
| "logps/chosen": -449.70294189453125, | |
| "logps/rejected": -459.93096923828125, | |
| "loss": 0.4951, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -3.523907423019409, | |
| "rewards/margins": 1.1444907188415527, | |
| "rewards/rejected": -4.668398380279541, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.4750419538908667e-08, | |
| "logits/chosen": -0.3534146547317505, | |
| "logits/rejected": -0.35466113686561584, | |
| "logps/chosen": -452.890869140625, | |
| "logps/rejected": -464.16510009765625, | |
| "loss": 0.4477, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.2099146842956543, | |
| "rewards/margins": 1.3813788890838623, | |
| "rewards/rejected": -4.591293811798096, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.1094273177576508e-08, | |
| "logits/chosen": -0.36183369159698486, | |
| "logits/rejected": -0.36180374026298523, | |
| "logps/chosen": -455.7822265625, | |
| "logps/rejected": -465.14801025390625, | |
| "loss": 0.4747, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -3.090353488922119, | |
| "rewards/margins": 1.3213683366775513, | |
| "rewards/rejected": -4.411721706390381, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.7729037394193792e-08, | |
| "logits/chosen": -0.3579171299934387, | |
| "logits/rejected": -0.35931870341300964, | |
| "logps/chosen": -450.9007263183594, | |
| "logps/rejected": -464.6581115722656, | |
| "loss": 0.4626, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -3.0921690464019775, | |
| "rewards/margins": 1.4891610145568848, | |
| "rewards/rejected": -4.581330299377441, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.4655107114101008e-08, | |
| "logits/chosen": -0.36245545744895935, | |
| "logits/rejected": -0.36358946561813354, | |
| "logps/chosen": -452.5899353027344, | |
| "logps/rejected": -467.2527770996094, | |
| "loss": 0.464, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.9442224502563477, | |
| "rewards/margins": 1.4334999322891235, | |
| "rewards/rejected": -4.377722263336182, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.1872843076645157e-08, | |
| "logits/chosen": -0.35802754759788513, | |
| "logits/rejected": -0.35865747928619385, | |
| "logps/chosen": -454.47662353515625, | |
| "logps/rejected": -465.0662536621094, | |
| "loss": 0.3877, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.96606707572937, | |
| "rewards/margins": 1.5276943445205688, | |
| "rewards/rejected": -4.4937615394592285, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 9.382571792846962e-09, | |
| "logits/chosen": -0.3509235084056854, | |
| "logits/rejected": -0.3516360819339752, | |
| "logps/chosen": -443.75469970703125, | |
| "logps/rejected": -453.1048278808594, | |
| "loss": 0.4472, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -3.1273512840270996, | |
| "rewards/margins": 1.3490197658538818, | |
| "rewards/rejected": -4.476370811462402, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.1845855070828975e-09, | |
| "logits/chosen": -0.3624979555606842, | |
| "logits/rejected": -0.36296314001083374, | |
| "logps/chosen": -450.0325622558594, | |
| "logps/rejected": -459.88916015625, | |
| "loss": 0.4578, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.340954303741455, | |
| "rewards/margins": 1.343732237815857, | |
| "rewards/rejected": -4.684686660766602, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.279142162789019e-09, | |
| "logits/chosen": -0.35505902767181396, | |
| "logits/rejected": -0.35619235038757324, | |
| "logps/chosen": -451.9505920410156, | |
| "logps/rejected": -465.6192321777344, | |
| "loss": 0.4539, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.4748854637145996, | |
| "rewards/margins": 1.2840955257415771, | |
| "rewards/rejected": -4.758981227874756, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.666465372190453e-09, | |
| "logits/chosen": -0.356467604637146, | |
| "logits/rejected": -0.357626736164093, | |
| "logps/chosen": -452.7481384277344, | |
| "logps/rejected": -465.7762145996094, | |
| "loss": 0.472, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -3.293835401535034, | |
| "rewards/margins": 1.261348009109497, | |
| "rewards/rejected": -4.555183410644531, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.34674439005822e-09, | |
| "logits/chosen": -0.3525004982948303, | |
| "logits/rejected": -0.35348066687583923, | |
| "logps/chosen": -450.6170959472656, | |
| "logps/rejected": -462.77069091796875, | |
| "loss": 0.3976, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -3.295048952102661, | |
| "rewards/margins": 1.4584187269210815, | |
| "rewards/rejected": -4.753467559814453, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.3201340915011685e-09, | |
| "logits/chosen": -0.35318654775619507, | |
| "logits/rejected": -0.35393238067626953, | |
| "logps/chosen": -453.31707763671875, | |
| "logps/rejected": -463.7138671875, | |
| "loss": 0.4291, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -3.284226655960083, | |
| "rewards/margins": 1.3613402843475342, | |
| "rewards/rejected": -4.645566463470459, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.86754953789681e-10, | |
| "logits/chosen": -0.35480597615242004, | |
| "logits/rejected": -0.35523343086242676, | |
| "logps/chosen": -449.10235595703125, | |
| "logps/rejected": -461.8785095214844, | |
| "loss": 0.4941, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -3.270163059234619, | |
| "rewards/margins": 1.221695065498352, | |
| "rewards/rejected": -4.491857528686523, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.4669304221726077e-10, | |
| "logits/chosen": -0.3551548421382904, | |
| "logits/rejected": -0.3556649386882782, | |
| "logps/chosen": -456.6444396972656, | |
| "logps/rejected": -467.2582092285156, | |
| "loss": 0.4289, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -3.225339412689209, | |
| "rewards/margins": 1.3719263076782227, | |
| "rewards/rejected": -4.597265243530273, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.35613125562667847, | |
| "logits/rejected": -0.3575323522090912, | |
| "logps/chosen": -453.228759765625, | |
| "logps/rejected": -467.6603088378906, | |
| "loss": 0.416, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -3.2920143604278564, | |
| "rewards/margins": 1.416282296180725, | |
| "rewards/rejected": -4.708296775817871, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_logits/chosen": -0.42728757858276367, | |
| "eval_logits/rejected": -0.42789557576179504, | |
| "eval_logps/chosen": -444.3138427734375, | |
| "eval_logps/rejected": -455.0481262207031, | |
| "eval_loss": 0.47960197925567627, | |
| "eval_rewards/accuracies": 0.7599999904632568, | |
| "eval_rewards/chosen": -3.1422641277313232, | |
| "eval_rewards/margins": 1.264641523361206, | |
| "eval_rewards/rejected": -4.406905174255371, | |
| "eval_runtime": 351.5662, | |
| "eval_samples_per_second": 1.422, | |
| "eval_steps_per_second": 1.422, | |
| "step": 3000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |