Instructions to use bboeun/dpo2-Delayed2-ref-mistral-fix with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use bboeun/dpo2-Delayed2-ref-mistral-fix with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = PeftModel.from_pretrained(base_model, "bboeun/dpo2-Delayed2-ref-mistral-fix")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.8888888888888888, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.309734513274336e-07, | |
| "logits/chosen": -2.1858699321746826, | |
| "logits/rejected": -2.25400972366333, | |
| "logps/chosen": -292.4839172363281, | |
| "logps/rejected": -334.2861633300781, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0007014082511886954, | |
| "rewards/margins": 0.0008057593367993832, | |
| "rewards/rejected": -0.001507167937234044, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.415929203539823e-06, | |
| "logits/chosen": -2.2499358654022217, | |
| "logits/rejected": -2.2245681285858154, | |
| "logps/chosen": -323.0448303222656, | |
| "logps/rejected": -341.9175109863281, | |
| "loss": 0.6941, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.0023523904383182526, | |
| "rewards/margins": -0.0019129945430904627, | |
| "rewards/rejected": -0.00043939598253928125, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.3008849557522127e-06, | |
| "logits/chosen": -2.2502377033233643, | |
| "logits/rejected": -2.235426425933838, | |
| "logps/chosen": -309.60076904296875, | |
| "logps/rejected": -354.3961181640625, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.005819912068545818, | |
| "rewards/margins": -0.00021856316016055644, | |
| "rewards/rejected": -0.0056013488210737705, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.185840707964602e-06, | |
| "logits/chosen": -2.2594857215881348, | |
| "logits/rejected": -2.231959819793701, | |
| "logps/chosen": -342.497802734375, | |
| "logps/rejected": -361.927734375, | |
| "loss": 0.6939, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.017969723790884018, | |
| "rewards/margins": -0.001382522750645876, | |
| "rewards/rejected": -0.01658720150589943, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.070796460176992e-06, | |
| "logits/chosen": -2.278099775314331, | |
| "logits/rejected": -2.2154829502105713, | |
| "logps/chosen": -334.44879150390625, | |
| "logps/rejected": -324.4710998535156, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.03501028195023537, | |
| "rewards/margins": 0.000740527524612844, | |
| "rewards/rejected": -0.03575081005692482, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.955752212389381e-06, | |
| "logits/chosen": -2.28778338432312, | |
| "logits/rejected": -2.1848011016845703, | |
| "logps/chosen": -329.4461669921875, | |
| "logps/rejected": -304.49163818359375, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0479893684387207, | |
| "rewards/margins": 0.0005268483073450625, | |
| "rewards/rejected": -0.04851621761918068, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.840707964601771e-06, | |
| "logits/chosen": -2.118542194366455, | |
| "logits/rejected": -2.1866343021392822, | |
| "logps/chosen": -299.27447509765625, | |
| "logps/rejected": -326.0574645996094, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.049522001296281815, | |
| "rewards/margins": 0.001656264765188098, | |
| "rewards/rejected": -0.05117826536297798, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.72566371681416e-06, | |
| "logits/chosen": -2.17606782913208, | |
| "logits/rejected": -2.1878247261047363, | |
| "logps/chosen": -327.1267395019531, | |
| "logps/rejected": -323.746337890625, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.050967562943696976, | |
| "rewards/margins": 0.0074631692841649055, | |
| "rewards/rejected": -0.05843073129653931, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.610619469026549e-06, | |
| "logits/chosen": -2.3069217205047607, | |
| "logits/rejected": -2.1109657287597656, | |
| "logps/chosen": -359.98150634765625, | |
| "logps/rejected": -322.8603820800781, | |
| "loss": 0.7002, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.05969462916254997, | |
| "rewards/margins": -0.013257542625069618, | |
| "rewards/rejected": -0.0464370921254158, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.495575221238938e-06, | |
| "logits/chosen": -2.2969472408294678, | |
| "logits/rejected": -2.2404885292053223, | |
| "logps/chosen": -328.78704833984375, | |
| "logps/rejected": -334.45281982421875, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.005047931335866451, | |
| "rewards/margins": 0.0031173895113170147, | |
| "rewards/rejected": 0.0019305419409647584, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.380530973451329e-06, | |
| "logits/chosen": -2.2497150897979736, | |
| "logits/rejected": -2.220237970352173, | |
| "logps/chosen": -333.1561584472656, | |
| "logps/rejected": -314.7790832519531, | |
| "loss": 0.6961, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": 0.015577316284179688, | |
| "rewards/margins": -0.005401826463639736, | |
| "rewards/rejected": 0.02097914181649685, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.999951373555555e-06, | |
| "logits/chosen": -2.301379442214966, | |
| "logits/rejected": -2.223681926727295, | |
| "logps/chosen": -351.5559387207031, | |
| "logps/rejected": -326.63287353515625, | |
| "loss": 0.6889, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.012599905952811241, | |
| "rewards/margins": 0.009558334946632385, | |
| "rewards/rejected": 0.003041572170332074, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.999086929743288e-06, | |
| "logits/chosen": -2.2710835933685303, | |
| "logits/rejected": -2.227280855178833, | |
| "logps/chosen": -321.6353454589844, | |
| "logps/rejected": -332.7576599121094, | |
| "loss": 0.692, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.040375690907239914, | |
| "rewards/margins": 0.004883688408881426, | |
| "rewards/rejected": 0.03549199923872948, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.997142113313472e-06, | |
| "logits/chosen": -2.212054491043091, | |
| "logits/rejected": -2.202702283859253, | |
| "logps/chosen": -322.11651611328125, | |
| "logps/rejected": -309.7989501953125, | |
| "loss": 0.6871, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0492350198328495, | |
| "rewards/margins": 0.013679656200110912, | |
| "rewards/rejected": 0.035555362701416016, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.994117344568142e-06, | |
| "logits/chosen": -2.2250020503997803, | |
| "logits/rejected": -2.2318902015686035, | |
| "logps/chosen": -317.3855895996094, | |
| "logps/rejected": -337.94805908203125, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.03159898519515991, | |
| "rewards/margins": 0.0040366738103330135, | |
| "rewards/rejected": 0.02756231650710106, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.990013277202137e-06, | |
| "logits/chosen": -2.2112176418304443, | |
| "logits/rejected": -2.3512566089630127, | |
| "logps/chosen": -337.7769775390625, | |
| "logps/rejected": -419.68450927734375, | |
| "loss": 0.7042, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.02137443795800209, | |
| "rewards/margins": -0.019264575093984604, | |
| "rewards/rejected": -0.00210986053571105, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.984830798161828e-06, | |
| "logits/chosen": -2.2544150352478027, | |
| "logits/rejected": -2.1911208629608154, | |
| "logps/chosen": -384.29718017578125, | |
| "logps/rejected": -362.35308837890625, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.11389386653900146, | |
| "rewards/margins": 0.022078361362218857, | |
| "rewards/rejected": -0.13597223162651062, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.978571027453433e-06, | |
| "logits/chosen": -2.3017163276672363, | |
| "logits/rejected": -2.12226939201355, | |
| "logps/chosen": -364.39837646484375, | |
| "logps/rejected": -286.0245361328125, | |
| "loss": 0.6959, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.12149347364902496, | |
| "rewards/margins": 0.000388662883779034, | |
| "rewards/rejected": -0.12188214063644409, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.971235317900968e-06, | |
| "logits/chosen": -2.1424872875213623, | |
| "logits/rejected": -2.239366054534912, | |
| "logps/chosen": -283.5924377441406, | |
| "logps/rejected": -315.69586181640625, | |
| "loss": 0.6965, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.04944513365626335, | |
| "rewards/margins": -0.0020489301532506943, | |
| "rewards/rejected": -0.04739619791507721, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.962825254853888e-06, | |
| "logits/chosen": -2.3371052742004395, | |
| "logits/rejected": -2.248575210571289, | |
| "logps/chosen": -406.6221923828125, | |
| "logps/rejected": -363.2230529785156, | |
| "loss": 0.6814, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.03307682275772095, | |
| "rewards/margins": 0.028196487575769424, | |
| "rewards/rejected": 0.004880332853645086, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.953342655844465e-06, | |
| "logits/chosen": -2.281076669692993, | |
| "logits/rejected": -2.124605655670166, | |
| "logps/chosen": -329.9849548339844, | |
| "logps/rejected": -306.5705261230469, | |
| "loss": 0.6745, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.04553469270467758, | |
| "rewards/margins": 0.04089093953371048, | |
| "rewards/rejected": 0.004643745254725218, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.942789570194995e-06, | |
| "logits/chosen": -2.2274227142333984, | |
| "logits/rejected": -2.195772409439087, | |
| "logps/chosen": -353.90277099609375, | |
| "logps/rejected": -329.13055419921875, | |
| "loss": 0.6709, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.03678930178284645, | |
| "rewards/margins": 0.05163549259305, | |
| "rewards/rejected": -0.014846190810203552, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.931168278574916e-06, | |
| "logits/chosen": -2.290721893310547, | |
| "logits/rejected": -2.233349561691284, | |
| "logps/chosen": -384.83087158203125, | |
| "logps/rejected": -375.786376953125, | |
| "loss": 0.6738, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.10011746734380722, | |
| "rewards/margins": 0.0485808365046978, | |
| "rewards/rejected": 0.05153663828969002, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.919797871024877e-06, | |
| "logits/chosen": -2.3008508682250977, | |
| "logits/rejected": -2.2407467365264893, | |
| "logps/chosen": -341.2652587890625, | |
| "logps/rejected": -304.7622985839844, | |
| "loss": 0.6569, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.061227262020111084, | |
| "rewards/margins": 0.07875394821166992, | |
| "rewards/rejected": -0.01752668246626854, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.906154097672858e-06, | |
| "logits/chosen": -2.3239777088165283, | |
| "logits/rejected": -2.2359421253204346, | |
| "logps/chosen": -357.5738220214844, | |
| "logps/rejected": -333.55389404296875, | |
| "loss": 0.6474, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.16475871205329895, | |
| "rewards/margins": 0.10896603018045425, | |
| "rewards/rejected": 0.05579269677400589, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.89145003578833e-06, | |
| "logits/chosen": -2.3069913387298584, | |
| "logits/rejected": -2.225893497467041, | |
| "logps/chosen": -337.88299560546875, | |
| "logps/rejected": -319.11016845703125, | |
| "loss": 0.6595, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.17844273149967194, | |
| "rewards/margins": 0.08880583941936493, | |
| "rewards/rejected": 0.08963687717914581, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.875688863124766e-06, | |
| "logits/chosen": -2.22190260887146, | |
| "logits/rejected": -2.2968430519104004, | |
| "logps/chosen": -391.5494384765625, | |
| "logps/rejected": -404.64178466796875, | |
| "loss": 0.6949, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.26371732354164124, | |
| "rewards/margins": 0.03252069279551506, | |
| "rewards/rejected": 0.23119667172431946, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.858873985892058e-06, | |
| "logits/chosen": -2.2825188636779785, | |
| "logits/rejected": -2.19154691696167, | |
| "logps/chosen": -354.6551818847656, | |
| "logps/rejected": -353.5287170410156, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.13825781643390656, | |
| "rewards/margins": 0.03303035721182823, | |
| "rewards/rejected": 0.10522744804620743, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.841009038020401e-06, | |
| "logits/chosen": -2.2181854248046875, | |
| "logits/rejected": -2.2422547340393066, | |
| "logps/chosen": -332.62322998046875, | |
| "logps/rejected": -351.6214599609375, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.17069143056869507, | |
| "rewards/margins": 0.07059639692306519, | |
| "rewards/rejected": 0.10009505599737167, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.82209788037494e-06, | |
| "logits/chosen": -2.283082962036133, | |
| "logits/rejected": -2.3039584159851074, | |
| "logps/chosen": -367.76708984375, | |
| "logps/rejected": -379.209228515625, | |
| "loss": 0.7081, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.12761621177196503, | |
| "rewards/margins": 0.001193371368572116, | |
| "rewards/rejected": 0.12642285227775574, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.80214459992139e-06, | |
| "logits/chosen": -2.297591209411621, | |
| "logits/rejected": -2.2650771141052246, | |
| "logps/chosen": -359.8524169921875, | |
| "logps/rejected": -390.44195556640625, | |
| "loss": 0.6335, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.07557342946529388, | |
| "rewards/margins": 0.15836670994758606, | |
| "rewards/rejected": -0.08279327303171158, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.781153508842785e-06, | |
| "logits/chosen": -2.2711267471313477, | |
| "logits/rejected": -2.2797353267669678, | |
| "logps/chosen": -329.4121398925781, | |
| "logps/rejected": -345.84393310546875, | |
| "loss": 0.675, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.09101514518260956, | |
| "rewards/margins": 0.05387691408395767, | |
| "rewards/rejected": -0.14489206671714783, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.759129143607547e-06, | |
| "logits/chosen": -2.2991256713867188, | |
| "logits/rejected": -2.1713948249816895, | |
| "logps/chosen": -373.73992919921875, | |
| "logps/rejected": -298.8330993652344, | |
| "loss": 0.6611, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.07734711468219757, | |
| "rewards/margins": 0.09413515031337738, | |
| "rewards/rejected": -0.17148226499557495, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.736076263989103e-06, | |
| "logits/chosen": -2.333172559738159, | |
| "logits/rejected": -2.2931008338928223, | |
| "logps/chosen": -384.9156188964844, | |
| "logps/rejected": -363.5679016113281, | |
| "loss": 0.6449, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.04307403415441513, | |
| "rewards/margins": 0.1350451558828354, | |
| "rewards/rejected": -0.09197112172842026, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.711999852037226e-06, | |
| "logits/chosen": -2.3204524517059326, | |
| "logits/rejected": -2.256392240524292, | |
| "logps/chosen": -376.4149169921875, | |
| "logps/rejected": -337.93402099609375, | |
| "loss": 0.681, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.2001509964466095, | |
| "rewards/margins": 0.0656595304608345, | |
| "rewards/rejected": 0.1344914734363556, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.68690511100134e-06, | |
| "logits/chosen": -2.222977638244629, | |
| "logits/rejected": -2.2059781551361084, | |
| "logps/chosen": -326.2198181152344, | |
| "logps/rejected": -322.86907958984375, | |
| "loss": 0.7169, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.12599822878837585, | |
| "rewards/margins": -0.01360931433737278, | |
| "rewards/rejected": 0.13960754871368408, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.660797464206035e-06, | |
| "logits/chosen": -2.2420578002929688, | |
| "logits/rejected": -2.23136568069458, | |
| "logps/chosen": -338.4748840332031, | |
| "logps/rejected": -357.66705322265625, | |
| "loss": 0.6701, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.26268088817596436, | |
| "rewards/margins": 0.08094726502895355, | |
| "rewards/rejected": 0.181733638048172, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.633682553879e-06, | |
| "logits/chosen": -2.276688814163208, | |
| "logits/rejected": -2.234923839569092, | |
| "logps/chosen": -317.33599853515625, | |
| "logps/rejected": -312.63897705078125, | |
| "loss": 0.6804, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.23342204093933105, | |
| "rewards/margins": 0.0563444122672081, | |
| "rewards/rejected": 0.17707762122154236, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.605566239931666e-06, | |
| "logits/chosen": -2.3001625537872314, | |
| "logits/rejected": -2.2134346961975098, | |
| "logps/chosen": -357.8388977050781, | |
| "logps/rejected": -349.38995361328125, | |
| "loss": 0.6357, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.3796467185020447, | |
| "rewards/margins": 0.167959064245224, | |
| "rewards/rejected": 0.21168763935565948, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.576454598692797e-06, | |
| "logits/chosen": -2.296462297439575, | |
| "logits/rejected": -2.226733922958374, | |
| "logps/chosen": -358.35015869140625, | |
| "logps/rejected": -326.0476989746094, | |
| "loss": 0.6382, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.36672210693359375, | |
| "rewards/margins": 0.16903677582740784, | |
| "rewards/rejected": 0.19768527150154114, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.546353921595306e-06, | |
| "logits/chosen": -2.289577007293701, | |
| "logits/rejected": -2.279940128326416, | |
| "logps/chosen": -337.85699462890625, | |
| "logps/rejected": -340.87261962890625, | |
| "loss": 0.7079, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.3491610884666443, | |
| "rewards/margins": 0.029783133417367935, | |
| "rewards/rejected": 0.31937795877456665, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.515270713816589e-06, | |
| "logits/chosen": -2.314282178878784, | |
| "logits/rejected": -2.1990668773651123, | |
| "logps/chosen": -380.24554443359375, | |
| "logps/rejected": -341.0552062988281, | |
| "loss": 0.6806, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.3242203891277313, | |
| "rewards/margins": 0.10805626213550568, | |
| "rewards/rejected": 0.21616414189338684, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.483211692872669e-06, | |
| "logits/chosen": -2.2007763385772705, | |
| "logits/rejected": -2.1976895332336426, | |
| "logps/chosen": -307.19464111328125, | |
| "logps/rejected": -318.5234069824219, | |
| "loss": 0.7432, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.22343340516090393, | |
| "rewards/margins": -0.031447634100914, | |
| "rewards/rejected": 0.25488102436065674, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.450183787166447e-06, | |
| "logits/chosen": -2.1776084899902344, | |
| "logits/rejected": -2.263404369354248, | |
| "logps/chosen": -275.30230712890625, | |
| "logps/rejected": -335.97637939453125, | |
| "loss": 0.6777, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.10852464288473129, | |
| "rewards/margins": 0.05528046935796738, | |
| "rewards/rejected": 0.053244173526763916, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.41619413449037e-06, | |
| "logits/chosen": -2.341031551361084, | |
| "logits/rejected": -2.2925498485565186, | |
| "logps/chosen": -387.17315673828125, | |
| "logps/rejected": -408.65350341796875, | |
| "loss": 0.6467, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.17925263941287994, | |
| "rewards/margins": 0.12131942808628082, | |
| "rewards/rejected": 0.057933200150728226, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.381250080483864e-06, | |
| "logits/chosen": -2.305234432220459, | |
| "logits/rejected": -2.29388689994812, | |
| "logps/chosen": -353.67547607421875, | |
| "logps/rejected": -365.3600769042969, | |
| "loss": 0.6973, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.020944729447364807, | |
| "rewards/margins": 0.018988361582159996, | |
| "rewards/rejected": 0.001956367399543524, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.345359177045827e-06, | |
| "logits/chosen": -2.2121920585632324, | |
| "logits/rejected": -2.1668667793273926, | |
| "logps/chosen": -318.66827392578125, | |
| "logps/rejected": -298.404052734375, | |
| "loss": 0.672, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.009936051443219185, | |
| "rewards/margins": 0.07024586200714111, | |
| "rewards/rejected": -0.08018191158771515, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.308529180702568e-06, | |
| "logits/chosen": -2.2666916847229004, | |
| "logits/rejected": -2.283783435821533, | |
| "logps/chosen": -359.14825439453125, | |
| "logps/rejected": -384.94073486328125, | |
| "loss": 0.6523, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.030093509703874588, | |
| "rewards/margins": 0.1233237162232399, | |
| "rewards/rejected": -0.1534171998500824, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.270768050931515e-06, | |
| "logits/chosen": -2.2703680992126465, | |
| "logits/rejected": -2.3521008491516113, | |
| "logps/chosen": -336.55548095703125, | |
| "logps/rejected": -373.68902587890625, | |
| "loss": 0.6823, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.19773463904857635, | |
| "rewards/margins": 0.06342988461256027, | |
| "rewards/rejected": -0.2611645460128784, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.232083948441046e-06, | |
| "logits/chosen": -2.263096332550049, | |
| "logits/rejected": -2.1758663654327393, | |
| "logps/chosen": -366.25714111328125, | |
| "logps/rejected": -322.65081787109375, | |
| "loss": 0.6963, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1464572548866272, | |
| "rewards/margins": 0.025711650028824806, | |
| "rewards/rejected": -0.17216889560222626, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.192485233406862e-06, | |
| "logits/chosen": -2.3192970752716064, | |
| "logits/rejected": -2.3752708435058594, | |
| "logps/chosen": -383.03753662109375, | |
| "logps/rejected": -406.3360900878906, | |
| "loss": 0.6627, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.13319934904575348, | |
| "rewards/margins": 0.0994311273097992, | |
| "rewards/rejected": -0.23263044655323029, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.151980463665227e-06, | |
| "logits/chosen": -2.2946994304656982, | |
| "logits/rejected": -2.2120468616485596, | |
| "logps/chosen": -390.14434814453125, | |
| "logps/rejected": -344.0641174316406, | |
| "loss": 0.6981, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.12970566749572754, | |
| "rewards/margins": 0.05780962109565735, | |
| "rewards/rejected": -0.1875152885913849, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.1105783928635e-06, | |
| "logits/chosen": -2.2886319160461426, | |
| "logits/rejected": -2.2648708820343018, | |
| "logps/chosen": -373.304931640625, | |
| "logps/rejected": -381.96368408203125, | |
| "loss": 0.7022, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.011115001514554024, | |
| "rewards/margins": 0.03471168875694275, | |
| "rewards/rejected": -0.04582669958472252, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.068287968568355e-06, | |
| "logits/chosen": -2.2562637329101562, | |
| "logits/rejected": -2.2379746437072754, | |
| "logps/chosen": -336.50640869140625, | |
| "logps/rejected": -369.13037109375, | |
| "loss": 0.6873, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.1720694601535797, | |
| "rewards/margins": 0.060669075697660446, | |
| "rewards/rejected": 0.11140035092830658, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.02511833033208e-06, | |
| "logits/chosen": -2.145764112472534, | |
| "logits/rejected": -2.1581664085388184, | |
| "logps/chosen": -320.99456787109375, | |
| "logps/rejected": -324.7594299316406, | |
| "loss": 0.6312, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02777281031012535, | |
| "rewards/margins": 0.16521447896957397, | |
| "rewards/rejected": -0.19298730790615082, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 8.981078807717396e-06, | |
| "logits/chosen": -2.316991090774536, | |
| "logits/rejected": -2.169630765914917, | |
| "logps/chosen": -417.3232421875, | |
| "logps/rejected": -368.39617919921875, | |
| "loss": 0.6415, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.12395425885915756, | |
| "rewards/margins": 0.15927435457706451, | |
| "rewards/rejected": -0.03532009571790695, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 8.936178918281209e-06, | |
| "logits/chosen": -2.3391947746276855, | |
| "logits/rejected": -2.359314441680908, | |
| "logps/chosen": -379.1593322753906, | |
| "logps/rejected": -404.58868408203125, | |
| "loss": 0.7159, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.0663943886756897, | |
| "rewards/margins": -0.007743087597191334, | |
| "rewards/rejected": 0.07413747161626816, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.890428365517728e-06, | |
| "logits/chosen": -2.3254919052124023, | |
| "logits/rejected": -2.2909200191497803, | |
| "logps/chosen": -377.60736083984375, | |
| "logps/rejected": -365.9610595703125, | |
| "loss": 0.6832, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.23426219820976257, | |
| "rewards/margins": 0.05870335176587105, | |
| "rewards/rejected": 0.17555885016918182, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.843837036761404e-06, | |
| "logits/chosen": -2.247920513153076, | |
| "logits/rejected": -2.1772730350494385, | |
| "logps/chosen": -299.9126892089844, | |
| "logps/rejected": -299.29736328125, | |
| "loss": 0.6474, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.20609867572784424, | |
| "rewards/margins": 0.12030823528766632, | |
| "rewards/rejected": 0.08579044044017792, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 8.796415001050154e-06, | |
| "logits/chosen": -2.295091152191162, | |
| "logits/rejected": -2.246346950531006, | |
| "logps/chosen": -389.3216552734375, | |
| "logps/rejected": -371.30157470703125, | |
| "loss": 0.6311, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.25310301780700684, | |
| "rewards/margins": 0.18558058142662048, | |
| "rewards/rejected": 0.06752243638038635, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 8.748172506949274e-06, | |
| "logits/chosen": -2.27669358253479, | |
| "logits/rejected": -2.1988024711608887, | |
| "logps/chosen": -326.1456298828125, | |
| "logps/rejected": -309.17266845703125, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.05873417109251022, | |
| "rewards/margins": 0.07899702340364456, | |
| "rewards/rejected": -0.020262856036424637, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.699119980336602e-06, | |
| "logits/chosen": -2.302666187286377, | |
| "logits/rejected": -2.2827186584472656, | |
| "logps/chosen": -364.0043640136719, | |
| "logps/rejected": -372.041748046875, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.23366883397102356, | |
| "rewards/margins": 0.0409202054142952, | |
| "rewards/rejected": 0.19274859130382538, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.649268022149333e-06, | |
| "logits/chosen": -2.282480001449585, | |
| "logits/rejected": -2.2400062084198, | |
| "logps/chosen": -333.30194091796875, | |
| "logps/rejected": -321.1686096191406, | |
| "loss": 0.6733, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.3089759349822998, | |
| "rewards/margins": 0.10994930565357208, | |
| "rewards/rejected": 0.19902662932872772, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.59862740609301e-06, | |
| "logits/chosen": -2.3325583934783936, | |
| "logits/rejected": -2.396918535232544, | |
| "logps/chosen": -383.0022888183594, | |
| "logps/rejected": -438.1582946777344, | |
| "loss": 0.6617, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.38308969140052795, | |
| "rewards/margins": 0.12253421545028687, | |
| "rewards/rejected": 0.2605554461479187, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 8.547209076313172e-06, | |
| "logits/chosen": -2.3535332679748535, | |
| "logits/rejected": -2.3711094856262207, | |
| "logps/chosen": -378.6181640625, | |
| "logps/rejected": -433.81005859375, | |
| "loss": 0.6061, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.33381205797195435, | |
| "rewards/margins": 0.23929791152477264, | |
| "rewards/rejected": 0.09451412409543991, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 8.495024145030174e-06, | |
| "logits/chosen": -2.195960283279419, | |
| "logits/rejected": -2.2019705772399902, | |
| "logps/chosen": -330.01177978515625, | |
| "logps/rejected": -352.898193359375, | |
| "loss": 0.6155, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02304258942604065, | |
| "rewards/margins": 0.21534284949302673, | |
| "rewards/rejected": -0.23838546872138977, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.442083890137678e-06, | |
| "logits/chosen": -2.2961819171905518, | |
| "logits/rejected": -2.2526700496673584, | |
| "logps/chosen": -343.67987060546875, | |
| "logps/rejected": -348.7483825683594, | |
| "loss": 0.7059, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.05699265003204346, | |
| "rewards/margins": 0.0024743645917624235, | |
| "rewards/rejected": -0.05946701765060425, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.388399752765344e-06, | |
| "logits/chosen": -2.2721645832061768, | |
| "logits/rejected": -2.2435104846954346, | |
| "logps/chosen": -377.99664306640625, | |
| "logps/rejected": -376.784912109375, | |
| "loss": 0.6238, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.22479982674121857, | |
| "rewards/margins": 0.24280264973640442, | |
| "rewards/rejected": -0.4676024317741394, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.333983334806248e-06, | |
| "logits/chosen": -2.2859599590301514, | |
| "logits/rejected": -2.241246461868286, | |
| "logps/chosen": -367.9365234375, | |
| "logps/rejected": -335.52740478515625, | |
| "loss": 0.6717, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.09134645760059357, | |
| "rewards/margins": 0.09670265018939972, | |
| "rewards/rejected": -0.1880491077899933, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.278846396409534e-06, | |
| "logits/chosen": -2.306518077850342, | |
| "logits/rejected": -2.265807867050171, | |
| "logps/chosen": -370.5268249511719, | |
| "logps/rejected": -363.80718994140625, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.02050144411623478, | |
| "rewards/margins": 0.0992293506860733, | |
| "rewards/rejected": -0.07872792333364487, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.223000853438904e-06, | |
| "logits/chosen": -2.3641223907470703, | |
| "logits/rejected": -2.272670269012451, | |
| "logps/chosen": -395.78509521484375, | |
| "logps/rejected": -397.1343994140625, | |
| "loss": 0.6263, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -3.156661841785535e-05, | |
| "rewards/margins": 0.2341923713684082, | |
| "rewards/rejected": -0.23422393202781677, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.166458774897413e-06, | |
| "logits/chosen": -2.30328631401062, | |
| "logits/rejected": -2.234039306640625, | |
| "logps/chosen": -379.7477722167969, | |
| "logps/rejected": -355.75677490234375, | |
| "loss": 0.605, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.024682385846972466, | |
| "rewards/margins": 0.26411938667297363, | |
| "rewards/rejected": -0.28880181908607483, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.109232380319194e-06, | |
| "logits/chosen": -2.2999930381774902, | |
| "logits/rejected": -2.2953243255615234, | |
| "logps/chosen": -407.1230163574219, | |
| "logps/rejected": -414.182373046875, | |
| "loss": 0.7166, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.06269857287406921, | |
| "rewards/margins": 0.01421293430030346, | |
| "rewards/rejected": 0.0484856478869915, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 8.051334037128661e-06, | |
| "logits/chosen": -2.2836692333221436, | |
| "logits/rejected": -2.2380998134613037, | |
| "logps/chosen": -332.956787109375, | |
| "logps/rejected": -330.85601806640625, | |
| "loss": 0.7164, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.043525341898202896, | |
| "rewards/margins": 0.0009438946726731956, | |
| "rewards/rejected": 0.04258145019412041, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 7.99277625796771e-06, | |
| "logits/chosen": -2.200336217880249, | |
| "logits/rejected": -2.1876537799835205, | |
| "logps/chosen": -325.07611083984375, | |
| "logps/rejected": -318.10784912109375, | |
| "loss": 0.7158, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.05763138085603714, | |
| "rewards/margins": 0.018384983763098717, | |
| "rewards/rejected": -0.0760163813829422, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 7.933571697991582e-06, | |
| "logits/chosen": -2.3422603607177734, | |
| "logits/rejected": -2.2664551734924316, | |
| "logps/chosen": -401.63275146484375, | |
| "logps/rejected": -351.42767333984375, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.008800688199698925, | |
| "rewards/margins": 0.04444243758916855, | |
| "rewards/rejected": -0.05324311926960945, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 7.873733152133898e-06, | |
| "logits/chosen": -2.2099857330322266, | |
| "logits/rejected": -2.236807346343994, | |
| "logps/chosen": -311.5889892578125, | |
| "logps/rejected": -328.91033935546875, | |
| "loss": 0.6992, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.11291439831256866, | |
| "rewards/margins": 0.023546913638710976, | |
| "rewards/rejected": 0.08936748653650284, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 7.813273552341496e-06, | |
| "logits/chosen": -2.3059163093566895, | |
| "logits/rejected": -2.280585527420044, | |
| "logps/chosen": -330.9400329589844, | |
| "logps/rejected": -347.64056396484375, | |
| "loss": 0.6812, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.044351525604724884, | |
| "rewards/margins": 0.0922970399260521, | |
| "rewards/rejected": -0.047945525497198105, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 7.75220596477966e-06, | |
| "logits/chosen": -2.263115644454956, | |
| "logits/rejected": -2.2254600524902344, | |
| "logps/chosen": -325.22198486328125, | |
| "logps/rejected": -316.40875244140625, | |
| "loss": 0.6262, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.08847782015800476, | |
| "rewards/margins": 0.24120266735553741, | |
| "rewards/rejected": -0.15272484719753265, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.690543587008332e-06, | |
| "logits/chosen": -2.2187132835388184, | |
| "logits/rejected": -2.2646350860595703, | |
| "logps/chosen": -401.48687744140625, | |
| "logps/rejected": -387.3714294433594, | |
| "loss": 0.6596, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.21927690505981445, | |
| "rewards/margins": 0.1301509588956833, | |
| "rewards/rejected": 0.08912594616413116, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.628299745129943e-06, | |
| "logits/chosen": -2.2820262908935547, | |
| "logits/rejected": -2.2334964275360107, | |
| "logps/chosen": -403.6439208984375, | |
| "logps/rejected": -374.96270751953125, | |
| "loss": 0.7398, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.02083228901028633, | |
| "rewards/margins": -0.01027420163154602, | |
| "rewards/rejected": -0.010558092966675758, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.565487890909448e-06, | |
| "logits/chosen": -2.322711229324341, | |
| "logits/rejected": -2.225168466567993, | |
| "logps/chosen": -337.26605224609375, | |
| "logps/rejected": -304.8133544921875, | |
| "loss": 0.6559, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.2134527862071991, | |
| "rewards/margins": 0.1341491937637329, | |
| "rewards/rejected": 0.07930360734462738, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 7.502121598867218e-06, | |
| "logits/chosen": -2.2647910118103027, | |
| "logits/rejected": -2.2931771278381348, | |
| "logps/chosen": -353.45660400390625, | |
| "logps/rejected": -313.0556945800781, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.16318438947200775, | |
| "rewards/margins": 0.0658370777964592, | |
| "rewards/rejected": 0.09734731912612915, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 7.438214563345389e-06, | |
| "logits/chosen": -2.352962017059326, | |
| "logits/rejected": -2.329225778579712, | |
| "logps/chosen": -387.4513244628906, | |
| "logps/rejected": -395.37347412109375, | |
| "loss": 0.6693, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.19145812094211578, | |
| "rewards/margins": 0.13347746431827545, | |
| "rewards/rejected": 0.05798065662384033, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 7.373780595548334e-06, | |
| "logits/chosen": -2.310203790664673, | |
| "logits/rejected": -2.2119874954223633, | |
| "logps/chosen": -377.05657958984375, | |
| "logps/rejected": -357.82525634765625, | |
| "loss": 0.7045, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.4518454670906067, | |
| "rewards/margins": 0.05518758296966553, | |
| "rewards/rejected": 0.3966578543186188, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 7.3088336205578565e-06, | |
| "logits/chosen": -2.240410327911377, | |
| "logits/rejected": -2.215846538543701, | |
| "logps/chosen": -350.8703308105469, | |
| "logps/rejected": -368.5628662109375, | |
| "loss": 0.6456, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.1876397579908371, | |
| "rewards/margins": 0.16387517750263214, | |
| "rewards/rejected": 0.0237645972520113, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 7.243387674323794e-06, | |
| "logits/chosen": -2.2497904300689697, | |
| "logits/rejected": -2.232779026031494, | |
| "logps/chosen": -339.3749084472656, | |
| "logps/rejected": -359.62493896484375, | |
| "loss": 0.6597, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.22228892147541046, | |
| "rewards/margins": 0.12651677429676056, | |
| "rewards/rejected": 0.09577211737632751, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 7.177456900630645e-06, | |
| "logits/chosen": -2.256024122238159, | |
| "logits/rejected": -2.2079262733459473, | |
| "logps/chosen": -340.0914611816406, | |
| "logps/rejected": -322.68011474609375, | |
| "loss": 0.5837, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.16695842146873474, | |
| "rewards/margins": 0.28337720036506653, | |
| "rewards/rejected": -0.1164187639951706, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 7.111055548040911e-06, | |
| "logits/chosen": -2.3057608604431152, | |
| "logits/rejected": -2.2699170112609863, | |
| "logps/chosen": -389.82611083984375, | |
| "logps/rejected": -382.68829345703125, | |
| "loss": 0.5632, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.19717621803283691, | |
| "rewards/margins": 0.36688321828842163, | |
| "rewards/rejected": -0.1697070151567459, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 7.044197966815773e-06, | |
| "logits/chosen": -2.25697922706604, | |
| "logits/rejected": -2.107326030731201, | |
| "logps/chosen": -320.3851623535156, | |
| "logps/rejected": -288.55108642578125, | |
| "loss": 0.6459, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.034488920122385025, | |
| "rewards/margins": 0.16461703181266785, | |
| "rewards/rejected": -0.13012811541557312, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 6.976898605813798e-06, | |
| "logits/chosen": -2.269026041030884, | |
| "logits/rejected": -2.305229663848877, | |
| "logps/chosen": -335.22540283203125, | |
| "logps/rejected": -391.1421813964844, | |
| "loss": 0.7153, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.16712869703769684, | |
| "rewards/margins": 0.05030643194913864, | |
| "rewards/rejected": 0.1168222427368164, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 6.90917200936835e-06, | |
| "logits/chosen": -2.202436923980713, | |
| "logits/rejected": -2.1774916648864746, | |
| "logps/chosen": -308.787109375, | |
| "logps/rejected": -331.55987548828125, | |
| "loss": 0.7907, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.020061034709215164, | |
| "rewards/margins": -0.09425730258226395, | |
| "rewards/rejected": 0.07419625669717789, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 6.841032814144345e-06, | |
| "logits/chosen": -2.2023422718048096, | |
| "logits/rejected": -2.215259552001953, | |
| "logps/chosen": -303.5005187988281, | |
| "logps/rejected": -343.00146484375, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.026828575879335403, | |
| "rewards/margins": 0.07769123464822769, | |
| "rewards/rejected": -0.05086265876889229, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 6.772495745975067e-06, | |
| "logits/chosen": -2.268451452255249, | |
| "logits/rejected": -2.1880459785461426, | |
| "logps/chosen": -352.240966796875, | |
| "logps/rejected": -341.0179748535156, | |
| "loss": 0.6377, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02395152859389782, | |
| "rewards/margins": 0.17894978821277618, | |
| "rewards/rejected": -0.20290131866931915, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 6.703575616679709e-06, | |
| "logits/chosen": -2.382638454437256, | |
| "logits/rejected": -2.3348803520202637, | |
| "logps/chosen": -395.88372802734375, | |
| "logps/rejected": -385.67364501953125, | |
| "loss": 0.6581, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.00013340116129256785, | |
| "rewards/margins": 0.17107084393501282, | |
| "rewards/rejected": -0.1709374487400055, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.634287320862334e-06, | |
| "logits/chosen": -2.332146644592285, | |
| "logits/rejected": -2.196887254714966, | |
| "logps/chosen": -367.8846435546875, | |
| "logps/rejected": -342.24224853515625, | |
| "loss": 0.6204, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.21408149600028992, | |
| "rewards/margins": 0.27911919355392456, | |
| "rewards/rejected": -0.06503769755363464, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.564645832692938e-06, | |
| "logits/chosen": -2.2684531211853027, | |
| "logits/rejected": -2.2622039318084717, | |
| "logps/chosen": -356.80859375, | |
| "logps/rejected": -363.5769958496094, | |
| "loss": 0.7088, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.05582839250564575, | |
| "rewards/margins": 0.07232120633125305, | |
| "rewards/rejected": -0.016492802649736404, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 6.494666202671329e-06, | |
| "logits/chosen": -2.297036647796631, | |
| "logits/rejected": -2.165566921234131, | |
| "logps/chosen": -358.6860046386719, | |
| "logps/rejected": -317.59002685546875, | |
| "loss": 0.5921, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.30202144384384155, | |
| "rewards/margins": 0.3027155101299286, | |
| "rewards/rejected": -0.0006940944003872573, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 6.424363554374496e-06, | |
| "logits/chosen": -2.3090157508850098, | |
| "logits/rejected": -2.232266902923584, | |
| "logps/chosen": -363.88226318359375, | |
| "logps/rejected": -358.2498779296875, | |
| "loss": 0.6638, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.47291359305381775, | |
| "rewards/margins": 0.14908143877983093, | |
| "rewards/rejected": 0.3238321542739868, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 6.353753081188194e-06, | |
| "logits/chosen": -2.2434115409851074, | |
| "logits/rejected": -2.301614284515381, | |
| "logps/chosen": -314.8789978027344, | |
| "logps/rejected": -350.7088928222656, | |
| "loss": 0.727, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.19925834238529205, | |
| "rewards/margins": 0.04580863565206528, | |
| "rewards/rejected": 0.15344971418380737, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 6.28285004302345e-06, | |
| "logits/chosen": -2.266707420349121, | |
| "logits/rejected": -2.236722469329834, | |
| "logps/chosen": -321.0040283203125, | |
| "logps/rejected": -336.6592102050781, | |
| "loss": 0.6677, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1193336620926857, | |
| "rewards/margins": 0.17302492260932922, | |
| "rewards/rejected": -0.053691256791353226, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 6.2116697630186685e-06, | |
| "logits/chosen": -2.303358554840088, | |
| "logits/rejected": -2.149106740951538, | |
| "logps/chosen": -351.23590087890625, | |
| "logps/rejected": -350.1204833984375, | |
| "loss": 0.6293, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.17574825882911682, | |
| "rewards/margins": 0.1902790516614914, | |
| "rewards/rejected": -0.014530802145600319, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 6.140227624228098e-06, | |
| "logits/chosen": -2.375432252883911, | |
| "logits/rejected": -2.297983169555664, | |
| "logps/chosen": -366.21368408203125, | |
| "logps/rejected": -378.6297912597656, | |
| "loss": 0.6357, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.3224946856498718, | |
| "rewards/margins": 0.2146320790052414, | |
| "rewards/rejected": 0.10786261409521103, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 6.068539066297331e-06, | |
| "logits/chosen": -2.318620204925537, | |
| "logits/rejected": -2.2646164894104004, | |
| "logps/chosen": -367.49298095703125, | |
| "logps/rejected": -360.1875305175781, | |
| "loss": 0.6089, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.14375829696655273, | |
| "rewards/margins": 0.2504768967628479, | |
| "rewards/rejected": -0.10671859979629517, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.996619582126586e-06, | |
| "logits/chosen": -2.322288751602173, | |
| "logits/rejected": -2.3236374855041504, | |
| "logps/chosen": -367.33343505859375, | |
| "logps/rejected": -372.8912658691406, | |
| "loss": 0.7435, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.08361749351024628, | |
| "rewards/margins": 0.028830815106630325, | |
| "rewards/rejected": 0.054786670953035355, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.924484714522473e-06, | |
| "logits/chosen": -2.2468628883361816, | |
| "logits/rejected": -2.2435338497161865, | |
| "logps/chosen": -354.2232666015625, | |
| "logps/rejected": -318.03851318359375, | |
| "loss": 0.6024, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.1713722050189972, | |
| "rewards/margins": 0.26503580808639526, | |
| "rewards/rejected": -0.09366358816623688, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.8521500528389685e-06, | |
| "logits/chosen": -2.2789225578308105, | |
| "logits/rejected": -2.250373125076294, | |
| "logps/chosen": -337.760986328125, | |
| "logps/rejected": -343.9210510253906, | |
| "loss": 0.6352, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.043280668556690216, | |
| "rewards/margins": 0.275790274143219, | |
| "rewards/rejected": -0.3190709352493286, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.779631229608352e-06, | |
| "logits/chosen": -2.3031325340270996, | |
| "logits/rejected": -2.2297275066375732, | |
| "logps/chosen": -345.22265625, | |
| "logps/rejected": -361.78680419921875, | |
| "loss": 0.6227, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.18280446529388428, | |
| "rewards/margins": 0.2825770378112793, | |
| "rewards/rejected": -0.09977257996797562, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.706943917162786e-06, | |
| "logits/chosen": -2.3648791313171387, | |
| "logits/rejected": -2.2548999786376953, | |
| "logps/chosen": -348.91815185546875, | |
| "logps/rejected": -315.13653564453125, | |
| "loss": 0.6339, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.2526555061340332, | |
| "rewards/margins": 0.18896642327308655, | |
| "rewards/rejected": 0.06368909776210785, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.634103824247312e-06, | |
| "logits/chosen": -2.241288900375366, | |
| "logits/rejected": -2.208639621734619, | |
| "logps/chosen": -335.605224609375, | |
| "logps/rejected": -334.7170715332031, | |
| "loss": 0.632, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.13065436482429504, | |
| "rewards/margins": 0.2133084535598755, | |
| "rewards/rejected": -0.34396281838417053, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.561126692624963e-06, | |
| "logits/chosen": -2.2892165184020996, | |
| "logits/rejected": -2.253537178039551, | |
| "logps/chosen": -380.8193054199219, | |
| "logps/rejected": -344.45684814453125, | |
| "loss": 0.677, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.10464837402105331, | |
| "rewards/margins": 0.13998612761497498, | |
| "rewards/rejected": -0.035337746143341064, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5.488028293674759e-06, | |
| "logits/chosen": -2.1598775386810303, | |
| "logits/rejected": -2.3442585468292236, | |
| "logps/chosen": -295.97161865234375, | |
| "logps/rejected": -376.0238952636719, | |
| "loss": 0.6603, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1714015007019043, | |
| "rewards/margins": 0.1644255667924881, | |
| "rewards/rejected": 0.006975936703383923, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5.414824424983282e-06, | |
| "logits/chosen": -2.253049373626709, | |
| "logits/rejected": -2.313413143157959, | |
| "logps/chosen": -350.61126708984375, | |
| "logps/rejected": -394.3390197753906, | |
| "loss": 0.7526, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.3026641309261322, | |
| "rewards/margins": 0.06349755823612213, | |
| "rewards/rejected": -0.3661617040634155, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 5.341530906930604e-06, | |
| "logits/chosen": -2.32954740524292, | |
| "logits/rejected": -2.2630321979522705, | |
| "logps/chosen": -389.9427185058594, | |
| "logps/rejected": -338.2027893066406, | |
| "loss": 0.6504, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.0287860669195652, | |
| "rewards/margins": 0.21246078610420227, | |
| "rewards/rejected": -0.18367469310760498, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 5.268163579271276e-06, | |
| "logits/chosen": -2.249337673187256, | |
| "logits/rejected": -2.19362473487854, | |
| "logps/chosen": -330.29559326171875, | |
| "logps/rejected": -327.573486328125, | |
| "loss": 0.644, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0009714558837004006, | |
| "rewards/margins": 0.18328654766082764, | |
| "rewards/rejected": -0.18425801396369934, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.1947382977111374e-06, | |
| "logits/chosen": -2.2833094596862793, | |
| "logits/rejected": -2.203212261199951, | |
| "logps/chosen": -360.40142822265625, | |
| "logps/rejected": -346.81927490234375, | |
| "loss": 0.5783, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.05404200404882431, | |
| "rewards/margins": 0.3619672656059265, | |
| "rewards/rejected": -0.3079253137111664, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.128619108610792e-06, | |
| "logits/chosen": -2.310303211212158, | |
| "logits/rejected": -2.28350567817688, | |
| "logps/chosen": -339.02398681640625, | |
| "logps/rejected": -357.2115173339844, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.07422615587711334, | |
| "rewards/margins": 0.14461743831634521, | |
| "rewards/rejected": -0.07039125263690948, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.055127439202268e-06, | |
| "logits/chosen": -2.309981107711792, | |
| "logits/rejected": -2.2555365562438965, | |
| "logps/chosen": -339.52301025390625, | |
| "logps/rejected": -363.42657470703125, | |
| "loss": 0.5414, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.13452677428722382, | |
| "rewards/margins": 0.5444313287734985, | |
| "rewards/rejected": -0.4099045693874359, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.9816238559829586e-06, | |
| "logits/chosen": -2.371007204055786, | |
| "logits/rejected": -2.2399134635925293, | |
| "logps/chosen": -387.3955383300781, | |
| "logps/rejected": -373.3992919921875, | |
| "loss": 0.6567, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.04877791926264763, | |
| "rewards/margins": 0.20018813014030457, | |
| "rewards/rejected": -0.2489660531282425, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.908124244105435e-06, | |
| "logits/chosen": -2.1801342964172363, | |
| "logits/rejected": -2.1720447540283203, | |
| "logps/chosen": -307.62103271484375, | |
| "logps/rejected": -318.6053771972656, | |
| "loss": 0.6848, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10059481859207153, | |
| "rewards/margins": 0.0915575847029686, | |
| "rewards/rejected": -0.19215238094329834, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.834644487864005e-06, | |
| "logits/chosen": -2.299656391143799, | |
| "logits/rejected": -2.1939291954040527, | |
| "logps/chosen": -364.6031188964844, | |
| "logps/rejected": -340.6778259277344, | |
| "loss": 0.7091, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.1433655321598053, | |
| "rewards/margins": 0.1040463000535965, | |
| "rewards/rejected": 0.039319224655628204, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.7612004672619e-06, | |
| "logits/chosen": -2.2465157508850098, | |
| "logits/rejected": -2.142528533935547, | |
| "logps/chosen": -314.5583801269531, | |
| "logps/rejected": -273.532470703125, | |
| "loss": 0.7121, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.00577303022146225, | |
| "rewards/margins": 0.04662410169839859, | |
| "rewards/rejected": -0.05239715054631233, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.6878080545793765e-06, | |
| "logits/chosen": -2.275844097137451, | |
| "logits/rejected": -2.2758145332336426, | |
| "logps/chosen": -288.96905517578125, | |
| "logps/rejected": -304.53265380859375, | |
| "loss": 0.6788, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.18407993018627167, | |
| "rewards/margins": 0.13483914732933044, | |
| "rewards/rejected": 0.049240779131650925, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.614483110943502e-06, | |
| "logits/chosen": -2.275071620941162, | |
| "logits/rejected": -2.2525486946105957, | |
| "logps/chosen": -338.60357666015625, | |
| "logps/rejected": -337.8529357910156, | |
| "loss": 0.6597, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.18449342250823975, | |
| "rewards/margins": 0.14333459734916687, | |
| "rewards/rejected": 0.04115881025791168, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.54124148290033e-06, | |
| "logits/chosen": -2.2469890117645264, | |
| "logits/rejected": -2.2963757514953613, | |
| "logps/chosen": -312.69677734375, | |
| "logps/rejected": -368.51220703125, | |
| "loss": 0.7698, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.211637943983078, | |
| "rewards/margins": -0.05843405798077583, | |
| "rewards/rejected": 0.27007198333740234, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.46809899899026e-06, | |
| "logits/chosen": -2.20833683013916, | |
| "logits/rejected": -2.240799903869629, | |
| "logps/chosen": -326.4002380371094, | |
| "logps/rejected": -338.2776794433594, | |
| "loss": 0.6814, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.15935859084129333, | |
| "rewards/margins": 0.09861962497234344, | |
| "rewards/rejected": 0.060738980770111084, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.395071466327251e-06, | |
| "logits/chosen": -2.2200393676757812, | |
| "logits/rejected": -2.200827121734619, | |
| "logps/chosen": -341.48321533203125, | |
| "logps/rejected": -320.7751770019531, | |
| "loss": 0.7204, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.03819073364138603, | |
| "rewards/margins": 0.06163903325796127, | |
| "rewards/rejected": -0.023448294028639793, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.322174667182689e-06, | |
| "logits/chosen": -2.3112101554870605, | |
| "logits/rejected": -2.1941494941711426, | |
| "logps/chosen": -381.0555114746094, | |
| "logps/rejected": -349.847900390625, | |
| "loss": 0.6448, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.2540966272354126, | |
| "rewards/margins": 0.23512430489063263, | |
| "rewards/rejected": 0.018972331658005714, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.249424355574621e-06, | |
| "logits/chosen": -2.361945629119873, | |
| "logits/rejected": -2.2803444862365723, | |
| "logps/chosen": -397.45330810546875, | |
| "logps/rejected": -377.0959777832031, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.18188154697418213, | |
| "rewards/margins": 0.205190509557724, | |
| "rewards/rejected": -0.023308951407670975, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.176836253863087e-06, | |
| "logits/chosen": -2.3127691745758057, | |
| "logits/rejected": -2.185509443283081, | |
| "logps/chosen": -359.801025390625, | |
| "logps/rejected": -326.85382080078125, | |
| "loss": 0.5858, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.27552157640457153, | |
| "rewards/margins": 0.30210158228874207, | |
| "rewards/rejected": -0.02657998725771904, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.1044260493523005e-06, | |
| "logits/chosen": -2.219707727432251, | |
| "logits/rejected": -2.2081971168518066, | |
| "logps/chosen": -325.4644775390625, | |
| "logps/rejected": -309.3984069824219, | |
| "loss": 0.6114, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.29091349244117737, | |
| "rewards/margins": 0.2537664771080017, | |
| "rewards/rejected": 0.037146955728530884, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.0322093909003965e-06, | |
| "logits/chosen": -2.3424715995788574, | |
| "logits/rejected": -2.328320026397705, | |
| "logps/chosen": -369.3295593261719, | |
| "logps/rejected": -420.92987060546875, | |
| "loss": 0.5449, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.4485898017883301, | |
| "rewards/margins": 0.42458122968673706, | |
| "rewards/rejected": 0.02400858886539936, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 3.960201885537504e-06, | |
| "logits/chosen": -2.341200113296509, | |
| "logits/rejected": -2.253725528717041, | |
| "logps/chosen": -372.42633056640625, | |
| "logps/rejected": -400.0967102050781, | |
| "loss": 0.642, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.38997507095336914, | |
| "rewards/margins": 0.22686178982257843, | |
| "rewards/rejected": 0.16311326622962952, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.888419095092843e-06, | |
| "logits/chosen": -2.2710628509521484, | |
| "logits/rejected": -2.3019535541534424, | |
| "logps/chosen": -336.04296875, | |
| "logps/rejected": -364.0369873046875, | |
| "loss": 0.6737, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.08392750471830368, | |
| "rewards/margins": 0.1348566859960556, | |
| "rewards/rejected": -0.05092918127775192, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.816876532831595e-06, | |
| "logits/chosen": -2.1317548751831055, | |
| "logits/rejected": -2.1478095054626465, | |
| "logps/chosen": -297.4999084472656, | |
| "logps/rejected": -314.2597961425781, | |
| "loss": 0.6427, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.07686291635036469, | |
| "rewards/margins": 0.2395528256893158, | |
| "rewards/rejected": -0.16268989443778992, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.7455896601022677e-06, | |
| "logits/chosen": -2.2463011741638184, | |
| "logits/rejected": -2.130866527557373, | |
| "logps/chosen": -333.47564697265625, | |
| "logps/rejected": -299.1008605957031, | |
| "loss": 0.6625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.01470687985420227, | |
| "rewards/margins": 0.15500742197036743, | |
| "rewards/rejected": -0.14030054211616516, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.6745738829952928e-06, | |
| "logits/chosen": -2.3302998542785645, | |
| "logits/rejected": -2.3339765071868896, | |
| "logps/chosen": -378.35498046875, | |
| "logps/rejected": -410.18035888671875, | |
| "loss": 0.6558, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.3636806905269623, | |
| "rewards/margins": 0.19609752297401428, | |
| "rewards/rejected": 0.1675831824541092, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.6038445490135354e-06, | |
| "logits/chosen": -2.3522942066192627, | |
| "logits/rejected": -2.3492603302001953, | |
| "logps/chosen": -387.61297607421875, | |
| "logps/rejected": -422.75054931640625, | |
| "loss": 0.6802, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.4821054935455322, | |
| "rewards/margins": 0.14728207886219025, | |
| "rewards/rejected": 0.3348234295845032, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.5334169437555e-06, | |
| "logits/chosen": -2.2042317390441895, | |
| "logits/rejected": -2.272881507873535, | |
| "logps/chosen": -345.3319396972656, | |
| "logps/rejected": -344.2694091796875, | |
| "loss": 0.6958, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.11976834386587143, | |
| "rewards/margins": 0.10411565005779266, | |
| "rewards/rejected": 0.015652697533369064, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.4633062876118915e-06, | |
| "logits/chosen": -2.310586452484131, | |
| "logits/rejected": -2.2318148612976074, | |
| "logps/chosen": -339.42095947265625, | |
| "logps/rejected": -339.2361755371094, | |
| "loss": 0.6279, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.20746886730194092, | |
| "rewards/margins": 0.2241462916135788, | |
| "rewards/rejected": -0.01667742058634758, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.3935277324762807e-06, | |
| "logits/chosen": -2.2938754558563232, | |
| "logits/rejected": -2.3304316997528076, | |
| "logps/chosen": -349.04547119140625, | |
| "logps/rejected": -412.75042724609375, | |
| "loss": 0.6163, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.04044444486498833, | |
| "rewards/margins": 0.25089383125305176, | |
| "rewards/rejected": -0.21044941246509552, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.324096358470559e-06, | |
| "logits/chosen": -2.298367977142334, | |
| "logits/rejected": -2.2703890800476074, | |
| "logps/chosen": -365.79571533203125, | |
| "logps/rejected": -372.6152038574219, | |
| "loss": 0.6579, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.16013844311237335, | |
| "rewards/margins": 0.23061330616474152, | |
| "rewards/rejected": -0.07047487795352936, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.255027170685922e-06, | |
| "logits/chosen": -2.271730422973633, | |
| "logits/rejected": -2.305053234100342, | |
| "logps/chosen": -379.85321044921875, | |
| "logps/rejected": -405.1103515625, | |
| "loss": 0.717, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.14956679940223694, | |
| "rewards/margins": 0.06124185770750046, | |
| "rewards/rejected": 0.08832494169473648, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.186335095940058e-06, | |
| "logits/chosen": -2.3461501598358154, | |
| "logits/rejected": -2.1821436882019043, | |
| "logps/chosen": -382.0367736816406, | |
| "logps/rejected": -337.6816101074219, | |
| "loss": 0.6432, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.07416114211082458, | |
| "rewards/margins": 0.22856464982032776, | |
| "rewards/rejected": -0.15440352261066437, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.1180349795512478e-06, | |
| "logits/chosen": -2.333571434020996, | |
| "logits/rejected": -2.2108778953552246, | |
| "logps/chosen": -364.5013427734375, | |
| "logps/rejected": -348.1319580078125, | |
| "loss": 0.6229, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.042986027896404266, | |
| "rewards/margins": 0.23882392048835754, | |
| "rewards/rejected": -0.19583788514137268, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.050141582130093e-06, | |
| "logits/chosen": -2.19138765335083, | |
| "logits/rejected": -2.2427496910095215, | |
| "logps/chosen": -337.9814147949219, | |
| "logps/rejected": -330.500244140625, | |
| "loss": 0.7066, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.053582824766635895, | |
| "rewards/margins": 0.07893103361129761, | |
| "rewards/rejected": -0.025348205119371414, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.982669576389533e-06, | |
| "logits/chosen": -2.296982526779175, | |
| "logits/rejected": -2.2845733165740967, | |
| "logps/chosen": -310.3797912597656, | |
| "logps/rejected": -309.05975341796875, | |
| "loss": 0.6881, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.22653362154960632, | |
| "rewards/margins": 0.06024567037820816, | |
| "rewards/rejected": 0.16628797352313995, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.9156335439738705e-06, | |
| "logits/chosen": -2.285391092300415, | |
| "logits/rejected": -2.3086414337158203, | |
| "logps/chosen": -373.62628173828125, | |
| "logps/rejected": -386.38092041015625, | |
| "loss": 0.7604, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.023256815969944, | |
| "rewards/margins": -0.014189457520842552, | |
| "rewards/rejected": 0.037446290254592896, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.84904797230748e-06, | |
| "logits/chosen": -2.2920703887939453, | |
| "logits/rejected": -2.2386538982391357, | |
| "logps/chosen": -336.79888916015625, | |
| "logps/rejected": -377.5654296875, | |
| "loss": 0.5965, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.3767642080783844, | |
| "rewards/margins": 0.3530300557613373, | |
| "rewards/rejected": 0.02373412624001503, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.782927251463854e-06, | |
| "logits/chosen": -2.2349536418914795, | |
| "logits/rejected": -2.246170997619629, | |
| "logps/chosen": -326.43084716796875, | |
| "logps/rejected": -355.4977111816406, | |
| "loss": 0.6291, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.12856540083885193, | |
| "rewards/margins": 0.2676551938056946, | |
| "rewards/rejected": -0.13908980786800385, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.717285671055733e-06, | |
| "logits/chosen": -2.2831931114196777, | |
| "logits/rejected": -2.2716732025146484, | |
| "logps/chosen": -339.9261474609375, | |
| "logps/rejected": -372.9583740234375, | |
| "loss": 0.6354, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.0657893493771553, | |
| "rewards/margins": 0.23048046231269836, | |
| "rewards/rejected": -0.29626980423927307, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.652137417146897e-06, | |
| "logits/chosen": -2.327761173248291, | |
| "logits/rejected": -2.1530885696411133, | |
| "logps/chosen": -351.7103271484375, | |
| "logps/rejected": -321.41046142578125, | |
| "loss": 0.7653, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.15571951866149902, | |
| "rewards/margins": -0.01952260732650757, | |
| "rewards/rejected": -0.13619689643383026, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.587496569186378e-06, | |
| "logits/chosen": -2.283737897872925, | |
| "logits/rejected": -2.2826638221740723, | |
| "logps/chosen": -369.670654296875, | |
| "logps/rejected": -380.65460205078125, | |
| "loss": 0.6403, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.06505627185106277, | |
| "rewards/margins": 0.1945182979106903, | |
| "rewards/rejected": -0.12946203351020813, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.5233770969656703e-06, | |
| "logits/chosen": -2.2368502616882324, | |
| "logits/rejected": -2.2161707878112793, | |
| "logps/chosen": -331.0984191894531, | |
| "logps/rejected": -336.10040283203125, | |
| "loss": 0.6759, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1922999769449234, | |
| "rewards/margins": 0.1443178653717041, | |
| "rewards/rejected": 0.04798208177089691, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.4597928575996917e-06, | |
| "logits/chosen": -2.2817587852478027, | |
| "logits/rejected": -2.2737059593200684, | |
| "logps/chosen": -369.1025085449219, | |
| "logps/rejected": -396.74005126953125, | |
| "loss": 0.652, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.08208617568016052, | |
| "rewards/margins": 0.25736135244369507, | |
| "rewards/rejected": -0.17527517676353455, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.3967575925320417e-06, | |
| "logits/chosen": -2.362391471862793, | |
| "logits/rejected": -2.2530202865600586, | |
| "logps/chosen": -362.65301513671875, | |
| "logps/rejected": -345.89776611328125, | |
| "loss": 0.6438, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.2820921540260315, | |
| "rewards/margins": 0.289219468832016, | |
| "rewards/rejected": -0.007127317134290934, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.334284924565307e-06, | |
| "logits/chosen": -2.2167036533355713, | |
| "logits/rejected": -2.187164068222046, | |
| "logps/chosen": -322.52410888671875, | |
| "logps/rejected": -329.1834716796875, | |
| "loss": 0.6609, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.04885543882846832, | |
| "rewards/margins": 0.21874144673347473, | |
| "rewards/rejected": -0.16988599300384521, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.2723883549169546e-06, | |
| "logits/chosen": -2.2128214836120605, | |
| "logits/rejected": -2.1517386436462402, | |
| "logps/chosen": -297.3167419433594, | |
| "logps/rejected": -319.46826171875, | |
| "loss": 0.6695, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.1534380465745926, | |
| "rewards/margins": 0.17669746279716492, | |
| "rewards/rejected": -0.02325943298637867, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.211081260301559e-06, | |
| "logits/chosen": -2.2383880615234375, | |
| "logits/rejected": -2.152236223220825, | |
| "logps/chosen": -321.6248474121094, | |
| "logps/rejected": -295.3414001464844, | |
| "loss": 0.6343, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.2368522584438324, | |
| "rewards/margins": 0.21582520008087158, | |
| "rewards/rejected": 0.021027065813541412, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.150376890039888e-06, | |
| "logits/chosen": -2.142472743988037, | |
| "logits/rejected": -2.2683558464050293, | |
| "logps/chosen": -304.2695007324219, | |
| "logps/rejected": -381.8064270019531, | |
| "loss": 0.6457, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.05382103472948074, | |
| "rewards/margins": 0.231169655919075, | |
| "rewards/rejected": -0.17734862864017487, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.090288363195546e-06, | |
| "logits/chosen": -2.301752805709839, | |
| "logits/rejected": -2.245049476623535, | |
| "logps/chosen": -360.19940185546875, | |
| "logps/rejected": -344.0550231933594, | |
| "loss": 0.6983, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.1819063127040863, | |
| "rewards/margins": 0.07504500448703766, | |
| "rewards/rejected": 0.10686129331588745, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.0308286657397586e-06, | |
| "logits/chosen": -2.1193668842315674, | |
| "logits/rejected": -2.279275417327881, | |
| "logps/chosen": -290.13494873046875, | |
| "logps/rejected": -304.28460693359375, | |
| "loss": 0.6585, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.014680122956633568, | |
| "rewards/margins": 0.12039873749017715, | |
| "rewards/rejected": -0.13507884740829468, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.972010647744929e-06, | |
| "logits/chosen": -2.2673816680908203, | |
| "logits/rejected": -2.23976731300354, | |
| "logps/chosen": -359.8111267089844, | |
| "logps/rejected": -389.32012939453125, | |
| "loss": 0.6393, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.1408449113368988, | |
| "rewards/margins": 0.20818281173706055, | |
| "rewards/rejected": -0.34902772307395935, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.9138470206075468e-06, | |
| "logits/chosen": -2.260129690170288, | |
| "logits/rejected": -2.1876485347747803, | |
| "logps/chosen": -349.2674560546875, | |
| "logps/rejected": -373.29351806640625, | |
| "loss": 0.6647, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.08648316562175751, | |
| "rewards/margins": 0.13312320411205292, | |
| "rewards/rejected": -0.21960635483264923, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.8563503543010847e-06, | |
| "logits/chosen": -2.2733237743377686, | |
| "logits/rejected": -2.239638090133667, | |
| "logps/chosen": -357.602294921875, | |
| "logps/rejected": -358.47900390625, | |
| "loss": 0.6549, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.038054175674915314, | |
| "rewards/margins": 0.20417292416095734, | |
| "rewards/rejected": -0.1661187708377838, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.7995330746594492e-06, | |
| "logits/chosen": -2.2824442386627197, | |
| "logits/rejected": -2.319239854812622, | |
| "logps/chosen": -335.9391174316406, | |
| "logps/rejected": -376.009765625, | |
| "loss": 0.7277, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.06412219256162643, | |
| "rewards/margins": 0.0529680959880352, | |
| "rewards/rejected": -0.11709029972553253, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.7434074606915908e-06, | |
| "logits/chosen": -2.2410006523132324, | |
| "logits/rejected": -2.2910315990448, | |
| "logps/chosen": -366.2132263183594, | |
| "logps/rejected": -422.5810546875, | |
| "loss": 0.6248, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.1538994014263153, | |
| "rewards/margins": 0.33952516317367554, | |
| "rewards/rejected": -0.185625821352005, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.687985641927819e-06, | |
| "logits/chosen": -2.3636865615844727, | |
| "logits/rejected": -2.2147748470306396, | |
| "logps/chosen": -360.6214294433594, | |
| "logps/rejected": -323.7347412109375, | |
| "loss": 0.622, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.18724626302719116, | |
| "rewards/margins": 0.2453027069568634, | |
| "rewards/rejected": -0.05805645138025284, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.6332795957984688e-06, | |
| "logits/chosen": -2.2480177879333496, | |
| "logits/rejected": -2.2115044593811035, | |
| "logps/chosen": -352.7060852050781, | |
| "logps/rejected": -360.0293884277344, | |
| "loss": 0.6535, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.05413411930203438, | |
| "rewards/margins": 0.20770862698554993, | |
| "rewards/rejected": -0.2618427276611328, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.5793011450453854e-06, | |
| "logits/chosen": -2.230503797531128, | |
| "logits/rejected": -2.2694289684295654, | |
| "logps/chosen": -292.1943664550781, | |
| "logps/rejected": -338.814453125, | |
| "loss": 0.7215, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.112449049949646, | |
| "rewards/margins": 0.08113683760166168, | |
| "rewards/rejected": -0.19358590245246887, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.5260619551668842e-06, | |
| "logits/chosen": -2.186260461807251, | |
| "logits/rejected": -2.3265433311462402, | |
| "logps/chosen": -315.28778076171875, | |
| "logps/rejected": -363.6637268066406, | |
| "loss": 0.7206, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.10357306897640228, | |
| "rewards/margins": 0.0456685833632946, | |
| "rewards/rejected": -0.14924165606498718, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.4735735318966521e-06, | |
| "logits/chosen": -2.338139772415161, | |
| "logits/rejected": -2.1299831867218018, | |
| "logps/chosen": -330.22796630859375, | |
| "logps/rejected": -297.2973327636719, | |
| "loss": 0.644, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.05218541622161865, | |
| "rewards/margins": 0.25528091192245483, | |
| "rewards/rejected": -0.20309551060199738, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.4218472187172212e-06, | |
| "logits/chosen": -2.1943013668060303, | |
| "logits/rejected": -2.255190372467041, | |
| "logps/chosen": -294.1163024902344, | |
| "logps/rejected": -333.59228515625, | |
| "loss": 0.6324, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.09838038682937622, | |
| "rewards/margins": 0.21084125339984894, | |
| "rewards/rejected": -0.30922168493270874, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.3708941944084636e-06, | |
| "logits/chosen": -2.3456058502197266, | |
| "logits/rejected": -2.361806631088257, | |
| "logps/chosen": -406.53094482421875, | |
| "logps/rejected": -447.1785583496094, | |
| "loss": 0.6449, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.2680016756057739, | |
| "rewards/margins": 0.210541769862175, | |
| "rewards/rejected": 0.057459909468889236, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.3207254706317174e-06, | |
| "logits/chosen": -2.276390790939331, | |
| "logits/rejected": -2.296130657196045, | |
| "logps/chosen": -342.73260498046875, | |
| "logps/rejected": -360.7101135253906, | |
| "loss": 0.6452, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.1815967857837677, | |
| "rewards/margins": 0.2110695093870163, | |
| "rewards/rejected": -0.029472723603248596, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.2713518895499932e-06, | |
| "logits/chosen": -2.2506117820739746, | |
| "logits/rejected": -2.1966030597686768, | |
| "logps/chosen": -328.85302734375, | |
| "logps/rejected": -346.61041259765625, | |
| "loss": 0.6133, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.16987404227256775, | |
| "rewards/margins": 0.2849760055541992, | |
| "rewards/rejected": -0.45485004782676697, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.2227841214848519e-06, | |
| "logits/chosen": -2.3572230339050293, | |
| "logits/rejected": -2.241999387741089, | |
| "logps/chosen": -386.4831237792969, | |
| "logps/rejected": -324.4116516113281, | |
| "loss": 0.623, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.19086837768554688, | |
| "rewards/margins": 0.23254597187042236, | |
| "rewards/rejected": -0.04167759045958519, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.175032662610383e-06, | |
| "logits/chosen": -2.384322166442871, | |
| "logits/rejected": -2.372183322906494, | |
| "logps/chosen": -379.8897705078125, | |
| "logps/rejected": -384.9872131347656, | |
| "loss": 0.6472, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.13912078738212585, | |
| "rewards/margins": 0.1807091236114502, | |
| "rewards/rejected": -0.041588325053453445, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.1281078326848438e-06, | |
| "logits/chosen": -2.2786386013031006, | |
| "logits/rejected": -2.2903854846954346, | |
| "logps/chosen": -342.7461242675781, | |
| "logps/rejected": -383.90411376953125, | |
| "loss": 0.5802, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.15179908275604248, | |
| "rewards/margins": 0.4043292999267578, | |
| "rewards/rejected": -0.25253021717071533, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0820197728204085e-06, | |
| "logits/chosen": -2.235412120819092, | |
| "logits/rejected": -2.1771421432495117, | |
| "logps/chosen": -336.4576721191406, | |
| "logps/rejected": -346.7325744628906, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0932273119688034, | |
| "rewards/margins": 0.12401854991912842, | |
| "rewards/rejected": -0.03079124726355076, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0367784432915407e-06, | |
| "logits/chosen": -2.2605013847351074, | |
| "logits/rejected": -2.2363693714141846, | |
| "logps/chosen": -337.21728515625, | |
| "logps/rejected": -330.5986633300781, | |
| "loss": 0.6855, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.2193947285413742, | |
| "rewards/margins": 0.11095724254846573, | |
| "rewards/rejected": 0.10843745619058609, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.923936213824297e-07, | |
| "logits/chosen": -2.387052297592163, | |
| "logits/rejected": -2.2252724170684814, | |
| "logps/chosen": -368.71881103515625, | |
| "logps/rejected": -402.789794921875, | |
| "loss": 0.6306, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.32878467440605164, | |
| "rewards/margins": 0.26738250255584717, | |
| "rewards/rejected": 0.06140219047665596, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.488748992739877e-07, | |
| "logits/chosen": -2.2936031818389893, | |
| "logits/rejected": -2.3079075813293457, | |
| "logps/chosen": -340.22430419921875, | |
| "logps/rejected": -390.5373840332031, | |
| "loss": 0.653, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.11492462456226349, | |
| "rewards/margins": 0.22790834307670593, | |
| "rewards/rejected": -0.11298371851444244, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.062316819708427e-07, | |
| "logits/chosen": -2.26062273979187, | |
| "logits/rejected": -2.2534077167510986, | |
| "logps/chosen": -323.99554443359375, | |
| "logps/rejected": -343.2364501953125, | |
| "loss": 0.6792, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.07681259512901306, | |
| "rewards/margins": 0.13750119507312775, | |
| "rewards/rejected": -0.2143137902021408, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 8.644731852687904e-07, | |
| "logits/chosen": -2.1960670948028564, | |
| "logits/rejected": -2.252990245819092, | |
| "logps/chosen": -338.3189392089844, | |
| "logps/rejected": -409.77947998046875, | |
| "loss": 0.6429, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.027236556634306908, | |
| "rewards/margins": 0.200010746717453, | |
| "rewards/rejected": -0.22724728286266327, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.236084337631256e-07, | |
| "logits/chosen": -2.269155502319336, | |
| "logits/rejected": -2.2297897338867188, | |
| "logps/chosen": -350.8360900878906, | |
| "logps/rejected": -349.05743408203125, | |
| "loss": 0.618, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.03905141353607178, | |
| "rewards/margins": 0.2928921580314636, | |
| "rewards/rejected": -0.25384077429771423, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 7.836462588983029e-07, | |
| "logits/chosen": -2.2999212741851807, | |
| "logits/rejected": -2.2945046424865723, | |
| "logps/chosen": -331.8743896484375, | |
| "logps/rejected": -358.82427978515625, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.007713166065514088, | |
| "rewards/margins": 0.15347187221050262, | |
| "rewards/rejected": -0.1457587033510208, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.445952970593401e-07, | |
| "logits/chosen": -2.292762517929077, | |
| "logits/rejected": -2.2326605319976807, | |
| "logps/chosen": -368.51123046875, | |
| "logps/rejected": -343.02362060546875, | |
| "loss": 0.7055, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.138399139046669, | |
| "rewards/margins": 0.1448075920343399, | |
| "rewards/rejected": -0.2832067608833313, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.064639877053753e-07, | |
| "logits/chosen": -2.2553770542144775, | |
| "logits/rejected": -2.1739296913146973, | |
| "logps/chosen": -347.02081298828125, | |
| "logps/rejected": -329.3694152832031, | |
| "loss": 0.7244, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0695744976401329, | |
| "rewards/margins": 0.0453006774187088, | |
| "rewards/rejected": -0.1148751750588417, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 6.692605715457734e-07, | |
| "logits/chosen": -2.296112537384033, | |
| "logits/rejected": -2.2227189540863037, | |
| "logps/chosen": -335.0692443847656, | |
| "logps/rejected": -342.08563232421875, | |
| "loss": 0.6636, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.09152424335479736, | |
| "rewards/margins": 0.14057905972003937, | |
| "rewards/rejected": -0.049054812639951706, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 6.329930887592067e-07, | |
| "logits/chosen": -2.277210235595703, | |
| "logits/rejected": -2.263932704925537, | |
| "logps/chosen": -373.13623046875, | |
| "logps/rejected": -398.31329345703125, | |
| "loss": 0.6006, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.10486678779125214, | |
| "rewards/margins": 0.2877123951911926, | |
| "rewards/rejected": -0.39257916808128357, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.976693772560487e-07, | |
| "logits/chosen": -2.3237483501434326, | |
| "logits/rejected": -2.283463716506958, | |
| "logps/chosen": -397.46905517578125, | |
| "logps/rejected": -399.92266845703125, | |
| "loss": 0.6716, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03626967594027519, | |
| "rewards/margins": 0.20212960243225098, | |
| "rewards/rejected": -0.23839926719665527, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.632970709844976e-07, | |
| "logits/chosen": -2.2484121322631836, | |
| "logits/rejected": -2.2332425117492676, | |
| "logps/chosen": -352.50372314453125, | |
| "logps/rejected": -382.41290283203125, | |
| "loss": 0.7484, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.20201630890369415, | |
| "rewards/margins": 0.06586066633462906, | |
| "rewards/rejected": -0.2678769528865814, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.298835982807704e-07, | |
| "logits/chosen": -2.324031352996826, | |
| "logits/rejected": -2.2343201637268066, | |
| "logps/chosen": -387.73211669921875, | |
| "logps/rejected": -355.9978942871094, | |
| "loss": 0.7038, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.11137855052947998, | |
| "rewards/margins": 0.1095174103975296, | |
| "rewards/rejected": 0.0018611550331115723, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.974361802637395e-07, | |
| "logits/chosen": -2.3116257190704346, | |
| "logits/rejected": -2.272489070892334, | |
| "logps/chosen": -362.0296325683594, | |
| "logps/rejected": -368.2407531738281, | |
| "loss": 0.6696, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.055863846093416214, | |
| "rewards/margins": 0.1546260416507721, | |
| "rewards/rejected": -0.2104898989200592, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.6596182927434395e-07, | |
| "logits/chosen": -2.2587242126464844, | |
| "logits/rejected": -2.2343411445617676, | |
| "logps/chosen": -326.1157531738281, | |
| "logps/rejected": -347.9691162109375, | |
| "loss": 0.652, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.12903109192848206, | |
| "rewards/margins": 0.13124972581863403, | |
| "rewards/rejected": -0.2602807879447937, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.354673473601251e-07, | |
| "logits/chosen": -2.249849557876587, | |
| "logits/rejected": -2.1856112480163574, | |
| "logps/chosen": -350.0913391113281, | |
| "logps/rejected": -357.5381774902344, | |
| "loss": 0.6364, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.04396069794893265, | |
| "rewards/margins": 0.2554934620857239, | |
| "rewards/rejected": -0.21153274178504944, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.059593248052107e-07, | |
| "logits/chosen": -2.270174741744995, | |
| "logits/rejected": -2.214571475982666, | |
| "logps/chosen": -363.952880859375, | |
| "logps/rejected": -363.4573974609375, | |
| "loss": 0.5834, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.0837903842329979, | |
| "rewards/margins": 0.3650640845298767, | |
| "rewards/rejected": -0.2812737226486206, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.774441387060634e-07, | |
| "logits/chosen": -2.3621678352355957, | |
| "logits/rejected": -2.304919719696045, | |
| "logps/chosen": -411.8487854003906, | |
| "logps/rejected": -410.06671142578125, | |
| "loss": 0.6429, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.14407065510749817, | |
| "rewards/margins": 0.2489246129989624, | |
| "rewards/rejected": -0.10485398769378662, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.4992795159329516e-07, | |
| "logits/chosen": -2.3314731121063232, | |
| "logits/rejected": -2.2980003356933594, | |
| "logps/chosen": -380.12017822265625, | |
| "logps/rejected": -410.78887939453125, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.10951967537403107, | |
| "rewards/margins": 0.07033322751522064, | |
| "rewards/rejected": 0.03918645530939102, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |