| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998751404669747, | |
| "eval_steps": 1000, | |
| "global_step": 4004, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.000249719066050693, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 1.2468827930174565e-08, | |
| "logits/chosen": -2.450503349304199, | |
| "logits/rejected": -2.672837734222412, | |
| "logps/chosen": -21.34674835205078, | |
| "logps/rejected": -42.586097717285156, | |
| "loss": 0.5, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00249719066050693, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.2468827930174566e-07, | |
| "logits/chosen": -2.275761604309082, | |
| "logits/rejected": -2.479705333709717, | |
| "logps/chosen": -22.14301300048828, | |
| "logps/rejected": -63.31869888305664, | |
| "loss": 0.5, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.00027842415147460997, | |
| "rewards/margins": -0.00017310140538029373, | |
| "rewards/rejected": -0.0001053227242664434, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00499438132101386, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 2.493765586034913e-07, | |
| "logits/chosen": -2.2202348709106445, | |
| "logits/rejected": -2.429389238357544, | |
| "logps/chosen": -21.814502716064453, | |
| "logps/rejected": -61.35728073120117, | |
| "loss": 0.5, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 8.430716843577102e-05, | |
| "rewards/margins": 0.00037039705784991384, | |
| "rewards/rejected": -0.00028608986758627, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.007491571981520789, | |
| "grad_norm": 0.26171875, | |
| "learning_rate": 3.7406483790523695e-07, | |
| "logits/chosen": -2.14150333404541, | |
| "logits/rejected": -2.3708083629608154, | |
| "logps/chosen": -22.1105899810791, | |
| "logps/rejected": -52.95900344848633, | |
| "loss": 0.5001, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -9.514805424259976e-05, | |
| "rewards/margins": -5.593679452431388e-05, | |
| "rewards/rejected": -3.9211259718285874e-05, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.00998876264202772, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 4.987531172069826e-07, | |
| "logits/chosen": -2.1455249786376953, | |
| "logits/rejected": -2.362419605255127, | |
| "logps/chosen": -22.628782272338867, | |
| "logps/rejected": -63.2244873046875, | |
| "loss": 0.4998, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.0004498485941439867, | |
| "rewards/margins": 0.0016190257156267762, | |
| "rewards/rejected": -0.0011691770050674677, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.012485953302534648, | |
| "grad_norm": 0.212890625, | |
| "learning_rate": 6.234413965087283e-07, | |
| "logits/chosen": -2.2349250316619873, | |
| "logits/rejected": -2.495819568634033, | |
| "logps/chosen": -22.863269805908203, | |
| "logps/rejected": -59.4576416015625, | |
| "loss": 0.4998, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0006220145733095706, | |
| "rewards/margins": 0.0015806708252057433, | |
| "rewards/rejected": -0.0009586562518961728, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.014983143963041578, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 7.481296758104739e-07, | |
| "logits/chosen": -2.169523239135742, | |
| "logits/rejected": -2.3751749992370605, | |
| "logps/chosen": -22.777694702148438, | |
| "logps/rejected": -68.83964538574219, | |
| "loss": 0.4992, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.0019476842135190964, | |
| "rewards/margins": 0.004409968852996826, | |
| "rewards/rejected": -0.002462285105139017, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.017480334623548508, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 8.728179551122195e-07, | |
| "logits/chosen": -2.286738634109497, | |
| "logits/rejected": -2.4896113872528076, | |
| "logps/chosen": -21.078710556030273, | |
| "logps/rejected": -50.04187774658203, | |
| "loss": 0.4985, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.004810997284948826, | |
| "rewards/margins": 0.007176141254603863, | |
| "rewards/rejected": -0.0023651437368243933, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.01997752528405544, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 9.975062344139653e-07, | |
| "logits/chosen": -2.144176959991455, | |
| "logits/rejected": -2.352398633956909, | |
| "logps/chosen": -21.391971588134766, | |
| "logps/rejected": -56.86810302734375, | |
| "loss": 0.4969, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.010686805471777916, | |
| "rewards/margins": 0.01409011147916317, | |
| "rewards/rejected": -0.003403306705877185, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02247471594456237, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 1.1221945137157108e-06, | |
| "logits/chosen": -2.2526628971099854, | |
| "logits/rejected": -2.430774211883545, | |
| "logps/chosen": -19.845823287963867, | |
| "logps/rejected": -51.37982177734375, | |
| "loss": 0.4949, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.019541995599865913, | |
| "rewards/margins": 0.021860197186470032, | |
| "rewards/rejected": -0.0023182008881121874, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.024971906605069295, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 1.2468827930174565e-06, | |
| "logits/chosen": -2.1313042640686035, | |
| "logits/rejected": -2.3720927238464355, | |
| "logps/chosen": -20.160160064697266, | |
| "logps/rejected": -66.42484283447266, | |
| "loss": 0.4924, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.031359124928712845, | |
| "rewards/margins": 0.03116660751402378, | |
| "rewards/rejected": 0.00019251916091889143, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.027469097265576226, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.3715710723192023e-06, | |
| "logits/chosen": -2.1676554679870605, | |
| "logits/rejected": -2.389533758163452, | |
| "logps/chosen": -17.833478927612305, | |
| "logps/rejected": -60.63257598876953, | |
| "loss": 0.4879, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.04782567545771599, | |
| "rewards/margins": 0.05032258480787277, | |
| "rewards/rejected": -0.002496910747140646, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.029966287926083156, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 1.4962593516209478e-06, | |
| "logits/chosen": -2.1279516220092773, | |
| "logits/rejected": -2.343705177307129, | |
| "logps/chosen": -15.757919311523438, | |
| "logps/rejected": -51.14020919799805, | |
| "loss": 0.4836, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.06653784960508347, | |
| "rewards/margins": 0.06694493442773819, | |
| "rewards/rejected": -0.0004070843569934368, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.032463478586590086, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 1.6209476309226935e-06, | |
| "logits/chosen": -2.3082690238952637, | |
| "logits/rejected": -2.5344271659851074, | |
| "logps/chosen": -12.95374870300293, | |
| "logps/rejected": -53.89298629760742, | |
| "loss": 0.4766, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09254685044288635, | |
| "rewards/margins": 0.09660454094409943, | |
| "rewards/rejected": -0.0040576886385679245, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.034960669247097016, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.745635910224439e-06, | |
| "logits/chosen": -2.1814446449279785, | |
| "logits/rejected": -2.40262508392334, | |
| "logps/chosen": -11.56260871887207, | |
| "logps/rejected": -71.49890899658203, | |
| "loss": 0.4714, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11319079250097275, | |
| "rewards/margins": 0.11961270868778229, | |
| "rewards/rejected": -0.006421914789825678, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.037457859907603946, | |
| "grad_norm": 0.28125, | |
| "learning_rate": 1.8703241895261848e-06, | |
| "logits/chosen": -2.2549407482147217, | |
| "logits/rejected": -2.4583637714385986, | |
| "logps/chosen": -8.707418441772461, | |
| "logps/rejected": -56.646148681640625, | |
| "loss": 0.4655, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.13278979063034058, | |
| "rewards/margins": 0.14516989886760712, | |
| "rewards/rejected": -0.012380105443298817, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03995505056811088, | |
| "grad_norm": 0.26171875, | |
| "learning_rate": 1.9950124688279305e-06, | |
| "logits/chosen": -2.261176586151123, | |
| "logits/rejected": -2.454853057861328, | |
| "logps/chosen": -7.25634765625, | |
| "logps/rejected": -62.16912841796875, | |
| "loss": 0.4591, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.14732162654399872, | |
| "rewards/margins": 0.1813906729221344, | |
| "rewards/rejected": -0.034069035202264786, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04245224122861781, | |
| "grad_norm": 0.228515625, | |
| "learning_rate": 2.119700748129676e-06, | |
| "logits/chosen": -2.413883686065674, | |
| "logits/rejected": -2.6421730518341064, | |
| "logps/chosen": -5.5545244216918945, | |
| "logps/rejected": -54.24146270751953, | |
| "loss": 0.4528, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.16115576028823853, | |
| "rewards/margins": 0.21780212223529816, | |
| "rewards/rejected": -0.05664635822176933, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04494943188912474, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.2443890274314216e-06, | |
| "logits/chosen": -2.123264789581299, | |
| "logits/rejected": -2.3629353046417236, | |
| "logps/chosen": -5.675574779510498, | |
| "logps/rejected": -81.35579681396484, | |
| "loss": 0.448, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.16601073741912842, | |
| "rewards/margins": 0.2519921362400055, | |
| "rewards/rejected": -0.08598136156797409, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04744662254963167, | |
| "grad_norm": 0.2734375, | |
| "learning_rate": 2.3690773067331675e-06, | |
| "logits/chosen": -2.162355899810791, | |
| "logits/rejected": -2.4037208557128906, | |
| "logps/chosen": -4.741239547729492, | |
| "logps/rejected": -69.67314147949219, | |
| "loss": 0.4382, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.17391221225261688, | |
| "rewards/margins": 0.32386231422424316, | |
| "rewards/rejected": -0.14995010197162628, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.04994381321013859, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 2.493765586034913e-06, | |
| "logits/chosen": -2.232464551925659, | |
| "logits/rejected": -2.461862087249756, | |
| "logps/chosen": -4.306845664978027, | |
| "logps/rejected": -70.49752807617188, | |
| "loss": 0.429, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18084710836410522, | |
| "rewards/margins": 0.39341551065444946, | |
| "rewards/rejected": -0.21256835758686066, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05244100387064552, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 2.6184538653366586e-06, | |
| "logits/chosen": -2.2186341285705566, | |
| "logits/rejected": -2.4293782711029053, | |
| "logps/chosen": -2.813771963119507, | |
| "logps/rejected": -77.77786254882812, | |
| "loss": 0.4186, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18635782599449158, | |
| "rewards/margins": 0.4745180010795593, | |
| "rewards/rejected": -0.28816017508506775, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05493819453115245, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 2.7431421446384045e-06, | |
| "logits/chosen": -2.2114510536193848, | |
| "logits/rejected": -2.423021078109741, | |
| "logps/chosen": -2.7164266109466553, | |
| "logps/rejected": -93.01399230957031, | |
| "loss": 0.4086, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19216802716255188, | |
| "rewards/margins": 0.5540723204612732, | |
| "rewards/rejected": -0.3619043231010437, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.05743538519165938, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 2.86783042394015e-06, | |
| "logits/chosen": -2.2182869911193848, | |
| "logits/rejected": -2.4157519340515137, | |
| "logps/chosen": -2.1753125190734863, | |
| "logps/rejected": -96.47676086425781, | |
| "loss": 0.3976, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19363494217395782, | |
| "rewards/margins": 0.6491508483886719, | |
| "rewards/rejected": -0.45551595091819763, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.05993257585216631, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 2.9925187032418956e-06, | |
| "logits/chosen": -2.303800344467163, | |
| "logits/rejected": -2.5223240852355957, | |
| "logps/chosen": -2.2545647621154785, | |
| "logps/rejected": -115.70625305175781, | |
| "loss": 0.3757, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19811145961284637, | |
| "rewards/margins": 0.8461275100708008, | |
| "rewards/rejected": -0.6480159759521484, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06242976651267324, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 3.117206982543641e-06, | |
| "logits/chosen": -2.2053685188293457, | |
| "logits/rejected": -2.415367841720581, | |
| "logps/chosen": -2.1990444660186768, | |
| "logps/rejected": -140.34054565429688, | |
| "loss": 0.3542, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19701093435287476, | |
| "rewards/margins": 1.0766099691390991, | |
| "rewards/rejected": -0.8795989751815796, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06492695717318017, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 3.241895261845387e-06, | |
| "logits/chosen": -2.224290132522583, | |
| "logits/rejected": -2.4391043186187744, | |
| "logps/chosen": -1.894426941871643, | |
| "logps/rejected": -191.0155029296875, | |
| "loss": 0.3217, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19716738164424896, | |
| "rewards/margins": 1.5208184719085693, | |
| "rewards/rejected": -1.3236511945724487, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0674241478336871, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 3.3665835411471326e-06, | |
| "logits/chosen": -2.1803958415985107, | |
| "logits/rejected": -2.3852007389068604, | |
| "logps/chosen": -2.2776474952697754, | |
| "logps/rejected": -256.2982177734375, | |
| "loss": 0.2905, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19975684583187103, | |
| "rewards/margins": 2.1906659603118896, | |
| "rewards/rejected": -1.990909218788147, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.06992133849419403, | |
| "grad_norm": 0.1494140625, | |
| "learning_rate": 3.491271820448878e-06, | |
| "logits/chosen": -2.089259624481201, | |
| "logits/rejected": -2.2738101482391357, | |
| "logps/chosen": -3.7932281494140625, | |
| "logps/rejected": -315.3883361816406, | |
| "loss": 0.2858, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19718244671821594, | |
| "rewards/margins": 2.653756856918335, | |
| "rewards/rejected": -2.4565746784210205, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07241852915470096, | |
| "grad_norm": 0.138671875, | |
| "learning_rate": 3.615960099750624e-06, | |
| "logits/chosen": -2.136627674102783, | |
| "logits/rejected": -2.336648941040039, | |
| "logps/chosen": -2.27809476852417, | |
| "logps/rejected": -309.0271911621094, | |
| "loss": 0.281, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19811423122882843, | |
| "rewards/margins": 2.7192320823669434, | |
| "rewards/rejected": -2.521117687225342, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07491571981520789, | |
| "grad_norm": 0.1826171875, | |
| "learning_rate": 3.7406483790523696e-06, | |
| "logits/chosen": -2.1298162937164307, | |
| "logits/rejected": -2.3403031826019287, | |
| "logps/chosen": -2.7181735038757324, | |
| "logps/rejected": -379.2640075683594, | |
| "loss": 0.2649, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1967845857143402, | |
| "rewards/margins": 3.449932813644409, | |
| "rewards/rejected": -3.253148317337036, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07741291047571482, | |
| "grad_norm": 0.189453125, | |
| "learning_rate": 3.8653366583541155e-06, | |
| "logits/chosen": -2.0690829753875732, | |
| "logits/rejected": -2.240788459777832, | |
| "logps/chosen": -2.222135066986084, | |
| "logps/rejected": -404.05157470703125, | |
| "loss": 0.2741, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19702208042144775, | |
| "rewards/margins": 3.7276394367218018, | |
| "rewards/rejected": -3.5306174755096436, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.07991010113622175, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 3.990024937655861e-06, | |
| "logits/chosen": -2.0671050548553467, | |
| "logits/rejected": -2.24275279045105, | |
| "logps/chosen": -2.2376856803894043, | |
| "logps/rejected": -507.495849609375, | |
| "loss": 0.2612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1967649608850479, | |
| "rewards/margins": 4.618912696838379, | |
| "rewards/rejected": -4.422147750854492, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08240729179672868, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 4.114713216957607e-06, | |
| "logits/chosen": -2.137000560760498, | |
| "logits/rejected": -2.287095546722412, | |
| "logps/chosen": -2.6727747917175293, | |
| "logps/rejected": -397.1515808105469, | |
| "loss": 0.2652, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1976507008075714, | |
| "rewards/margins": 3.678623914718628, | |
| "rewards/rejected": -3.480973482131958, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.08490448245723561, | |
| "grad_norm": 0.21484375, | |
| "learning_rate": 4.239401496259352e-06, | |
| "logits/chosen": -2.0656325817108154, | |
| "logits/rejected": -2.2314834594726562, | |
| "logps/chosen": -2.123012065887451, | |
| "logps/rejected": -494.6885681152344, | |
| "loss": 0.2573, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19615662097930908, | |
| "rewards/margins": 4.528371810913086, | |
| "rewards/rejected": -4.332215309143066, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.08740167311774254, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 4.364089775561098e-06, | |
| "logits/chosen": -2.1637234687805176, | |
| "logits/rejected": -2.3083388805389404, | |
| "logps/chosen": -2.9447762966156006, | |
| "logps/rejected": -453.163330078125, | |
| "loss": 0.264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18918542563915253, | |
| "rewards/margins": 4.223211288452148, | |
| "rewards/rejected": -4.034026145935059, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.08989886377824947, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 4.488778054862843e-06, | |
| "logits/chosen": -2.1501951217651367, | |
| "logits/rejected": -2.341325521469116, | |
| "logps/chosen": -4.00003719329834, | |
| "logps/rejected": -510.6114196777344, | |
| "loss": 0.2492, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18887588381767273, | |
| "rewards/margins": 4.7241339683532715, | |
| "rewards/rejected": -4.5352582931518555, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.0923960544387564, | |
| "grad_norm": 0.228515625, | |
| "learning_rate": 4.6134663341645895e-06, | |
| "logits/chosen": -2.152017593383789, | |
| "logits/rejected": -2.326498508453369, | |
| "logps/chosen": -3.2789077758789062, | |
| "logps/rejected": -488.865966796875, | |
| "loss": 0.2472, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19166019558906555, | |
| "rewards/margins": 4.529562473297119, | |
| "rewards/rejected": -4.337902069091797, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.09489324509926333, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.738154613466335e-06, | |
| "logits/chosen": -2.0966598987579346, | |
| "logits/rejected": -2.3076987266540527, | |
| "logps/chosen": -3.7783362865448, | |
| "logps/rejected": -743.3594970703125, | |
| "loss": 0.2398, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18687334656715393, | |
| "rewards/margins": 6.980570316314697, | |
| "rewards/rejected": -6.793696403503418, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.09739043575977026, | |
| "grad_norm": 0.1982421875, | |
| "learning_rate": 4.862842892768081e-06, | |
| "logits/chosen": -2.1418652534484863, | |
| "logits/rejected": -2.30336332321167, | |
| "logps/chosen": -2.9560298919677734, | |
| "logps/rejected": -607.9320068359375, | |
| "loss": 0.2388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.18996365368366241, | |
| "rewards/margins": 5.754693031311035, | |
| "rewards/rejected": -5.564728736877441, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.09988762642027718, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 4.987531172069826e-06, | |
| "logits/chosen": -2.0703442096710205, | |
| "logits/rejected": -2.2270889282226562, | |
| "logps/chosen": -2.578680992126465, | |
| "logps/rejected": -683.11083984375, | |
| "loss": 0.2415, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19460181891918182, | |
| "rewards/margins": 6.500932216644287, | |
| "rewards/rejected": -6.30633020401001, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.10238481708078412, | |
| "grad_norm": 0.2021484375, | |
| "learning_rate": 4.999923022460671e-06, | |
| "logits/chosen": -2.0380523204803467, | |
| "logits/rejected": -2.2315127849578857, | |
| "logps/chosen": -4.086075782775879, | |
| "logps/rejected": -833.37255859375, | |
| "loss": 0.2328, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19286975264549255, | |
| "rewards/margins": 7.908270835876465, | |
| "rewards/rejected": -7.715400695800781, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.10488200774129104, | |
| "grad_norm": 0.248046875, | |
| "learning_rate": 4.999656933348981e-06, | |
| "logits/chosen": -2.244335174560547, | |
| "logits/rejected": -2.4024062156677246, | |
| "logps/chosen": -2.923116445541382, | |
| "logps/rejected": -593.464599609375, | |
| "loss": 0.241, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19478723406791687, | |
| "rewards/margins": 5.600838661193848, | |
| "rewards/rejected": -5.4060516357421875, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.10737919840179798, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 4.99920080255011e-06, | |
| "logits/chosen": -2.077357769012451, | |
| "logits/rejected": -2.282799243927002, | |
| "logps/chosen": -2.9383771419525146, | |
| "logps/rejected": -852.4064331054688, | |
| "loss": 0.231, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19819210469722748, | |
| "rewards/margins": 8.028984069824219, | |
| "rewards/rejected": -7.830792427062988, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1098763890623049, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 4.998554664742362e-06, | |
| "logits/chosen": -2.148183822631836, | |
| "logits/rejected": -2.3020401000976562, | |
| "logps/chosen": -1.9824367761611938, | |
| "logps/rejected": -745.6473999023438, | |
| "loss": 0.2322, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1946963667869568, | |
| "rewards/margins": 7.165565490722656, | |
| "rewards/rejected": -6.9708685874938965, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.11237357972281184, | |
| "grad_norm": 0.13671875, | |
| "learning_rate": 4.997718569049726e-06, | |
| "logits/chosen": -2.094149351119995, | |
| "logits/rejected": -2.2727301120758057, | |
| "logps/chosen": -3.559483051300049, | |
| "logps/rejected": -817.2952270507812, | |
| "loss": 0.2319, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19730597734451294, | |
| "rewards/margins": 7.785311222076416, | |
| "rewards/rejected": -7.588005065917969, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.11487077038331876, | |
| "grad_norm": 0.1171875, | |
| "learning_rate": 4.9966925790381404e-06, | |
| "logits/chosen": -2.1491434574127197, | |
| "logits/rejected": -2.301217555999756, | |
| "logps/chosen": -1.5461114645004272, | |
| "logps/rejected": -810.7796020507812, | |
| "loss": 0.2326, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19482001662254333, | |
| "rewards/margins": 7.743639945983887, | |
| "rewards/rejected": -7.548819541931152, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1173679610438257, | |
| "grad_norm": 0.1435546875, | |
| "learning_rate": 4.995476772710657e-06, | |
| "logits/chosen": -2.1041364669799805, | |
| "logits/rejected": -2.3101038932800293, | |
| "logps/chosen": -3.1227645874023438, | |
| "logps/rejected": -963.2913208007812, | |
| "loss": 0.2321, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1952921450138092, | |
| "rewards/margins": 9.208600044250488, | |
| "rewards/rejected": -9.013307571411133, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.11986515170433262, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 4.994071242501516e-06, | |
| "logits/chosen": -2.1944689750671387, | |
| "logits/rejected": -2.371983051300049, | |
| "logps/chosen": -2.822134494781494, | |
| "logps/rejected": -869.8029174804688, | |
| "loss": 0.2298, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19522327184677124, | |
| "rewards/margins": 8.3977632522583, | |
| "rewards/rejected": -8.20253849029541, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.12236234236483956, | |
| "grad_norm": 0.0634765625, | |
| "learning_rate": 4.992476095269112e-06, | |
| "logits/chosen": -2.2050843238830566, | |
| "logits/rejected": -2.3897545337677, | |
| "logps/chosen": -1.4868861436843872, | |
| "logps/rejected": -922.6173095703125, | |
| "loss": 0.2305, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2005070000886917, | |
| "rewards/margins": 8.763871192932129, | |
| "rewards/rejected": -8.563364028930664, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.12485953302534648, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 4.990691452287877e-06, | |
| "logits/chosen": -2.042813777923584, | |
| "logits/rejected": -2.213289976119995, | |
| "logps/chosen": -2.393306016921997, | |
| "logps/rejected": -886.4241943359375, | |
| "loss": 0.2303, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20110133290290833, | |
| "rewards/margins": 8.474591255187988, | |
| "rewards/rejected": -8.273489952087402, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1273567236858534, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 4.988717449239056e-06, | |
| "logits/chosen": -2.093723773956299, | |
| "logits/rejected": -2.2634453773498535, | |
| "logps/chosen": -1.9311176538467407, | |
| "logps/rejected": -851.02734375, | |
| "loss": 0.2347, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19648316502571106, | |
| "rewards/margins": 8.190296173095703, | |
| "rewards/rejected": -7.993813991546631, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.12985391434636034, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 4.98655423620039e-06, | |
| "logits/chosen": -2.1161797046661377, | |
| "logits/rejected": -2.3049392700195312, | |
| "logps/chosen": -1.9681230783462524, | |
| "logps/rejected": -963.2742919921875, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20181334018707275, | |
| "rewards/margins": 9.243757247924805, | |
| "rewards/rejected": -9.041942596435547, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.13235110500686728, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 4.984201977634711e-06, | |
| "logits/chosen": -2.223388195037842, | |
| "logits/rejected": -2.4297728538513184, | |
| "logps/chosen": -2.4097044467926025, | |
| "logps/rejected": -1106.8994140625, | |
| "loss": 0.2278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2010197937488556, | |
| "rewards/margins": 10.71354866027832, | |
| "rewards/rejected": -10.512530326843262, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1348482956673742, | |
| "grad_norm": 0.11279296875, | |
| "learning_rate": 4.9816608523774345e-06, | |
| "logits/chosen": -2.119506359100342, | |
| "logits/rejected": -2.305849552154541, | |
| "logps/chosen": -2.257546901702881, | |
| "logps/rejected": -930.5267333984375, | |
| "loss": 0.2306, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19777485728263855, | |
| "rewards/margins": 8.905478477478027, | |
| "rewards/rejected": -8.707704544067383, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.13734548632788113, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 4.978931053622964e-06, | |
| "logits/chosen": -2.1544103622436523, | |
| "logits/rejected": -2.354814052581787, | |
| "logps/chosen": -1.3565616607666016, | |
| "logps/rejected": -950.23681640625, | |
| "loss": 0.2307, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2015368640422821, | |
| "rewards/margins": 9.141637802124023, | |
| "rewards/rejected": -8.940099716186523, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.13984267698838806, | |
| "grad_norm": 0.035400390625, | |
| "learning_rate": 4.9760127889100044e-06, | |
| "logits/chosen": -2.1699581146240234, | |
| "logits/rejected": -2.3422303199768066, | |
| "logps/chosen": -1.4560916423797607, | |
| "logps/rejected": -1047.3670654296875, | |
| "loss": 0.23, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19995173811912537, | |
| "rewards/margins": 10.138322830200195, | |
| "rewards/rejected": -9.938371658325195, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.142339867648895, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 4.972906280105781e-06, | |
| "logits/chosen": -2.0392138957977295, | |
| "logits/rejected": -2.2401204109191895, | |
| "logps/chosen": -2.1844277381896973, | |
| "logps/rejected": -998.3021240234375, | |
| "loss": 0.2281, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20644374191761017, | |
| "rewards/margins": 9.561029434204102, | |
| "rewards/rejected": -9.354585647583008, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1448370583094019, | |
| "grad_norm": 0.0625, | |
| "learning_rate": 4.969611763389175e-06, | |
| "logits/chosen": -2.2010245323181152, | |
| "logits/rejected": -2.3933498859405518, | |
| "logps/chosen": -2.1393237113952637, | |
| "logps/rejected": -925.5234375, | |
| "loss": 0.2289, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1972513645887375, | |
| "rewards/margins": 8.949918746948242, | |
| "rewards/rejected": -8.752666473388672, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.14733424896990885, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 4.966129489232762e-06, | |
| "logits/chosen": -2.1333353519439697, | |
| "logits/rejected": -2.3556675910949707, | |
| "logps/chosen": -2.2460904121398926, | |
| "logps/rejected": -1139.327392578125, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20897097885608673, | |
| "rewards/margins": 10.926295280456543, | |
| "rewards/rejected": -10.717325210571289, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.14983143963041579, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 4.962459722383775e-06, | |
| "logits/chosen": -2.095088243484497, | |
| "logits/rejected": -2.2931671142578125, | |
| "logps/chosen": -2.7135472297668457, | |
| "logps/rejected": -1181.6075439453125, | |
| "loss": 0.2292, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20306305587291718, | |
| "rewards/margins": 11.351381301879883, | |
| "rewards/rejected": -11.148316383361816, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.15232863029092272, | |
| "grad_norm": 0.0303955078125, | |
| "learning_rate": 4.958602741843975e-06, | |
| "logits/chosen": -2.0957350730895996, | |
| "logits/rejected": -2.3226089477539062, | |
| "logps/chosen": -2.8655078411102295, | |
| "logps/rejected": -1118.1968994140625, | |
| "loss": 0.2277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19633761048316956, | |
| "rewards/margins": 10.744343757629395, | |
| "rewards/rejected": -10.548004150390625, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.15482582095142963, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 4.954558840848437e-06, | |
| "logits/chosen": -2.211951494216919, | |
| "logits/rejected": -2.3932459354400635, | |
| "logps/chosen": -1.5332846641540527, | |
| "logps/rejected": -932.4984130859375, | |
| "loss": 0.2285, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20827274024486542, | |
| "rewards/margins": 9.049389839172363, | |
| "rewards/rejected": -8.841116905212402, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.15732301161193657, | |
| "grad_norm": 0.052978515625, | |
| "learning_rate": 4.950328326843258e-06, | |
| "logits/chosen": -2.073488712310791, | |
| "logits/rejected": -2.2822651863098145, | |
| "logps/chosen": -0.9946017265319824, | |
| "logps/rejected": -1086.56689453125, | |
| "loss": 0.2291, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20773670077323914, | |
| "rewards/margins": 10.345720291137695, | |
| "rewards/rejected": -10.137983322143555, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1598202022724435, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 4.945911521462182e-06, | |
| "logits/chosen": -2.2225770950317383, | |
| "logits/rejected": -2.412863254547119, | |
| "logps/chosen": -1.7764488458633423, | |
| "logps/rejected": -1141.6427001953125, | |
| "loss": 0.2286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20446841418743134, | |
| "rewards/margins": 11.000048637390137, | |
| "rewards/rejected": -10.79557991027832, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.16231739293295044, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 4.941308760502149e-06, | |
| "logits/chosen": -2.211944341659546, | |
| "logits/rejected": -2.371511697769165, | |
| "logps/chosen": -2.542166233062744, | |
| "logps/rejected": -972.3176879882812, | |
| "loss": 0.2319, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.200990229845047, | |
| "rewards/margins": 9.126736640930176, | |
| "rewards/rejected": -8.925745964050293, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.16481458359345735, | |
| "grad_norm": 0.134765625, | |
| "learning_rate": 4.936520393897762e-06, | |
| "logits/chosen": -2.1870148181915283, | |
| "logits/rejected": -2.4076366424560547, | |
| "logps/chosen": -2.055567979812622, | |
| "logps/rejected": -1019.7349853515625, | |
| "loss": 0.2287, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21482977271080017, | |
| "rewards/margins": 9.75967788696289, | |
| "rewards/rejected": -9.54484748840332, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1673117742539643, | |
| "grad_norm": 0.042724609375, | |
| "learning_rate": 4.931546785694684e-06, | |
| "logits/chosen": -2.207019090652466, | |
| "logits/rejected": -2.411149740219116, | |
| "logps/chosen": -1.447061538696289, | |
| "logps/rejected": -1274.262451171875, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2061387598514557, | |
| "rewards/margins": 12.430362701416016, | |
| "rewards/rejected": -12.224225044250488, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.16980896491447123, | |
| "grad_norm": 0.031982421875, | |
| "learning_rate": 4.926388314021964e-06, | |
| "logits/chosen": -2.245506763458252, | |
| "logits/rejected": -2.439272403717041, | |
| "logps/chosen": -1.3953222036361694, | |
| "logps/rejected": -1066.398193359375, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.210698202252388, | |
| "rewards/margins": 10.378253936767578, | |
| "rewards/rejected": -10.167555809020996, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.17230615557497814, | |
| "grad_norm": 0.049560546875, | |
| "learning_rate": 4.921045371063283e-06, | |
| "logits/chosen": -2.235975980758667, | |
| "logits/rejected": -2.42988920211792, | |
| "logps/chosen": -0.8631747961044312, | |
| "logps/rejected": -1208.173095703125, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2110184133052826, | |
| "rewards/margins": 11.736184120178223, | |
| "rewards/rejected": -11.525165557861328, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.17480334623548507, | |
| "grad_norm": 0.049072265625, | |
| "learning_rate": 4.915518363027142e-06, | |
| "logits/chosen": -2.29992938041687, | |
| "logits/rejected": -2.4797685146331787, | |
| "logps/chosen": -0.5947138667106628, | |
| "logps/rejected": -1052.22216796875, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2057635486125946, | |
| "rewards/margins": 10.251365661621094, | |
| "rewards/rejected": -10.045602798461914, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.177300536895992, | |
| "grad_norm": 0.0625, | |
| "learning_rate": 4.909807710115977e-06, | |
| "logits/chosen": -2.0681312084198, | |
| "logits/rejected": -2.245760202407837, | |
| "logps/chosen": -1.667133092880249, | |
| "logps/rejected": -1234.741943359375, | |
| "loss": 0.2287, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19957289099693298, | |
| "rewards/margins": 12.013802528381348, | |
| "rewards/rejected": -11.814229011535645, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.17979772755649895, | |
| "grad_norm": 0.064453125, | |
| "learning_rate": 4.903913846494211e-06, | |
| "logits/chosen": -2.0854830741882324, | |
| "logits/rejected": -2.318626880645752, | |
| "logps/chosen": -1.4859822988510132, | |
| "logps/rejected": -1401.390625, | |
| "loss": 0.2247, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21390756964683533, | |
| "rewards/margins": 13.512557983398438, | |
| "rewards/rejected": -13.298650741577148, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.18229491821700586, | |
| "grad_norm": 0.049072265625, | |
| "learning_rate": 4.897837220255251e-06, | |
| "logits/chosen": -2.105733633041382, | |
| "logits/rejected": -2.273578643798828, | |
| "logps/chosen": -1.5127496719360352, | |
| "logps/rejected": -1189.6934814453125, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21006134152412415, | |
| "rewards/margins": 11.488363265991211, | |
| "rewards/rejected": -11.278302192687988, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.1847921088775128, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 4.891578293387413e-06, | |
| "logits/chosen": -2.1760973930358887, | |
| "logits/rejected": -2.3570103645324707, | |
| "logps/chosen": -1.769789695739746, | |
| "logps/rejected": -1201.271240234375, | |
| "loss": 0.2279, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20501787960529327, | |
| "rewards/margins": 11.696678161621094, | |
| "rewards/rejected": -11.491661071777344, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.18728929953801973, | |
| "grad_norm": 0.031982421875, | |
| "learning_rate": 4.885137541738808e-06, | |
| "logits/chosen": -2.141007423400879, | |
| "logits/rejected": -2.313952922821045, | |
| "logps/chosen": -0.702928900718689, | |
| "logps/rejected": -1086.88330078125, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20619484782218933, | |
| "rewards/margins": 10.44408893585205, | |
| "rewards/rejected": -10.237894058227539, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.18978649019852667, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 4.878515454981153e-06, | |
| "logits/chosen": -2.0163445472717285, | |
| "logits/rejected": -2.219290256500244, | |
| "logps/chosen": -1.4322102069854736, | |
| "logps/rejected": -1299.561767578125, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20874173939228058, | |
| "rewards/margins": 12.519464492797852, | |
| "rewards/rejected": -12.310722351074219, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.19228368085903358, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 4.8717125365725545e-06, | |
| "logits/chosen": -2.2308189868927, | |
| "logits/rejected": -2.3827383518218994, | |
| "logps/chosen": -1.321045160293579, | |
| "logps/rejected": -954.9481201171875, | |
| "loss": 0.2298, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2083440124988556, | |
| "rewards/margins": 9.28177547454834, | |
| "rewards/rejected": -9.073431015014648, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.19478087151954052, | |
| "grad_norm": 0.05029296875, | |
| "learning_rate": 4.864729303719221e-06, | |
| "logits/chosen": -2.1831257343292236, | |
| "logits/rejected": -2.386863946914673, | |
| "logps/chosen": -1.462869644165039, | |
| "logps/rejected": -1309.128662109375, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21388690173625946, | |
| "rewards/margins": 12.65107250213623, | |
| "rewards/rejected": -12.437185287475586, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.19727806218004745, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 4.857566287336152e-06, | |
| "logits/chosen": -2.125136375427246, | |
| "logits/rejected": -2.3306586742401123, | |
| "logps/chosen": -1.5712594985961914, | |
| "logps/rejected": -1211.277587890625, | |
| "loss": 0.2289, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21211902797222137, | |
| "rewards/margins": 11.6867094039917, | |
| "rewards/rejected": -11.474590301513672, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.19977525284055436, | |
| "grad_norm": 0.11376953125, | |
| "learning_rate": 4.850224032006765e-06, | |
| "logits/chosen": -2.226292610168457, | |
| "logits/rejected": -2.4260332584381104, | |
| "logps/chosen": -1.096842885017395, | |
| "logps/rejected": -1190.5208740234375, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21329161524772644, | |
| "rewards/margins": 11.604973793029785, | |
| "rewards/rejected": -11.391681671142578, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2022724435010613, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 4.8427030959414984e-06, | |
| "logits/chosen": -2.0340332984924316, | |
| "logits/rejected": -2.239582061767578, | |
| "logps/chosen": -1.4298118352890015, | |
| "logps/rejected": -1246.587158203125, | |
| "loss": 0.2286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.203706294298172, | |
| "rewards/margins": 12.136808395385742, | |
| "rewards/rejected": -11.933099746704102, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.20476963416156824, | |
| "grad_norm": 0.0400390625, | |
| "learning_rate": 4.835004050935369e-06, | |
| "logits/chosen": -2.142270803451538, | |
| "logits/rejected": -2.3261685371398926, | |
| "logps/chosen": -2.205761432647705, | |
| "logps/rejected": -1209.187744140625, | |
| "loss": 0.2294, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21324896812438965, | |
| "rewards/margins": 11.711974143981934, | |
| "rewards/rejected": -11.498725891113281, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.20726682482207517, | |
| "grad_norm": 0.0257568359375, | |
| "learning_rate": 4.8271274823245e-06, | |
| "logits/chosen": -2.130068778991699, | |
| "logits/rejected": -2.303924083709717, | |
| "logps/chosen": -1.5450295209884644, | |
| "logps/rejected": -1218.6636962890625, | |
| "loss": 0.2285, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2086503505706787, | |
| "rewards/margins": 11.845584869384766, | |
| "rewards/rejected": -11.636935234069824, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.20976401548258208, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 4.8190739889416264e-06, | |
| "logits/chosen": -2.1227643489837646, | |
| "logits/rejected": -2.3156332969665527, | |
| "logps/chosen": -1.4759693145751953, | |
| "logps/rejected": -1314.2388916015625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21140392124652863, | |
| "rewards/margins": 12.794939994812012, | |
| "rewards/rejected": -12.583536148071289, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.21226120614308902, | |
| "grad_norm": 0.0264892578125, | |
| "learning_rate": 4.810844183070553e-06, | |
| "logits/chosen": -2.2195773124694824, | |
| "logits/rejected": -2.416642665863037, | |
| "logps/chosen": -1.3944060802459717, | |
| "logps/rejected": -1100.637939453125, | |
| "loss": 0.2267, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20567412674427032, | |
| "rewards/margins": 10.635955810546875, | |
| "rewards/rejected": -10.430280685424805, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.21475839680359596, | |
| "grad_norm": 0.05712890625, | |
| "learning_rate": 4.802438690399622e-06, | |
| "logits/chosen": -2.170403480529785, | |
| "logits/rejected": -2.3731253147125244, | |
| "logps/chosen": -0.7113627195358276, | |
| "logps/rejected": -1192.8896484375, | |
| "loss": 0.2279, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20982804894447327, | |
| "rewards/margins": 11.532899856567383, | |
| "rewards/rejected": -11.32307243347168, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.2172555874641029, | |
| "grad_norm": 0.06201171875, | |
| "learning_rate": 4.793858149974129e-06, | |
| "logits/chosen": -2.134357452392578, | |
| "logits/rejected": -2.3488316535949707, | |
| "logps/chosen": -1.1498069763183594, | |
| "logps/rejected": -1405.57177734375, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2079639434814453, | |
| "rewards/margins": 13.713908195495605, | |
| "rewards/rejected": -13.505943298339844, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2197527781246098, | |
| "grad_norm": 0.03857421875, | |
| "learning_rate": 4.785103214147747e-06, | |
| "logits/chosen": -2.244509220123291, | |
| "logits/rejected": -2.446852445602417, | |
| "logps/chosen": -1.082582950592041, | |
| "logps/rejected": -1192.0093994140625, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20774182677268982, | |
| "rewards/margins": 11.592524528503418, | |
| "rewards/rejected": -11.384782791137695, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.22224996878511674, | |
| "grad_norm": 0.0118408203125, | |
| "learning_rate": 4.776174548532926e-06, | |
| "logits/chosen": -2.1576988697052, | |
| "logits/rejected": -2.3463644981384277, | |
| "logps/chosen": -1.1917221546173096, | |
| "logps/rejected": -1265.5885009765625, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20792751014232635, | |
| "rewards/margins": 12.278467178344727, | |
| "rewards/rejected": -12.070539474487305, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.22474715944562368, | |
| "grad_norm": 0.060546875, | |
| "learning_rate": 4.767072831950288e-06, | |
| "logits/chosen": -2.2008862495422363, | |
| "logits/rejected": -2.402891159057617, | |
| "logps/chosen": -1.2017600536346436, | |
| "logps/rejected": -1313.045654296875, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2119072675704956, | |
| "rewards/margins": 12.807563781738281, | |
| "rewards/rejected": -12.59565544128418, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.22724435010613062, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 4.7577987563770226e-06, | |
| "logits/chosen": -2.1067652702331543, | |
| "logits/rejected": -2.324591875076294, | |
| "logps/chosen": -2.000681161880493, | |
| "logps/rejected": -1264.68115234375, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2102380096912384, | |
| "rewards/margins": 12.193601608276367, | |
| "rewards/rejected": -11.983363151550293, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.22974154076663753, | |
| "grad_norm": 0.059814453125, | |
| "learning_rate": 4.748353026894273e-06, | |
| "logits/chosen": -2.1624951362609863, | |
| "logits/rejected": -2.3448517322540283, | |
| "logps/chosen": -1.4960781335830688, | |
| "logps/rejected": -1188.14990234375, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2139265537261963, | |
| "rewards/margins": 11.510043144226074, | |
| "rewards/rejected": -11.29611587524414, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.23223873142714446, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 4.738736361633532e-06, | |
| "logits/chosen": -2.25258207321167, | |
| "logits/rejected": -2.4271512031555176, | |
| "logps/chosen": -1.7973697185516357, | |
| "logps/rejected": -1126.24267578125, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20842309296131134, | |
| "rewards/margins": 10.903474807739258, | |
| "rewards/rejected": -10.695051193237305, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2347359220876514, | |
| "grad_norm": 0.06103515625, | |
| "learning_rate": 4.728949491722046e-06, | |
| "logits/chosen": -2.274840831756592, | |
| "logits/rejected": -2.4521872997283936, | |
| "logps/chosen": -0.652289092540741, | |
| "logps/rejected": -1062.56494140625, | |
| "loss": 0.2295, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20718173682689667, | |
| "rewards/margins": 10.335628509521484, | |
| "rewards/rejected": -10.128446578979492, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.2372331127481583, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 4.718993161227231e-06, | |
| "logits/chosen": -2.172180414199829, | |
| "logits/rejected": -2.4125022888183594, | |
| "logps/chosen": -1.2400215864181519, | |
| "logps/rejected": -1376.037841796875, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21632233262062073, | |
| "rewards/margins": 13.414273262023926, | |
| "rewards/rejected": -13.197952270507812, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.23973030340866525, | |
| "grad_norm": 0.00982666015625, | |
| "learning_rate": 4.708868127100098e-06, | |
| "logits/chosen": -2.2069010734558105, | |
| "logits/rejected": -2.3836076259613037, | |
| "logps/chosen": -0.6828838586807251, | |
| "logps/rejected": -1159.0107421875, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20559605956077576, | |
| "rewards/margins": 11.286005973815918, | |
| "rewards/rejected": -11.080410957336426, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.24222749406917218, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 4.6985751591177075e-06, | |
| "logits/chosen": -2.0572152137756348, | |
| "logits/rejected": -2.2502310276031494, | |
| "logps/chosen": -1.7850786447525024, | |
| "logps/rejected": -1321.8499755859375, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.212154358625412, | |
| "rewards/margins": 12.830732345581055, | |
| "rewards/rejected": -12.618578910827637, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.24472468472967912, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 4.688115039824648e-06, | |
| "logits/chosen": -2.1182241439819336, | |
| "logits/rejected": -2.292884349822998, | |
| "logps/chosen": -0.9138596653938293, | |
| "logps/rejected": -1220.1195068359375, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2072029858827591, | |
| "rewards/margins": 11.845842361450195, | |
| "rewards/rejected": -11.638639450073242, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.24722187539018603, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 4.677488564473535e-06, | |
| "logits/chosen": -2.076742649078369, | |
| "logits/rejected": -2.280050754547119, | |
| "logps/chosen": -2.1341259479522705, | |
| "logps/rejected": -1361.389404296875, | |
| "loss": 0.2274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20143508911132812, | |
| "rewards/margins": 13.227249145507812, | |
| "rewards/rejected": -13.0258150100708, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.24971906605069297, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 4.666696540964556e-06, | |
| "logits/chosen": -2.205030918121338, | |
| "logits/rejected": -2.380605697631836, | |
| "logps/chosen": -1.0865452289581299, | |
| "logps/rejected": -1183.8802490234375, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21310412883758545, | |
| "rewards/margins": 11.559179306030273, | |
| "rewards/rejected": -11.346075057983398, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.24971906605069297, | |
| "eval_logits/chosen": -2.551421880722046, | |
| "eval_logits/rejected": -2.637223482131958, | |
| "eval_logps/chosen": -0.39880600571632385, | |
| "eval_logps/rejected": -585.1870727539062, | |
| "eval_loss": 0.22298085689544678, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 0.25514695048332214, | |
| "eval_rewards/margins": 5.658298015594482, | |
| "eval_rewards/rejected": -5.403151035308838, | |
| "eval_runtime": 0.6597, | |
| "eval_samples_per_second": 7.579, | |
| "eval_steps_per_second": 4.548, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2522162567111999, | |
| "grad_norm": 0.0361328125, | |
| "learning_rate": 4.6557397897840454e-06, | |
| "logits/chosen": -2.226627826690674, | |
| "logits/rejected": -2.434197187423706, | |
| "logps/chosen": -1.4807536602020264, | |
| "logps/rejected": -1233.5753173828125, | |
| "loss": 0.2295, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21030649542808533, | |
| "rewards/margins": 11.924067497253418, | |
| "rewards/rejected": -11.713762283325195, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.2547134473717068, | |
| "grad_norm": 0.0311279296875, | |
| "learning_rate": 4.644619143942108e-06, | |
| "logits/chosen": -2.1962525844573975, | |
| "logits/rejected": -2.418130397796631, | |
| "logps/chosen": -1.2743520736694336, | |
| "logps/rejected": -1324.01123046875, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2114056795835495, | |
| "rewards/margins": 12.735904693603516, | |
| "rewards/rejected": -12.524497985839844, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.2572106380322138, | |
| "grad_norm": 0.1162109375, | |
| "learning_rate": 4.633335448909284e-06, | |
| "logits/chosen": -2.0575506687164307, | |
| "logits/rejected": -2.2430522441864014, | |
| "logps/chosen": -1.6322782039642334, | |
| "logps/rejected": -1251.030029296875, | |
| "loss": 0.2257, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21490998566150665, | |
| "rewards/margins": 12.10401725769043, | |
| "rewards/rejected": -11.889106750488281, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.2597078286927207, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 4.621889562552272e-06, | |
| "logits/chosen": -2.1623690128326416, | |
| "logits/rejected": -2.387530565261841, | |
| "logps/chosen": -1.5265319347381592, | |
| "logps/rejected": -1406.755615234375, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21330364048480988, | |
| "rewards/margins": 13.666200637817383, | |
| "rewards/rejected": -13.452896118164062, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2622050193532276, | |
| "grad_norm": 0.134765625, | |
| "learning_rate": 4.610282355068707e-06, | |
| "logits/chosen": -2.265820264816284, | |
| "logits/rejected": -2.481659412384033, | |
| "logps/chosen": -1.5380371809005737, | |
| "logps/rejected": -1449.8046875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2142389565706253, | |
| "rewards/margins": 14.062037467956543, | |
| "rewards/rejected": -13.847798347473145, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.26470221001373456, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 4.598514708921006e-06, | |
| "logits/chosen": -2.249868869781494, | |
| "logits/rejected": -2.466034412384033, | |
| "logps/chosen": -0.7143852710723877, | |
| "logps/rejected": -1382.494140625, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2079528272151947, | |
| "rewards/margins": 13.4636812210083, | |
| "rewards/rejected": -13.255727767944336, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.26719940067424147, | |
| "grad_norm": 0.01153564453125, | |
| "learning_rate": 4.5865875187692695e-06, | |
| "logits/chosen": -2.1900734901428223, | |
| "logits/rejected": -2.3761203289031982, | |
| "logps/chosen": -1.549536943435669, | |
| "logps/rejected": -1185.685791015625, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20429477095603943, | |
| "rewards/margins": 11.534225463867188, | |
| "rewards/rejected": -11.32992935180664, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.2696965913347484, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 4.57450169140327e-06, | |
| "logits/chosen": -2.0554583072662354, | |
| "logits/rejected": -2.273556709289551, | |
| "logps/chosen": -1.3945400714874268, | |
| "logps/rejected": -1522.8463134765625, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2084466516971588, | |
| "rewards/margins": 14.89411449432373, | |
| "rewards/rejected": -14.685667037963867, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.27219378199525535, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 4.562258145673507e-06, | |
| "logits/chosen": -2.20988392829895, | |
| "logits/rejected": -2.4358487129211426, | |
| "logps/chosen": -1.0550658702850342, | |
| "logps/rejected": -1489.2562255859375, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20840421319007874, | |
| "rewards/margins": 14.553556442260742, | |
| "rewards/rejected": -14.34515380859375, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.27469097265576226, | |
| "grad_norm": 0.017578125, | |
| "learning_rate": 4.549857812421353e-06, | |
| "logits/chosen": -2.1285512447357178, | |
| "logits/rejected": -2.318908929824829, | |
| "logps/chosen": -0.753593921661377, | |
| "logps/rejected": -1319.107666015625, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20573386549949646, | |
| "rewards/margins": 12.884170532226562, | |
| "rewards/rejected": -12.678436279296875, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2771881633162692, | |
| "grad_norm": 0.050537109375, | |
| "learning_rate": 4.537301634408281e-06, | |
| "logits/chosen": -2.1442999839782715, | |
| "logits/rejected": -2.34287691116333, | |
| "logps/chosen": -0.9622041583061218, | |
| "logps/rejected": -1223.08837890625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21334879100322723, | |
| "rewards/margins": 11.921293258666992, | |
| "rewards/rejected": -11.707944869995117, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.27968535397677613, | |
| "grad_norm": 0.027099609375, | |
| "learning_rate": 4.52459056624419e-06, | |
| "logits/chosen": -2.198021173477173, | |
| "logits/rejected": -2.3665783405303955, | |
| "logps/chosen": -1.6707994937896729, | |
| "logps/rejected": -1209.2952880859375, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20634672045707703, | |
| "rewards/margins": 11.70842170715332, | |
| "rewards/rejected": -11.502074241638184, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.28218254463728304, | |
| "grad_norm": 0.0458984375, | |
| "learning_rate": 4.51172557431483e-06, | |
| "logits/chosen": -2.0804460048675537, | |
| "logits/rejected": -2.27351713180542, | |
| "logps/chosen": -1.3884862661361694, | |
| "logps/rejected": -1267.9599609375, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20677968859672546, | |
| "rewards/margins": 12.219032287597656, | |
| "rewards/rejected": -12.012252807617188, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.28467973529779, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 4.49870763670833e-06, | |
| "logits/chosen": -2.1440179347991943, | |
| "logits/rejected": -2.3646531105041504, | |
| "logps/chosen": -0.9940131306648254, | |
| "logps/rejected": -1360.1025390625, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2132159024477005, | |
| "rewards/margins": 13.244120597839355, | |
| "rewards/rejected": -13.030904769897461, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.2871769259582969, | |
| "grad_norm": 0.060546875, | |
| "learning_rate": 4.4855377431408335e-06, | |
| "logits/chosen": -2.124523639678955, | |
| "logits/rejected": -2.308046817779541, | |
| "logps/chosen": -1.051758885383606, | |
| "logps/rejected": -1258.587158203125, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21350452303886414, | |
| "rewards/margins": 12.086160659790039, | |
| "rewards/rejected": -11.872655868530273, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.2896741166188038, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 4.472216894881261e-06, | |
| "logits/chosen": -2.12388277053833, | |
| "logits/rejected": -2.2992734909057617, | |
| "logps/chosen": -1.0673718452453613, | |
| "logps/rejected": -1227.642822265625, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21056973934173584, | |
| "rewards/margins": 11.97436809539795, | |
| "rewards/rejected": -11.763797760009766, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.2921713072793108, | |
| "grad_norm": 0.062255859375, | |
| "learning_rate": 4.4587461046751815e-06, | |
| "logits/chosen": -2.165827512741089, | |
| "logits/rejected": -2.366560697555542, | |
| "logps/chosen": -1.3018419742584229, | |
| "logps/rejected": -1152.0526123046875, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2166510820388794, | |
| "rewards/margins": 11.213326454162598, | |
| "rewards/rejected": -10.996675491333008, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.2946684979398177, | |
| "grad_norm": 0.0218505859375, | |
| "learning_rate": 4.44512639666781e-06, | |
| "logits/chosen": -2.153282642364502, | |
| "logits/rejected": -2.3281288146972656, | |
| "logps/chosen": -0.8735140562057495, | |
| "logps/rejected": -1144.37744140625, | |
| "loss": 0.2288, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20929470658302307, | |
| "rewards/margins": 11.165544509887695, | |
| "rewards/rejected": -10.956250190734863, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.29716568860032466, | |
| "grad_norm": 0.2451171875, | |
| "learning_rate": 4.431358806326158e-06, | |
| "logits/chosen": -2.0921244621276855, | |
| "logits/rejected": -2.2888898849487305, | |
| "logps/chosen": -1.9632396697998047, | |
| "logps/rejected": -1334.217041015625, | |
| "loss": 0.2274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21045899391174316, | |
| "rewards/margins": 12.872146606445312, | |
| "rewards/rejected": -12.661687850952148, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.29966287926083157, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 4.4174443803603e-06, | |
| "logits/chosen": -2.1807141304016113, | |
| "logits/rejected": -2.35149884223938, | |
| "logps/chosen": -1.1249208450317383, | |
| "logps/rejected": -1231.4007568359375, | |
| "loss": 0.2276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2088349312543869, | |
| "rewards/margins": 11.981757164001465, | |
| "rewards/rejected": -11.772923469543457, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3021600699213385, | |
| "grad_norm": 0.054931640625, | |
| "learning_rate": 4.4033841766438e-06, | |
| "logits/chosen": -2.153378486633301, | |
| "logits/rejected": -2.333552598953247, | |
| "logps/chosen": -1.4812664985656738, | |
| "logps/rejected": -1186.764404296875, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21333375573158264, | |
| "rewards/margins": 11.490147590637207, | |
| "rewards/rejected": -11.276814460754395, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.30465726058184545, | |
| "grad_norm": 0.0262451171875, | |
| "learning_rate": 4.389179264133281e-06, | |
| "logits/chosen": -2.232697010040283, | |
| "logits/rejected": -2.418818235397339, | |
| "logps/chosen": -0.8499106168746948, | |
| "logps/rejected": -1287.507568359375, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20431029796600342, | |
| "rewards/margins": 12.579316139221191, | |
| "rewards/rejected": -12.375005722045898, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.30715445124235236, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 4.374830722787159e-06, | |
| "logits/chosen": -2.2435195446014404, | |
| "logits/rejected": -2.4646503925323486, | |
| "logps/chosen": -0.5742496252059937, | |
| "logps/rejected": -1343.397216796875, | |
| "loss": 0.2276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2151050567626953, | |
| "rewards/margins": 13.130419731140137, | |
| "rewards/rejected": -12.915315628051758, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.30965164190285926, | |
| "grad_norm": 0.05615234375, | |
| "learning_rate": 4.360339643483533e-06, | |
| "logits/chosen": -2.2148001194000244, | |
| "logits/rejected": -2.421738862991333, | |
| "logps/chosen": -1.9802653789520264, | |
| "logps/rejected": -1262.169189453125, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20881418883800507, | |
| "rewards/margins": 12.177266120910645, | |
| "rewards/rejected": -11.968450546264648, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.31214883256336623, | |
| "grad_norm": 0.01348876953125, | |
| "learning_rate": 4.345707127937253e-06, | |
| "logits/chosen": -2.1191718578338623, | |
| "logits/rejected": -2.344691753387451, | |
| "logps/chosen": -0.9136890172958374, | |
| "logps/rejected": -1512.323974609375, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21416035294532776, | |
| "rewards/margins": 14.802743911743164, | |
| "rewards/rejected": -14.588582992553711, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.31464602322387314, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 4.330934288616154e-06, | |
| "logits/chosen": -2.1469109058380127, | |
| "logits/rejected": -2.3361592292785645, | |
| "logps/chosen": -1.4744806289672852, | |
| "logps/rejected": -1288.8616943359375, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2088310271501541, | |
| "rewards/margins": 12.5834379196167, | |
| "rewards/rejected": -12.374608039855957, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.31714321388438005, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 4.316022248656485e-06, | |
| "logits/chosen": -2.0783493518829346, | |
| "logits/rejected": -2.3048255443573, | |
| "logps/chosen": -1.100656270980835, | |
| "logps/rejected": -1277.9552001953125, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20990002155303955, | |
| "rewards/margins": 12.193166732788086, | |
| "rewards/rejected": -11.983266830444336, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.319640404544887, | |
| "grad_norm": 0.0400390625, | |
| "learning_rate": 4.3009721417775166e-06, | |
| "logits/chosen": -2.1016387939453125, | |
| "logits/rejected": -2.3064982891082764, | |
| "logps/chosen": -1.263979196548462, | |
| "logps/rejected": -1323.89599609375, | |
| "loss": 0.2257, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21145665645599365, | |
| "rewards/margins": 12.815747261047363, | |
| "rewards/rejected": -12.604291915893555, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3221375952053939, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 4.285785112195346e-06, | |
| "logits/chosen": -2.188570976257324, | |
| "logits/rejected": -2.397493600845337, | |
| "logps/chosen": -2.353158473968506, | |
| "logps/rejected": -1393.356201171875, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20123986899852753, | |
| "rewards/margins": 13.566085815429688, | |
| "rewards/rejected": -13.364847183227539, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3246347858659009, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 4.27046231453591e-06, | |
| "logits/chosen": -2.115800142288208, | |
| "logits/rejected": -2.314438819885254, | |
| "logps/chosen": -1.3714869022369385, | |
| "logps/rejected": -1331.2506103515625, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20796707272529602, | |
| "rewards/margins": 12.886337280273438, | |
| "rewards/rejected": -12.678369522094727, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3271319765264078, | |
| "grad_norm": 0.0322265625, | |
| "learning_rate": 4.255004913747196e-06, | |
| "logits/chosen": -2.1591382026672363, | |
| "logits/rejected": -2.3501150608062744, | |
| "logps/chosen": -0.8996777534484863, | |
| "logps/rejected": -1417.157470703125, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2074543684720993, | |
| "rewards/margins": 13.838354110717773, | |
| "rewards/rejected": -13.630900382995605, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.3296291671869147, | |
| "grad_norm": 0.05126953125, | |
| "learning_rate": 4.2394140850106825e-06, | |
| "logits/chosen": -2.0840930938720703, | |
| "logits/rejected": -2.285808801651001, | |
| "logps/chosen": -0.9041382670402527, | |
| "logps/rejected": -1322.038818359375, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2139444649219513, | |
| "rewards/margins": 12.818634033203125, | |
| "rewards/rejected": -12.604690551757812, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.33212635784742167, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 4.223691013651986e-06, | |
| "logits/chosen": -2.141530990600586, | |
| "logits/rejected": -2.363454580307007, | |
| "logps/chosen": -2.294220209121704, | |
| "logps/rejected": -1329.7213134765625, | |
| "loss": 0.2239, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2177181988954544, | |
| "rewards/margins": 12.63646411895752, | |
| "rewards/rejected": -12.418745040893555, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.3346235485079286, | |
| "grad_norm": 0.039794921875, | |
| "learning_rate": 4.207836895050748e-06, | |
| "logits/chosen": -2.263815402984619, | |
| "logits/rejected": -2.524907350540161, | |
| "logps/chosen": -0.85591059923172, | |
| "logps/rejected": -1496.051513671875, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21331222355365753, | |
| "rewards/margins": 14.541677474975586, | |
| "rewards/rejected": -14.32836627960205, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.3371207391684355, | |
| "grad_norm": 0.056640625, | |
| "learning_rate": 4.1918529345497525e-06, | |
| "logits/chosen": -2.1795644760131836, | |
| "logits/rejected": -2.345736026763916, | |
| "logps/chosen": -1.1188920736312866, | |
| "logps/rejected": -1032.299560546875, | |
| "loss": 0.2274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21017661690711975, | |
| "rewards/margins": 9.974283218383789, | |
| "rewards/rejected": -9.764104843139648, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.33961792982894246, | |
| "grad_norm": 0.061767578125, | |
| "learning_rate": 4.175740347363289e-06, | |
| "logits/chosen": -2.2571511268615723, | |
| "logits/rejected": -2.450302839279175, | |
| "logps/chosen": -2.4634203910827637, | |
| "logps/rejected": -1143.845703125, | |
| "loss": 0.2276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20782017707824707, | |
| "rewards/margins": 10.989904403686523, | |
| "rewards/rejected": -10.782083511352539, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.34211512048944936, | |
| "grad_norm": 0.021240234375, | |
| "learning_rate": 4.159500358484759e-06, | |
| "logits/chosen": -2.104897975921631, | |
| "logits/rejected": -2.321760654449463, | |
| "logps/chosen": -1.1564667224884033, | |
| "logps/rejected": -1532.8436279296875, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21173422038555145, | |
| "rewards/margins": 14.948999404907227, | |
| "rewards/rejected": -14.737266540527344, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.3446123111499563, | |
| "grad_norm": 0.0306396484375, | |
| "learning_rate": 4.143134202593549e-06, | |
| "logits/chosen": -2.1347815990448, | |
| "logits/rejected": -2.3222789764404297, | |
| "logps/chosen": -2.063771963119507, | |
| "logps/rejected": -1179.3240966796875, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.216390922665596, | |
| "rewards/margins": 11.309762001037598, | |
| "rewards/rejected": -11.093371391296387, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.34710950181046324, | |
| "grad_norm": 0.03955078125, | |
| "learning_rate": 4.126643123961158e-06, | |
| "logits/chosen": -2.216097354888916, | |
| "logits/rejected": -2.431462049484253, | |
| "logps/chosen": -1.3367359638214111, | |
| "logps/rejected": -1441.5928955078125, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2119736224412918, | |
| "rewards/margins": 14.054840087890625, | |
| "rewards/rejected": -13.842867851257324, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.34960669247097015, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 4.110028376356599e-06, | |
| "logits/chosen": -2.194693088531494, | |
| "logits/rejected": -2.394153118133545, | |
| "logps/chosen": -2.143383264541626, | |
| "logps/rejected": -1089.128173828125, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2104116678237915, | |
| "rewards/margins": 10.493813514709473, | |
| "rewards/rejected": -10.283400535583496, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.3521038831314771, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 4.093291222951079e-06, | |
| "logits/chosen": -2.1454501152038574, | |
| "logits/rejected": -2.360769033432007, | |
| "logps/chosen": -1.1339516639709473, | |
| "logps/rejected": -1363.47119140625, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.209524005651474, | |
| "rewards/margins": 13.2172269821167, | |
| "rewards/rejected": -13.007702827453613, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.354601073791984, | |
| "grad_norm": 0.057373046875, | |
| "learning_rate": 4.076432936221965e-06, | |
| "logits/chosen": -2.135999917984009, | |
| "logits/rejected": -2.3061912059783936, | |
| "logps/chosen": -0.5820466876029968, | |
| "logps/rejected": -1179.7847900390625, | |
| "loss": 0.2283, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2054443657398224, | |
| "rewards/margins": 11.520200729370117, | |
| "rewards/rejected": -11.314754486083984, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.35709826445249093, | |
| "grad_norm": 0.019775390625, | |
| "learning_rate": 4.059454797856039e-06, | |
| "logits/chosen": -2.172046184539795, | |
| "logits/rejected": -2.342928171157837, | |
| "logps/chosen": -0.7546096444129944, | |
| "logps/rejected": -1167.744873046875, | |
| "loss": 0.2286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20582588016986847, | |
| "rewards/margins": 11.390329360961914, | |
| "rewards/rejected": -11.184503555297852, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.3595954551129979, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 4.042358098652057e-06, | |
| "logits/chosen": -2.244403123855591, | |
| "logits/rejected": -2.4426932334899902, | |
| "logps/chosen": -1.5733036994934082, | |
| "logps/rejected": -1163.822998046875, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21082696318626404, | |
| "rewards/margins": 11.297124862670898, | |
| "rewards/rejected": -11.086297988891602, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.3620926457735048, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 4.025144138422615e-06, | |
| "logits/chosen": -2.189898729324341, | |
| "logits/rejected": -2.393465757369995, | |
| "logps/chosen": -1.2910453081130981, | |
| "logps/rejected": -1412.8597412109375, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21461701393127441, | |
| "rewards/margins": 13.799296379089355, | |
| "rewards/rejected": -13.584680557250977, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.3645898364340117, | |
| "grad_norm": 0.0169677734375, | |
| "learning_rate": 4.007814225895321e-06, | |
| "logits/chosen": -2.170092821121216, | |
| "logits/rejected": -2.3824923038482666, | |
| "logps/chosen": -0.8392337560653687, | |
| "logps/rejected": -1365.531005859375, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20813941955566406, | |
| "rewards/margins": 13.32819652557373, | |
| "rewards/rejected": -13.120054244995117, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.3670870270945187, | |
| "grad_norm": 0.017333984375, | |
| "learning_rate": 3.990369678613303e-06, | |
| "logits/chosen": -2.0936970710754395, | |
| "logits/rejected": -2.3042235374450684, | |
| "logps/chosen": -1.4599825143814087, | |
| "logps/rejected": -1356.390869140625, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21089884638786316, | |
| "rewards/margins": 12.903648376464844, | |
| "rewards/rejected": -12.6927490234375, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.3695842177550256, | |
| "grad_norm": 0.0240478515625, | |
| "learning_rate": 3.97281182283504e-06, | |
| "logits/chosen": -2.157559871673584, | |
| "logits/rejected": -2.371856927871704, | |
| "logps/chosen": -1.3865526914596558, | |
| "logps/rejected": -1416.440185546875, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20923642814159393, | |
| "rewards/margins": 13.767707824707031, | |
| "rewards/rejected": -13.558469772338867, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.3720814084155325, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 3.955141993433526e-06, | |
| "logits/chosen": -2.2016472816467285, | |
| "logits/rejected": -2.3889071941375732, | |
| "logps/chosen": -1.0489656925201416, | |
| "logps/rejected": -1286.4302978515625, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21162299811840057, | |
| "rewards/margins": 12.558609962463379, | |
| "rewards/rejected": -12.3469877243042, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.37457859907603946, | |
| "grad_norm": 0.053466796875, | |
| "learning_rate": 3.937361533794784e-06, | |
| "logits/chosen": -2.1290640830993652, | |
| "logits/rejected": -2.337486505508423, | |
| "logps/chosen": -1.496525526046753, | |
| "logps/rejected": -1124.3212890625, | |
| "loss": 0.2277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21092331409454346, | |
| "rewards/margins": 10.851540565490723, | |
| "rewards/rejected": -10.640616416931152, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3770757897365464, | |
| "grad_norm": 0.0233154296875, | |
| "learning_rate": 3.919471795715738e-06, | |
| "logits/chosen": -2.18410587310791, | |
| "logits/rejected": -2.3675644397735596, | |
| "logps/chosen": -0.84355628490448, | |
| "logps/rejected": -1166.61279296875, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2062278687953949, | |
| "rewards/margins": 11.381316184997559, | |
| "rewards/rejected": -11.175088882446289, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.37957298039705334, | |
| "grad_norm": 0.0390625, | |
| "learning_rate": 3.901474139301433e-06, | |
| "logits/chosen": -2.0796847343444824, | |
| "logits/rejected": -2.264577627182007, | |
| "logps/chosen": -0.6843720078468323, | |
| "logps/rejected": -1241.1590576171875, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21053218841552734, | |
| "rewards/margins": 12.031414031982422, | |
| "rewards/rejected": -11.820880889892578, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.38207017105756025, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.883369932861634e-06, | |
| "logits/chosen": -2.2165303230285645, | |
| "logits/rejected": -2.3859565258026123, | |
| "logps/chosen": -1.1263262033462524, | |
| "logps/rejected": -1200.8397216796875, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20847392082214355, | |
| "rewards/margins": 11.745917320251465, | |
| "rewards/rejected": -11.537444114685059, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.38456736171806716, | |
| "grad_norm": 0.06298828125, | |
| "learning_rate": 3.865160552806796e-06, | |
| "logits/chosen": -2.262539863586426, | |
| "logits/rejected": -2.4538345336914062, | |
| "logps/chosen": -1.3924305438995361, | |
| "logps/rejected": -1240.5035400390625, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20712998509407043, | |
| "rewards/margins": 12.123323440551758, | |
| "rewards/rejected": -11.916193008422852, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.3870645523785741, | |
| "grad_norm": 0.041015625, | |
| "learning_rate": 3.84684738354342e-06, | |
| "logits/chosen": -2.267106771469116, | |
| "logits/rejected": -2.4566650390625, | |
| "logps/chosen": -2.0142922401428223, | |
| "logps/rejected": -1211.2545166015625, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2093096524477005, | |
| "rewards/margins": 11.762309074401855, | |
| "rewards/rejected": -11.552999496459961, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.38956174303908103, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 3.828431817368798e-06, | |
| "logits/chosen": -2.141620397567749, | |
| "logits/rejected": -2.33925199508667, | |
| "logps/chosen": -1.531597375869751, | |
| "logps/rejected": -1257.968994140625, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2043263465166092, | |
| "rewards/margins": 12.106410026550293, | |
| "rewards/rejected": -11.902084350585938, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.39205893369958794, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 3.8099152543651684e-06, | |
| "logits/chosen": -2.3559296131134033, | |
| "logits/rejected": -2.583070993423462, | |
| "logps/chosen": -0.7891671061515808, | |
| "logps/rejected": -1441.2958984375, | |
| "loss": 0.2257, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20671992003917694, | |
| "rewards/margins": 14.086430549621582, | |
| "rewards/rejected": -13.87971019744873, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.3945561243600949, | |
| "grad_norm": 0.03857421875, | |
| "learning_rate": 3.791299102293261e-06, | |
| "logits/chosen": -2.1035549640655518, | |
| "logits/rejected": -2.3072731494903564, | |
| "logps/chosen": -1.0839884281158447, | |
| "logps/rejected": -1459.4197998046875, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21341785788536072, | |
| "rewards/margins": 14.197916984558105, | |
| "rewards/rejected": -13.98449993133545, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.3970533150206018, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 3.7725847764852774e-06, | |
| "logits/chosen": -2.10914945602417, | |
| "logits/rejected": -2.3385162353515625, | |
| "logps/chosen": -1.6078799962997437, | |
| "logps/rejected": -1307.208740234375, | |
| "loss": 0.2257, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2162178009748459, | |
| "rewards/margins": 12.54298210144043, | |
| "rewards/rejected": -12.326765060424805, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.3995505056811087, | |
| "grad_norm": 0.0267333984375, | |
| "learning_rate": 3.7537736997372833e-06, | |
| "logits/chosen": -2.1722114086151123, | |
| "logits/rejected": -2.3555681705474854, | |
| "logps/chosen": -1.133063793182373, | |
| "logps/rejected": -1113.764404296875, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21135945618152618, | |
| "rewards/margins": 10.682828903198242, | |
| "rewards/rejected": -10.471468925476074, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4020476963416157, | |
| "grad_norm": 0.020751953125, | |
| "learning_rate": 3.734867302201038e-06, | |
| "logits/chosen": -2.2481324672698975, | |
| "logits/rejected": -2.4178614616394043, | |
| "logps/chosen": -0.7748688459396362, | |
| "logps/rejected": -1153.1929931640625, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2061961144208908, | |
| "rewards/margins": 11.231634140014648, | |
| "rewards/rejected": -11.02543830871582, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4045448870021226, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 3.7158670212752666e-06, | |
| "logits/chosen": -2.158440113067627, | |
| "logits/rejected": -2.3695878982543945, | |
| "logps/chosen": -0.685897946357727, | |
| "logps/rejected": -1294.4326171875, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2151576578617096, | |
| "rewards/margins": 12.627668380737305, | |
| "rewards/rejected": -12.412511825561523, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.40704207766262956, | |
| "grad_norm": 0.015869140625, | |
| "learning_rate": 3.696774301496376e-06, | |
| "logits/chosen": -2.2252297401428223, | |
| "logits/rejected": -2.4217424392700195, | |
| "logps/chosen": -0.6748331785202026, | |
| "logps/rejected": -1261.10009765625, | |
| "loss": 0.2248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21283042430877686, | |
| "rewards/margins": 12.33554458618164, | |
| "rewards/rejected": -12.122715950012207, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.4095392683231365, | |
| "grad_norm": 0.0283203125, | |
| "learning_rate": 3.677590594428629e-06, | |
| "logits/chosen": -2.159726619720459, | |
| "logits/rejected": -2.3402228355407715, | |
| "logps/chosen": -0.9869475364685059, | |
| "logps/rejected": -1201.0703125, | |
| "loss": 0.2279, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20935773849487305, | |
| "rewards/margins": 11.699995994567871, | |
| "rewards/rejected": -11.490636825561523, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.4120364589836434, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 3.658317358553794e-06, | |
| "logits/chosen": -2.1311771869659424, | |
| "logits/rejected": -2.3283205032348633, | |
| "logps/chosen": -0.7873401045799255, | |
| "logps/rejected": -1318.947265625, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20913653075695038, | |
| "rewards/margins": 12.813528060913086, | |
| "rewards/rejected": -12.604392051696777, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.41453364964415035, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 3.638956059160252e-06, | |
| "logits/chosen": -2.180502414703369, | |
| "logits/rejected": -2.3862075805664062, | |
| "logps/chosen": -1.0054365396499634, | |
| "logps/rejected": -1342.7799072265625, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21474532783031464, | |
| "rewards/margins": 13.147130966186523, | |
| "rewards/rejected": -12.932388305664062, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.41703084030465726, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 3.6195081682315972e-06, | |
| "logits/chosen": -2.2029502391815186, | |
| "logits/rejected": -2.3754451274871826, | |
| "logps/chosen": -1.1696422100067139, | |
| "logps/rejected": -1324.997802734375, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20706875622272491, | |
| "rewards/margins": 12.972146987915039, | |
| "rewards/rejected": -12.765077590942383, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.41952803096516417, | |
| "grad_norm": 0.05126953125, | |
| "learning_rate": 3.5999751643347342e-06, | |
| "logits/chosen": -2.126647472381592, | |
| "logits/rejected": -2.32842755317688, | |
| "logps/chosen": -1.3129024505615234, | |
| "logps/rejected": -1431.062255859375, | |
| "loss": 0.2246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2164861261844635, | |
| "rewards/margins": 13.894182205200195, | |
| "rewards/rejected": -13.677694320678711, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.42202522162567113, | |
| "grad_norm": 0.05126953125, | |
| "learning_rate": 3.5803585325074536e-06, | |
| "logits/chosen": -2.1573426723480225, | |
| "logits/rejected": -2.3461415767669678, | |
| "logps/chosen": -0.5849089622497559, | |
| "logps/rejected": -1369.0498046875, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20605847239494324, | |
| "rewards/margins": 13.385258674621582, | |
| "rewards/rejected": -13.179201126098633, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.42452241228617804, | |
| "grad_norm": 0.041015625, | |
| "learning_rate": 3.5606597641455387e-06, | |
| "logits/chosen": -2.201714515686035, | |
| "logits/rejected": -2.3846235275268555, | |
| "logps/chosen": -1.2365072965621948, | |
| "logps/rejected": -1268.500732421875, | |
| "loss": 0.2273, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20618323981761932, | |
| "rewards/margins": 12.352148056030273, | |
| "rewards/rejected": -12.145965576171875, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.427019602946685, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 3.540880356889376e-06, | |
| "logits/chosen": -2.204244375228882, | |
| "logits/rejected": -2.37742280960083, | |
| "logps/chosen": -1.9021589756011963, | |
| "logps/rejected": -1228.02685546875, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1988053023815155, | |
| "rewards/margins": 11.881242752075195, | |
| "rewards/rejected": -11.682435035705566, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4295167936071919, | |
| "grad_norm": 0.05712890625, | |
| "learning_rate": 3.5210218145100934e-06, | |
| "logits/chosen": -2.1249117851257324, | |
| "logits/rejected": -2.343653917312622, | |
| "logps/chosen": -0.9779669642448425, | |
| "logps/rejected": -1107.069580078125, | |
| "loss": 0.2291, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20971660315990448, | |
| "rewards/margins": 10.748934745788574, | |
| "rewards/rejected": -10.53921890258789, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.4320139842676988, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 3.5010856467952335e-06, | |
| "logits/chosen": -2.135411262512207, | |
| "logits/rejected": -2.3283915519714355, | |
| "logps/chosen": -1.680784821510315, | |
| "logps/rejected": -1203.44873046875, | |
| "loss": 0.2277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21119749546051025, | |
| "rewards/margins": 11.60279655456543, | |
| "rewards/rejected": -11.391599655151367, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.4345111749282058, | |
| "grad_norm": 0.061279296875, | |
| "learning_rate": 3.4810733694339687e-06, | |
| "logits/chosen": -2.227553367614746, | |
| "logits/rejected": -2.4453303813934326, | |
| "logps/chosen": -1.1945085525512695, | |
| "logps/rejected": -1365.62158203125, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21306195855140686, | |
| "rewards/margins": 13.283732414245605, | |
| "rewards/rejected": -13.070669174194336, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.4370083655887127, | |
| "grad_norm": 0.026123046875, | |
| "learning_rate": 3.4609865039018676e-06, | |
| "logits/chosen": -2.2143800258636475, | |
| "logits/rejected": -2.38647198677063, | |
| "logps/chosen": -0.3982168138027191, | |
| "logps/rejected": -1256.0924072265625, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20522812008857727, | |
| "rewards/margins": 12.254903793334961, | |
| "rewards/rejected": -12.049676895141602, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.4395055562492196, | |
| "grad_norm": 0.017578125, | |
| "learning_rate": 3.4408265773452226e-06, | |
| "logits/chosen": -2.132845401763916, | |
| "logits/rejected": -2.32383394241333, | |
| "logps/chosen": -0.7928985953330994, | |
| "logps/rejected": -1260.4219970703125, | |
| "loss": 0.2281, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21432673931121826, | |
| "rewards/margins": 12.288119316101074, | |
| "rewards/rejected": -12.07379150390625, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.4420027469097266, | |
| "grad_norm": 0.027587890625, | |
| "learning_rate": 3.420595122464942e-06, | |
| "logits/chosen": -2.2310843467712402, | |
| "logits/rejected": -2.43049693107605, | |
| "logps/chosen": -1.0165212154388428, | |
| "logps/rejected": -1248.940673828125, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20808283984661102, | |
| "rewards/margins": 12.158212661743164, | |
| "rewards/rejected": -11.950130462646484, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.4444999375702335, | |
| "grad_norm": 0.05029296875, | |
| "learning_rate": 3.4002936774000284e-06, | |
| "logits/chosen": -2.129657030105591, | |
| "logits/rejected": -2.3626723289489746, | |
| "logps/chosen": -0.534063994884491, | |
| "logps/rejected": -1597.343017578125, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21577997505664825, | |
| "rewards/margins": 15.636571884155273, | |
| "rewards/rejected": -15.420791625976562, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.4469971282307404, | |
| "grad_norm": 0.02587890625, | |
| "learning_rate": 3.3799237856106348e-06, | |
| "logits/chosen": -2.1293628215789795, | |
| "logits/rejected": -2.3366832733154297, | |
| "logps/chosen": -0.6109465956687927, | |
| "logps/rejected": -1318.239990234375, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2080315351486206, | |
| "rewards/margins": 12.848733901977539, | |
| "rewards/rejected": -12.640703201293945, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.44949431889124736, | |
| "grad_norm": 0.060546875, | |
| "learning_rate": 3.35948699576072e-06, | |
| "logits/chosen": -2.0792922973632812, | |
| "logits/rejected": -2.285391330718994, | |
| "logps/chosen": -0.9549906849861145, | |
| "logps/rejected": -1534.51953125, | |
| "loss": 0.2244, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21579799056053162, | |
| "rewards/margins": 14.997169494628906, | |
| "rewards/rejected": -14.7813720703125, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.45199150955175427, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 3.3389848616003085e-06, | |
| "logits/chosen": -2.169448137283325, | |
| "logits/rejected": -2.34112286567688, | |
| "logps/chosen": -1.1561418771743774, | |
| "logps/rejected": -1331.464111328125, | |
| "loss": 0.2289, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20999138057231903, | |
| "rewards/margins": 12.991872787475586, | |
| "rewards/rejected": -12.781880378723145, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.45448870021226123, | |
| "grad_norm": 0.024169921875, | |
| "learning_rate": 3.3184189418473674e-06, | |
| "logits/chosen": -2.0690829753875732, | |
| "logits/rejected": -2.2553791999816895, | |
| "logps/chosen": -0.737138032913208, | |
| "logps/rejected": -1278.2681884765625, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20780067145824432, | |
| "rewards/margins": 12.481771469116211, | |
| "rewards/rejected": -12.273969650268555, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.45698589087276814, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 3.2977908000692925e-06, | |
| "logits/chosen": -2.1408801078796387, | |
| "logits/rejected": -2.3243911266326904, | |
| "logps/chosen": -1.5268166065216064, | |
| "logps/rejected": -1405.22412109375, | |
| "loss": 0.2274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20781424641609192, | |
| "rewards/margins": 13.740381240844727, | |
| "rewards/rejected": -13.532565116882324, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.45948308153327505, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 3.2771020045640435e-06, | |
| "logits/chosen": -2.286168336868286, | |
| "logits/rejected": -2.4684412479400635, | |
| "logps/chosen": -0.6708983182907104, | |
| "logps/rejected": -1134.7979736328125, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21890632808208466, | |
| "rewards/margins": 11.039240837097168, | |
| "rewards/rejected": -10.820335388183594, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.461980272193782, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 3.256354128240907e-06, | |
| "logits/chosen": -2.06745982170105, | |
| "logits/rejected": -2.248892307281494, | |
| "logps/chosen": -1.6344282627105713, | |
| "logps/rejected": -1263.974853515625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21318969130516052, | |
| "rewards/margins": 12.202125549316406, | |
| "rewards/rejected": -11.988935470581055, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.4644774628542889, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 3.235548748500914e-06, | |
| "logits/chosen": -2.3071300983428955, | |
| "logits/rejected": -2.500091314315796, | |
| "logps/chosen": -1.0427045822143555, | |
| "logps/rejected": -1357.378662109375, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20457443594932556, | |
| "rewards/margins": 13.286227226257324, | |
| "rewards/rejected": -13.081652641296387, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.46697465351479583, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 3.214687447116913e-06, | |
| "logits/chosen": -2.10600209236145, | |
| "logits/rejected": -2.302873373031616, | |
| "logps/chosen": -0.6546305418014526, | |
| "logps/rejected": -1224.43359375, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20871946215629578, | |
| "rewards/margins": 11.745490074157715, | |
| "rewards/rejected": -11.536770820617676, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.4694718441753028, | |
| "grad_norm": 0.01104736328125, | |
| "learning_rate": 3.193771810113313e-06, | |
| "logits/chosen": -2.1570992469787598, | |
| "logits/rejected": -2.384364604949951, | |
| "logps/chosen": -1.154052495956421, | |
| "logps/rejected": -1359.59619140625, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21566633880138397, | |
| "rewards/margins": 13.259126663208008, | |
| "rewards/rejected": -13.043458938598633, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.4719690348358097, | |
| "grad_norm": 0.0174560546875, | |
| "learning_rate": 3.1728034276455032e-06, | |
| "logits/chosen": -2.138918399810791, | |
| "logits/rejected": -2.335463047027588, | |
| "logps/chosen": -0.595456600189209, | |
| "logps/rejected": -1286.499267578125, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21340537071228027, | |
| "rewards/margins": 12.50808048248291, | |
| "rewards/rejected": -12.294673919677734, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.4744662254963166, | |
| "grad_norm": 0.0191650390625, | |
| "learning_rate": 3.1517838938789597e-06, | |
| "logits/chosen": -2.1312789916992188, | |
| "logits/rejected": -2.3574845790863037, | |
| "logps/chosen": -1.0333608388900757, | |
| "logps/rejected": -1402.2928466796875, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21823573112487793, | |
| "rewards/margins": 13.426950454711914, | |
| "rewards/rejected": -13.208715438842773, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.4769634161568236, | |
| "grad_norm": 0.021240234375, | |
| "learning_rate": 3.130714806868041e-06, | |
| "logits/chosen": -2.1018803119659424, | |
| "logits/rejected": -2.2899601459503174, | |
| "logps/chosen": -1.5672905445098877, | |
| "logps/rejected": -1282.0211181640625, | |
| "loss": 0.2247, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2161625623703003, | |
| "rewards/margins": 12.432838439941406, | |
| "rewards/rejected": -12.216676712036133, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.4794606068173305, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 3.1095977684344976e-06, | |
| "logits/chosen": -2.1870434284210205, | |
| "logits/rejected": -2.3968632221221924, | |
| "logps/chosen": -0.9621860384941101, | |
| "logps/rejected": -1362.8802490234375, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21506325900554657, | |
| "rewards/margins": 13.313095092773438, | |
| "rewards/rejected": -13.098034858703613, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.48195779747783746, | |
| "grad_norm": 0.052978515625, | |
| "learning_rate": 3.0884343840456874e-06, | |
| "logits/chosen": -2.2485427856445312, | |
| "logits/rejected": -2.4523234367370605, | |
| "logps/chosen": -0.8971269726753235, | |
| "logps/rejected": -1507.810791015625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20877547562122345, | |
| "rewards/margins": 14.738133430480957, | |
| "rewards/rejected": -14.529356002807617, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.48445498813834437, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 3.0672262626925174e-06, | |
| "logits/chosen": -2.148587942123413, | |
| "logits/rejected": -2.359325408935547, | |
| "logps/chosen": -2.250260353088379, | |
| "logps/rejected": -1421.3468017578125, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22058424353599548, | |
| "rewards/margins": 13.836527824401855, | |
| "rewards/rejected": -13.615945816040039, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.4869521787988513, | |
| "grad_norm": 0.06640625, | |
| "learning_rate": 3.0459750167671147e-06, | |
| "logits/chosen": -2.1717689037323, | |
| "logits/rejected": -2.403097629547119, | |
| "logps/chosen": -1.1346304416656494, | |
| "logps/rejected": -1519.8033447265625, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21193809807300568, | |
| "rewards/margins": 14.729626655578613, | |
| "rewards/rejected": -14.517687797546387, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.48944936945935824, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 3.024682261940247e-06, | |
| "logits/chosen": -2.1400859355926514, | |
| "logits/rejected": -2.3196842670440674, | |
| "logps/chosen": -1.9256393909454346, | |
| "logps/rejected": -1212.4700927734375, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21558912098407745, | |
| "rewards/margins": 11.704329490661621, | |
| "rewards/rejected": -11.488740921020508, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.49194656011986515, | |
| "grad_norm": 0.053466796875, | |
| "learning_rate": 3.0033496170384803e-06, | |
| "logits/chosen": -2.2003872394561768, | |
| "logits/rejected": -2.384770154953003, | |
| "logps/chosen": -0.6797516345977783, | |
| "logps/rejected": -1223.4056396484375, | |
| "loss": 0.2273, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20655830204486847, | |
| "rewards/margins": 11.95020866394043, | |
| "rewards/rejected": -11.743650436401367, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.49444375078037206, | |
| "grad_norm": 0.05126953125, | |
| "learning_rate": 2.9819787039211068e-06, | |
| "logits/chosen": -2.1409530639648438, | |
| "logits/rejected": -2.3441128730773926, | |
| "logps/chosen": -1.6590759754180908, | |
| "logps/rejected": -1320.5748291015625, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21551513671875, | |
| "rewards/margins": 12.755599021911621, | |
| "rewards/rejected": -12.540084838867188, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.496940941440879, | |
| "grad_norm": 0.015869140625, | |
| "learning_rate": 2.960571147356845e-06, | |
| "logits/chosen": -2.2252392768859863, | |
| "logits/rejected": -2.4482040405273438, | |
| "logps/chosen": -0.6751580238342285, | |
| "logps/rejected": -1514.2879638671875, | |
| "loss": 0.2233, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22100117802619934, | |
| "rewards/margins": 14.839349746704102, | |
| "rewards/rejected": -14.618349075317383, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.49943813210138593, | |
| "grad_norm": 0.11376953125, | |
| "learning_rate": 2.9391285749003046e-06, | |
| "logits/chosen": -2.1313652992248535, | |
| "logits/rejected": -2.3276991844177246, | |
| "logps/chosen": -1.28163743019104, | |
| "logps/rejected": -1614.152099609375, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21389129757881165, | |
| "rewards/margins": 15.772817611694336, | |
| "rewards/rejected": -15.558927536010742, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.49943813210138593, | |
| "eval_logits/chosen": -2.568960428237915, | |
| "eval_logits/rejected": -2.656001329421997, | |
| "eval_logps/chosen": -0.1526380479335785, | |
| "eval_logps/rejected": -643.470458984375, | |
| "eval_loss": 0.2215292751789093, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 0.25760865211486816, | |
| "eval_rewards/margins": 6.243593215942383, | |
| "eval_rewards/rejected": -5.985984802246094, | |
| "eval_runtime": 0.6593, | |
| "eval_samples_per_second": 7.584, | |
| "eval_steps_per_second": 4.551, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5019353227618929, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 2.9176526167682543e-06, | |
| "logits/chosen": -2.0913753509521484, | |
| "logits/rejected": -2.273857593536377, | |
| "logps/chosen": -0.7355623841285706, | |
| "logps/rejected": -1363.037841796875, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2061166763305664, | |
| "rewards/margins": 13.315282821655273, | |
| "rewards/rejected": -13.109164237976074, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5044325134223998, | |
| "grad_norm": 0.0242919921875, | |
| "learning_rate": 2.8961449057156775e-06, | |
| "logits/chosen": -2.1776702404022217, | |
| "logits/rejected": -2.3788368701934814, | |
| "logps/chosen": -1.159735918045044, | |
| "logps/rejected": -1370.439697265625, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21482279896736145, | |
| "rewards/margins": 13.327527046203613, | |
| "rewards/rejected": -13.112703323364258, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5069297040829067, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 2.874607076911642e-06, | |
| "logits/chosen": -2.1823270320892334, | |
| "logits/rejected": -2.400944471359253, | |
| "logps/chosen": -1.355530023574829, | |
| "logps/rejected": -1275.2886962890625, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2216695249080658, | |
| "rewards/margins": 12.436738967895508, | |
| "rewards/rejected": -12.215067863464355, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5094268947434136, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 2.8530407678149806e-06, | |
| "logits/chosen": -2.1733579635620117, | |
| "logits/rejected": -2.3787028789520264, | |
| "logps/chosen": -2.122178554534912, | |
| "logps/rejected": -1217.6248779296875, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21247439086437225, | |
| "rewards/margins": 11.738574028015137, | |
| "rewards/rejected": -11.526100158691406, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5119240854039205, | |
| "grad_norm": 0.0164794921875, | |
| "learning_rate": 2.8314476180498003e-06, | |
| "logits/chosen": -2.010568618774414, | |
| "logits/rejected": -2.1947145462036133, | |
| "logps/chosen": -0.8790448904037476, | |
| "logps/rejected": -1320.770263671875, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20843760669231415, | |
| "rewards/margins": 12.884744644165039, | |
| "rewards/rejected": -12.67630672454834, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5144212760644276, | |
| "grad_norm": 0.037109375, | |
| "learning_rate": 2.8098292692808253e-06, | |
| "logits/chosen": -2.1951942443847656, | |
| "logits/rejected": -2.3474528789520264, | |
| "logps/chosen": -0.8600829839706421, | |
| "logps/rejected": -1061.1048583984375, | |
| "loss": 0.2279, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20944657921791077, | |
| "rewards/margins": 10.36804485321045, | |
| "rewards/rejected": -10.158597946166992, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5169184667249345, | |
| "grad_norm": 0.0224609375, | |
| "learning_rate": 2.7881873650885904e-06, | |
| "logits/chosen": -2.1963181495666504, | |
| "logits/rejected": -2.3679440021514893, | |
| "logps/chosen": -0.8357653617858887, | |
| "logps/rejected": -1268.226318359375, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21145395934581757, | |
| "rewards/margins": 12.395639419555664, | |
| "rewards/rejected": -12.184186935424805, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5194156573854414, | |
| "grad_norm": 0.03955078125, | |
| "learning_rate": 2.7665235508444772e-06, | |
| "logits/chosen": -2.131880044937134, | |
| "logits/rejected": -2.329930067062378, | |
| "logps/chosen": -0.8339768648147583, | |
| "logps/rejected": -1511.36962890625, | |
| "loss": 0.2278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2037159651517868, | |
| "rewards/margins": 14.78296184539795, | |
| "rewards/rejected": -14.579244613647461, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5219128480459483, | |
| "grad_norm": 0.1123046875, | |
| "learning_rate": 2.7448394735856275e-06, | |
| "logits/chosen": -2.0990092754364014, | |
| "logits/rejected": -2.317046642303467, | |
| "logps/chosen": -0.900246798992157, | |
| "logps/rejected": -1560.1123046875, | |
| "loss": 0.2273, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20990662276744843, | |
| "rewards/margins": 15.233263969421387, | |
| "rewards/rejected": -15.023355484008789, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5244100387064552, | |
| "grad_norm": 0.05810546875, | |
| "learning_rate": 2.723136781889722e-06, | |
| "logits/chosen": -2.221381664276123, | |
| "logits/rejected": -2.4073383808135986, | |
| "logps/chosen": -1.555213451385498, | |
| "logps/rejected": -1313.25439453125, | |
| "loss": 0.2281, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21150963008403778, | |
| "rewards/margins": 12.850160598754883, | |
| "rewards/rejected": -12.638651847839355, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5269072293669622, | |
| "grad_norm": 0.031494140625, | |
| "learning_rate": 2.7014171257496414e-06, | |
| "logits/chosen": -2.224299669265747, | |
| "logits/rejected": -2.4082083702087402, | |
| "logps/chosen": -1.5661276578903198, | |
| "logps/rejected": -1288.989013671875, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2089545726776123, | |
| "rewards/margins": 12.412330627441406, | |
| "rewards/rejected": -12.203374862670898, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5294044200274691, | |
| "grad_norm": 0.044189453125, | |
| "learning_rate": 2.6796821564480237e-06, | |
| "logits/chosen": -2.143993854522705, | |
| "logits/rejected": -2.3330225944519043, | |
| "logps/chosen": -1.3014509677886963, | |
| "logps/rejected": -1159.53271484375, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21552510559558868, | |
| "rewards/margins": 11.207192420959473, | |
| "rewards/rejected": -10.991667747497559, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.531901610687976, | |
| "grad_norm": 0.032958984375, | |
| "learning_rate": 2.6579335264317253e-06, | |
| "logits/chosen": -2.2805047035217285, | |
| "logits/rejected": -2.4840075969696045, | |
| "logps/chosen": -0.6564453840255737, | |
| "logps/rejected": -1376.549560546875, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20875303447246552, | |
| "rewards/margins": 13.3289794921875, | |
| "rewards/rejected": -13.120226860046387, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.5343988013484829, | |
| "grad_norm": 0.02587890625, | |
| "learning_rate": 2.6361728891861843e-06, | |
| "logits/chosen": -2.044534206390381, | |
| "logits/rejected": -2.263455629348755, | |
| "logps/chosen": -2.359926462173462, | |
| "logps/rejected": -1182.7542724609375, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21868690848350525, | |
| "rewards/margins": 11.231551170349121, | |
| "rewards/rejected": -11.01286506652832, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.5368959920089899, | |
| "grad_norm": 0.0235595703125, | |
| "learning_rate": 2.614401899109716e-06, | |
| "logits/chosen": -2.2184996604919434, | |
| "logits/rejected": -2.4115943908691406, | |
| "logps/chosen": -0.7188009023666382, | |
| "logps/rejected": -1362.302490234375, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20849958062171936, | |
| "rewards/margins": 13.323092460632324, | |
| "rewards/rejected": -13.114593505859375, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.5393931826694968, | |
| "grad_norm": 0.023681640625, | |
| "learning_rate": 2.5926222113877282e-06, | |
| "logits/chosen": -2.2279531955718994, | |
| "logits/rejected": -2.4470245838165283, | |
| "logps/chosen": -0.8932285308837891, | |
| "logps/rejected": -1380.791748046875, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20549210906028748, | |
| "rewards/margins": 13.206730842590332, | |
| "rewards/rejected": -13.001237869262695, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.5418903733300038, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 2.570835481866889e-06, | |
| "logits/chosen": -2.122584819793701, | |
| "logits/rejected": -2.3029303550720215, | |
| "logps/chosen": -0.6316767334938049, | |
| "logps/rejected": -1331.388916015625, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20623505115509033, | |
| "rewards/margins": 13.001462936401367, | |
| "rewards/rejected": -12.795228958129883, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.5443875639905107, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 2.5490433669292337e-06, | |
| "logits/chosen": -2.044675350189209, | |
| "logits/rejected": -2.251300811767578, | |
| "logps/chosen": -0.7981548309326172, | |
| "logps/rejected": -1485.2850341796875, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2164611518383026, | |
| "rewards/margins": 14.53178596496582, | |
| "rewards/rejected": -14.315322875976562, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.5468847546510176, | |
| "grad_norm": 0.038330078125, | |
| "learning_rate": 2.527247523366232e-06, | |
| "logits/chosen": -2.2029881477355957, | |
| "logits/rejected": -2.4012579917907715, | |
| "logps/chosen": -1.3100454807281494, | |
| "logps/rejected": -1426.16357421875, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2108200490474701, | |
| "rewards/margins": 13.932962417602539, | |
| "rewards/rejected": -13.722142219543457, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.5493819453115245, | |
| "grad_norm": 0.02978515625, | |
| "learning_rate": 2.5054496082528336e-06, | |
| "logits/chosen": -2.263662576675415, | |
| "logits/rejected": -2.4767444133758545, | |
| "logps/chosen": -0.6738319993019104, | |
| "logps/rejected": -1380.506103515625, | |
| "loss": 0.2246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21471872925758362, | |
| "rewards/margins": 13.528160095214844, | |
| "rewards/rejected": -13.313441276550293, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.5518791359720314, | |
| "grad_norm": 0.03955078125, | |
| "learning_rate": 2.483651278821481e-06, | |
| "logits/chosen": -2.2110023498535156, | |
| "logits/rejected": -2.4015591144561768, | |
| "logps/chosen": -1.228434443473816, | |
| "logps/rejected": -1266.2230224609375, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20852570235729218, | |
| "rewards/margins": 12.340888977050781, | |
| "rewards/rejected": -12.13236141204834, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.5543763266325384, | |
| "grad_norm": 0.0238037109375, | |
| "learning_rate": 2.4618541923361166e-06, | |
| "logits/chosen": -2.3842873573303223, | |
| "logits/rejected": -2.558562994003296, | |
| "logps/chosen": -1.321533203125, | |
| "logps/rejected": -1156.223876953125, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20611576735973358, | |
| "rewards/margins": 11.165016174316406, | |
| "rewards/rejected": -10.958898544311523, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.5568735172930454, | |
| "grad_norm": 0.06005859375, | |
| "learning_rate": 2.4400600059661836e-06, | |
| "logits/chosen": -2.069483757019043, | |
| "logits/rejected": -2.31620717048645, | |
| "logps/chosen": -1.093656301498413, | |
| "logps/rejected": -1508.9503173828125, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2100195437669754, | |
| "rewards/margins": 14.735700607299805, | |
| "rewards/rejected": -14.52568244934082, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.5593707079535523, | |
| "grad_norm": 0.009765625, | |
| "learning_rate": 2.41827037666064e-06, | |
| "logits/chosen": -2.2314319610595703, | |
| "logits/rejected": -2.4116859436035156, | |
| "logps/chosen": -0.6631449460983276, | |
| "logps/rejected": -1216.8101806640625, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2116110622882843, | |
| "rewards/margins": 11.8889799118042, | |
| "rewards/rejected": -11.677370071411133, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.5618678986140592, | |
| "grad_norm": 0.035888671875, | |
| "learning_rate": 2.396486961021983e-06, | |
| "logits/chosen": -2.156050443649292, | |
| "logits/rejected": -2.355743885040283, | |
| "logps/chosen": -0.5853773951530457, | |
| "logps/rejected": -1307.397705078125, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21641604602336884, | |
| "rewards/margins": 12.776580810546875, | |
| "rewards/rejected": -12.560165405273438, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.5643650892745661, | |
| "grad_norm": 0.0247802734375, | |
| "learning_rate": 2.3747114151802993e-06, | |
| "logits/chosen": -2.2995388507843018, | |
| "logits/rejected": -2.4979677200317383, | |
| "logps/chosen": -1.0234979391098022, | |
| "logps/rejected": -1314.0380859375, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2088872194290161, | |
| "rewards/margins": 12.864030838012695, | |
| "rewards/rejected": -12.655143737792969, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.566862279935073, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 2.352945394667363e-06, | |
| "logits/chosen": -2.087890386581421, | |
| "logits/rejected": -2.308422803878784, | |
| "logps/chosen": -0.9035698771476746, | |
| "logps/rejected": -1510.1090087890625, | |
| "loss": 0.2248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2111314833164215, | |
| "rewards/margins": 14.675390243530273, | |
| "rewards/rejected": -14.464259147644043, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.56935947059558, | |
| "grad_norm": 0.126953125, | |
| "learning_rate": 2.3311905542907627e-06, | |
| "logits/chosen": -2.234039545059204, | |
| "logits/rejected": -2.428889751434326, | |
| "logps/chosen": -0.797686755657196, | |
| "logps/rejected": -1220.7269287109375, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2120717316865921, | |
| "rewards/margins": 11.918030738830566, | |
| "rewards/rejected": -11.70595932006836, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.5718566612560869, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 2.30944854800809e-06, | |
| "logits/chosen": -2.1873550415039062, | |
| "logits/rejected": -2.3636820316314697, | |
| "logps/chosen": -0.8641906976699829, | |
| "logps/rejected": -1375.240478515625, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21045894920825958, | |
| "rewards/margins": 13.458274841308594, | |
| "rewards/rejected": -13.247815132141113, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.5743538519165938, | |
| "grad_norm": 0.0224609375, | |
| "learning_rate": 2.287721028801204e-06, | |
| "logits/chosen": -2.147500991821289, | |
| "logits/rejected": -2.3285794258117676, | |
| "logps/chosen": -1.5540382862091064, | |
| "logps/rejected": -1261.9169921875, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2090359479188919, | |
| "rewards/margins": 12.258954048156738, | |
| "rewards/rejected": -12.049917221069336, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.5768510425771007, | |
| "grad_norm": 0.224609375, | |
| "learning_rate": 2.26600964855055e-06, | |
| "logits/chosen": -2.2112767696380615, | |
| "logits/rejected": -2.387683868408203, | |
| "logps/chosen": -1.0878078937530518, | |
| "logps/rejected": -1259.334716796875, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2047530710697174, | |
| "rewards/margins": 12.326273918151855, | |
| "rewards/rejected": -12.121520042419434, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.5793482332376076, | |
| "grad_norm": 0.033203125, | |
| "learning_rate": 2.244316057909573e-06, | |
| "logits/chosen": -2.179072856903076, | |
| "logits/rejected": -2.3518600463867188, | |
| "logps/chosen": -0.5903832912445068, | |
| "logps/rejected": -1252.9005126953125, | |
| "loss": 0.2288, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20970389246940613, | |
| "rewards/margins": 12.249414443969727, | |
| "rewards/rejected": -12.039710998535156, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.5818454238981147, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 2.2226419061792282e-06, | |
| "logits/chosen": -2.2571616172790527, | |
| "logits/rejected": -2.4548702239990234, | |
| "logps/chosen": -0.747587263584137, | |
| "logps/rejected": -1403.0311279296875, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20638947188854218, | |
| "rewards/margins": 13.71589183807373, | |
| "rewards/rejected": -13.509503364562988, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.5843426145586216, | |
| "grad_norm": 0.0079345703125, | |
| "learning_rate": 2.200988841182589e-06, | |
| "logits/chosen": -2.1915557384490967, | |
| "logits/rejected": -2.3925371170043945, | |
| "logps/chosen": -0.653125524520874, | |
| "logps/rejected": -1481.6878662109375, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20729561150074005, | |
| "rewards/margins": 14.506765365600586, | |
| "rewards/rejected": -14.299470901489258, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.5868398052191285, | |
| "grad_norm": 0.0286865234375, | |
| "learning_rate": 2.179358509139559e-06, | |
| "logits/chosen": -2.149214267730713, | |
| "logits/rejected": -2.344883680343628, | |
| "logps/chosen": -2.6051526069641113, | |
| "logps/rejected": -1142.56201171875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21485964953899384, | |
| "rewards/margins": 11.022435188293457, | |
| "rewards/rejected": -10.807573318481445, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.5893369958796354, | |
| "grad_norm": 0.041748046875, | |
| "learning_rate": 2.1577525545417254e-06, | |
| "logits/chosen": -2.1596992015838623, | |
| "logits/rejected": -2.3585286140441895, | |
| "logps/chosen": -0.6524207592010498, | |
| "logps/rejected": -1219.5198974609375, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2145775854587555, | |
| "rewards/margins": 11.869647026062012, | |
| "rewards/rejected": -11.655069351196289, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.5918341865401423, | |
| "grad_norm": 0.203125, | |
| "learning_rate": 2.1361726200273293e-06, | |
| "logits/chosen": -2.247102737426758, | |
| "logits/rejected": -2.4553802013397217, | |
| "logps/chosen": -1.189576506614685, | |
| "logps/rejected": -1349.142578125, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21253642439842224, | |
| "rewards/margins": 13.139638900756836, | |
| "rewards/rejected": -12.927103042602539, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.5943313772006493, | |
| "grad_norm": 0.035888671875, | |
| "learning_rate": 2.1146203462563773e-06, | |
| "logits/chosen": -2.302658796310425, | |
| "logits/rejected": -2.4925646781921387, | |
| "logps/chosen": -0.5675852298736572, | |
| "logps/rejected": -1279.3642578125, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20174381136894226, | |
| "rewards/margins": 12.505022048950195, | |
| "rewards/rejected": -12.303278923034668, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.5968285678611562, | |
| "grad_norm": 0.024169921875, | |
| "learning_rate": 2.0930973717859117e-06, | |
| "logits/chosen": -2.3194613456726074, | |
| "logits/rejected": -2.526947498321533, | |
| "logps/chosen": -0.6186977624893188, | |
| "logps/rejected": -1298.871826171875, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21502625942230225, | |
| "rewards/margins": 12.666671752929688, | |
| "rewards/rejected": -12.45164680480957, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.5993257585216631, | |
| "grad_norm": 0.0150146484375, | |
| "learning_rate": 2.0716053329454337e-06, | |
| "logits/chosen": -2.0586659908294678, | |
| "logits/rejected": -2.262817621231079, | |
| "logps/chosen": -1.2787067890167236, | |
| "logps/rejected": -1338.0716552734375, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21538551151752472, | |
| "rewards/margins": 12.998420715332031, | |
| "rewards/rejected": -12.783034324645996, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.60182294918217, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 2.0501458637124963e-06, | |
| "logits/chosen": -2.1946122646331787, | |
| "logits/rejected": -2.4308152198791504, | |
| "logps/chosen": -0.9974037408828735, | |
| "logps/rejected": -1574.956787109375, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21448758244514465, | |
| "rewards/margins": 15.443408012390137, | |
| "rewards/rejected": -15.228919982910156, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.604320139842677, | |
| "grad_norm": 0.021484375, | |
| "learning_rate": 2.0287205955884812e-06, | |
| "logits/chosen": -2.1859405040740967, | |
| "logits/rejected": -2.419334888458252, | |
| "logps/chosen": -1.4137351512908936, | |
| "logps/rejected": -1243.725830078125, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21700558066368103, | |
| "rewards/margins": 11.884050369262695, | |
| "rewards/rejected": -11.667045593261719, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6068173305031839, | |
| "grad_norm": 0.0595703125, | |
| "learning_rate": 2.0073311574745583e-06, | |
| "logits/chosen": -2.162872791290283, | |
| "logits/rejected": -2.378561019897461, | |
| "logps/chosen": -0.7903895378112793, | |
| "logps/rejected": -1427.887939453125, | |
| "loss": 0.2243, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21638064086437225, | |
| "rewards/margins": 13.9492769241333, | |
| "rewards/rejected": -13.73289680480957, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6093145211636909, | |
| "grad_norm": 0.037841796875, | |
| "learning_rate": 1.9859791755478453e-06, | |
| "logits/chosen": -2.1776349544525146, | |
| "logits/rejected": -2.3626227378845215, | |
| "logps/chosen": -1.0283732414245605, | |
| "logps/rejected": -1148.4774169921875, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21198368072509766, | |
| "rewards/margins": 11.212942123413086, | |
| "rewards/rejected": -11.000959396362305, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6118117118241978, | |
| "grad_norm": 0.0311279296875, | |
| "learning_rate": 1.9646662731377737e-06, | |
| "logits/chosen": -2.130434989929199, | |
| "logits/rejected": -2.3274593353271484, | |
| "logps/chosen": -0.7933204770088196, | |
| "logps/rejected": -1231.31201171875, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20909221470355988, | |
| "rewards/margins": 11.990662574768066, | |
| "rewards/rejected": -11.781569480895996, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6143089024847047, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 1.9433940706026743e-06, | |
| "logits/chosen": -2.162235736846924, | |
| "logits/rejected": -2.3636813163757324, | |
| "logps/chosen": -0.8596396446228027, | |
| "logps/rejected": -1512.643798828125, | |
| "loss": 0.2267, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21136781573295593, | |
| "rewards/margins": 14.79273796081543, | |
| "rewards/rejected": -14.581372261047363, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.6168060931452116, | |
| "grad_norm": 0.040283203125, | |
| "learning_rate": 1.9221641852065807e-06, | |
| "logits/chosen": -2.153958797454834, | |
| "logits/rejected": -2.322754383087158, | |
| "logps/chosen": -0.7868290543556213, | |
| "logps/rejected": -1277.087890625, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21162231266498566, | |
| "rewards/margins": 12.481648445129395, | |
| "rewards/rejected": -12.270025253295898, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.6193032838057185, | |
| "grad_norm": 0.0302734375, | |
| "learning_rate": 1.9009782309962805e-06, | |
| "logits/chosen": -2.2541210651397705, | |
| "logits/rejected": -2.451572895050049, | |
| "logps/chosen": -0.9773980379104614, | |
| "logps/rejected": -1259.029296875, | |
| "loss": 0.2243, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21588608622550964, | |
| "rewards/margins": 12.217048645019531, | |
| "rewards/rejected": -12.001164436340332, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.6218004744662256, | |
| "grad_norm": 0.042236328125, | |
| "learning_rate": 1.8798378186785979e-06, | |
| "logits/chosen": -2.208289623260498, | |
| "logits/rejected": -2.3975791931152344, | |
| "logps/chosen": -0.47841542959213257, | |
| "logps/rejected": -1317.6165771484375, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21394380927085876, | |
| "rewards/margins": 12.884051322937012, | |
| "rewards/rejected": -12.670106887817383, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.6242976651267325, | |
| "grad_norm": 0.0172119140625, | |
| "learning_rate": 1.8587445554979404e-06, | |
| "logits/chosen": -2.054529905319214, | |
| "logits/rejected": -2.2491745948791504, | |
| "logps/chosen": -0.9916723370552063, | |
| "logps/rejected": -1467.903076171875, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21281161904335022, | |
| "rewards/margins": 14.350473403930664, | |
| "rewards/rejected": -14.137661933898926, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6267948557872394, | |
| "grad_norm": 0.04052734375, | |
| "learning_rate": 1.8377000451141013e-06, | |
| "logits/chosen": -2.1033387184143066, | |
| "logits/rejected": -2.311828136444092, | |
| "logps/chosen": -1.013270616531372, | |
| "logps/rejected": -1430.32568359375, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2092626839876175, | |
| "rewards/margins": 13.935432434082031, | |
| "rewards/rejected": -13.726168632507324, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.6292920464477463, | |
| "grad_norm": 0.033203125, | |
| "learning_rate": 1.8167058874803405e-06, | |
| "logits/chosen": -2.2198266983032227, | |
| "logits/rejected": -2.435263156890869, | |
| "logps/chosen": -1.5374799966812134, | |
| "logps/rejected": -1410.561279296875, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21642649173736572, | |
| "rewards/margins": 13.645418167114258, | |
| "rewards/rejected": -13.428991317749023, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.6317892371082532, | |
| "grad_norm": 0.025390625, | |
| "learning_rate": 1.7957636787217451e-06, | |
| "logits/chosen": -2.1474337577819824, | |
| "logits/rejected": -2.3489108085632324, | |
| "logps/chosen": -0.525337278842926, | |
| "logps/rejected": -1465.3909912109375, | |
| "loss": 0.2257, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21127943694591522, | |
| "rewards/margins": 14.354647636413574, | |
| "rewards/rejected": -14.1433687210083, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.6342864277687601, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.7748750110138768e-06, | |
| "logits/chosen": -2.1010701656341553, | |
| "logits/rejected": -2.3061635494232178, | |
| "logps/chosen": -1.495689034461975, | |
| "logps/rejected": -1522.001953125, | |
| "loss": 0.2248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21690087020397186, | |
| "rewards/margins": 14.7809476852417, | |
| "rewards/rejected": -14.564045906066895, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.6367836184292671, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 1.7540414724617282e-06, | |
| "logits/chosen": -2.0483648777008057, | |
| "logits/rejected": -2.2502453327178955, | |
| "logps/chosen": -1.7171008586883545, | |
| "logps/rejected": -1322.4296875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21715514361858368, | |
| "rewards/margins": 12.821990966796875, | |
| "rewards/rejected": -12.604835510253906, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.639280809089774, | |
| "grad_norm": 0.0155029296875, | |
| "learning_rate": 1.7332646469789827e-06, | |
| "logits/chosen": -2.2271251678466797, | |
| "logits/rejected": -2.4021248817443848, | |
| "logps/chosen": -0.7044438719749451, | |
| "logps/rejected": -1151.026611328125, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21061739325523376, | |
| "rewards/margins": 11.253252983093262, | |
| "rewards/rejected": -11.042635917663574, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.6417779997502809, | |
| "grad_norm": 0.031494140625, | |
| "learning_rate": 1.7125461141675881e-06, | |
| "logits/chosen": -2.115159034729004, | |
| "logits/rejected": -2.321096181869507, | |
| "logps/chosen": -1.5179011821746826, | |
| "logps/rejected": -1341.9727783203125, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21012921631336212, | |
| "rewards/margins": 13.043965339660645, | |
| "rewards/rejected": -12.833836555480957, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.6442751904107878, | |
| "grad_norm": 0.07177734375, | |
| "learning_rate": 1.6918874491976744e-06, | |
| "logits/chosen": -2.262359619140625, | |
| "logits/rejected": -2.4549667835235596, | |
| "logps/chosen": -1.1417173147201538, | |
| "logps/rejected": -1349.908203125, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21297034621238708, | |
| "rewards/margins": 13.129191398620605, | |
| "rewards/rejected": -12.916219711303711, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.6467723810712948, | |
| "grad_norm": 0.019287109375, | |
| "learning_rate": 1.6712902226877917e-06, | |
| "logits/chosen": -2.1325788497924805, | |
| "logits/rejected": -2.323542356491089, | |
| "logps/chosen": -1.002483606338501, | |
| "logps/rejected": -1407.6922607421875, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21138958632946014, | |
| "rewards/margins": 13.752557754516602, | |
| "rewards/rejected": -13.541168212890625, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.6492695717318018, | |
| "grad_norm": 0.046875, | |
| "learning_rate": 1.6507560005854977e-06, | |
| "logits/chosen": -2.0466830730438232, | |
| "logits/rejected": -2.254211664199829, | |
| "logps/chosen": -1.2699908018112183, | |
| "logps/rejected": -1284.965576171875, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22062024474143982, | |
| "rewards/margins": 12.382702827453613, | |
| "rewards/rejected": -12.16208267211914, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6517667623923087, | |
| "grad_norm": 0.0283203125, | |
| "learning_rate": 1.6302863440483121e-06, | |
| "logits/chosen": -2.102281093597412, | |
| "logits/rejected": -2.344468832015991, | |
| "logps/chosen": -0.9672495722770691, | |
| "logps/rejected": -1371.63232421875, | |
| "loss": 0.2237, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2286236733198166, | |
| "rewards/margins": 13.317922592163086, | |
| "rewards/rejected": -13.089300155639648, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.6542639530528156, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 1.6098828093250203e-06, | |
| "logits/chosen": -2.012927770614624, | |
| "logits/rejected": -2.23055100440979, | |
| "logps/chosen": -2.223574161529541, | |
| "logps/rejected": -1439.184326171875, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2114640474319458, | |
| "rewards/margins": 13.80018424987793, | |
| "rewards/rejected": -13.588720321655273, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.6567611437133225, | |
| "grad_norm": 0.0201416015625, | |
| "learning_rate": 1.5895469476373545e-06, | |
| "logits/chosen": -2.0998306274414062, | |
| "logits/rejected": -2.284853935241699, | |
| "logps/chosen": -1.0365889072418213, | |
| "logps/rejected": -1287.3863525390625, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21419724822044373, | |
| "rewards/margins": 12.467586517333984, | |
| "rewards/rejected": -12.253389358520508, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.6592583343738294, | |
| "grad_norm": 0.048583984375, | |
| "learning_rate": 1.5692803050620642e-06, | |
| "logits/chosen": -2.1266770362854004, | |
| "logits/rejected": -2.341489553451538, | |
| "logps/chosen": -1.9875209331512451, | |
| "logps/rejected": -1219.6407470703125, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21611304581165314, | |
| "rewards/margins": 11.721773147583008, | |
| "rewards/rejected": -11.505661010742188, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.6617555250343363, | |
| "grad_norm": 0.045166015625, | |
| "learning_rate": 1.5490844224133717e-06, | |
| "logits/chosen": -2.178802251815796, | |
| "logits/rejected": -2.3850629329681396, | |
| "logps/chosen": -1.1978418827056885, | |
| "logps/rejected": -1456.7591552734375, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2064187228679657, | |
| "rewards/margins": 14.242889404296875, | |
| "rewards/rejected": -14.036470413208008, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.6642527156948433, | |
| "grad_norm": 0.02978515625, | |
| "learning_rate": 1.528960835125822e-06, | |
| "logits/chosen": -2.3235323429107666, | |
| "logits/rejected": -2.508779525756836, | |
| "logps/chosen": -0.7140904664993286, | |
| "logps/rejected": -1262.5396728515625, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2111320048570633, | |
| "rewards/margins": 12.3548002243042, | |
| "rewards/rejected": -12.143668174743652, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.6667499063553503, | |
| "grad_norm": 0.04541015625, | |
| "learning_rate": 1.5089110731375568e-06, | |
| "logits/chosen": -2.1535146236419678, | |
| "logits/rejected": -2.346010446548462, | |
| "logps/chosen": -1.2154910564422607, | |
| "logps/rejected": -1353.01416015625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21193823218345642, | |
| "rewards/margins": 13.174649238586426, | |
| "rewards/rejected": -12.962712287902832, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.6692470970158572, | |
| "grad_norm": 0.0224609375, | |
| "learning_rate": 1.4889366607739925e-06, | |
| "logits/chosen": -2.2847390174865723, | |
| "logits/rejected": -2.437983989715576, | |
| "logps/chosen": -0.47022026777267456, | |
| "logps/rejected": -1079.610595703125, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20733828842639923, | |
| "rewards/margins": 10.533978462219238, | |
| "rewards/rejected": -10.326639175415039, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.6717442876763641, | |
| "grad_norm": 0.0517578125, | |
| "learning_rate": 1.4690391166319307e-06, | |
| "logits/chosen": -2.091798782348633, | |
| "logits/rejected": -2.286367177963257, | |
| "logps/chosen": -0.8848400115966797, | |
| "logps/rejected": -1370.623046875, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21229073405265808, | |
| "rewards/margins": 13.300837516784668, | |
| "rewards/rejected": -13.088546752929688, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.674241478336871, | |
| "grad_norm": 0.058837890625, | |
| "learning_rate": 1.4492199534641055e-06, | |
| "logits/chosen": -2.1903884410858154, | |
| "logits/rejected": -2.389869451522827, | |
| "logps/chosen": -0.7620021104812622, | |
| "logps/rejected": -1357.733642578125, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2085207998752594, | |
| "rewards/margins": 13.300318717956543, | |
| "rewards/rejected": -13.091796875, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.676738668997378, | |
| "grad_norm": 0.050048828125, | |
| "learning_rate": 1.429480678064174e-06, | |
| "logits/chosen": -2.1907572746276855, | |
| "logits/rejected": -2.4412574768066406, | |
| "logps/chosen": -1.4903779029846191, | |
| "logps/rejected": -1532.8353271484375, | |
| "loss": 0.2235, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22088858485221863, | |
| "rewards/margins": 14.948209762573242, | |
| "rewards/rejected": -14.727320671081543, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.6792358596578849, | |
| "grad_norm": 0.04833984375, | |
| "learning_rate": 1.4098227911521523e-06, | |
| "logits/chosen": -2.1927340030670166, | |
| "logits/rejected": -2.384458065032959, | |
| "logps/chosen": -1.0519030094146729, | |
| "logps/rejected": -1408.5384521484375, | |
| "loss": 0.2247, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21749384701251984, | |
| "rewards/margins": 13.769442558288574, | |
| "rewards/rejected": -13.551948547363281, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.6817330503183918, | |
| "grad_norm": 0.0206298828125, | |
| "learning_rate": 1.3902477872603295e-06, | |
| "logits/chosen": -2.292635440826416, | |
| "logits/rejected": -2.4606173038482666, | |
| "logps/chosen": -1.3724099397659302, | |
| "logps/rejected": -1059.629638671875, | |
| "loss": 0.2292, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2094014585018158, | |
| "rewards/margins": 10.156023025512695, | |
| "rewards/rejected": -9.946621894836426, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.6842302409788987, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 1.370757154619638e-06, | |
| "logits/chosen": -2.2135720252990723, | |
| "logits/rejected": -2.4035539627075195, | |
| "logps/chosen": -0.8492560386657715, | |
| "logps/rejected": -1440.1517333984375, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21952596306800842, | |
| "rewards/margins": 13.991949081420898, | |
| "rewards/rejected": -13.772422790527344, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.6867274316394056, | |
| "grad_norm": 0.0546875, | |
| "learning_rate": 1.3513523750465049e-06, | |
| "logits/chosen": -2.2055509090423584, | |
| "logits/rejected": -2.3952600955963135, | |
| "logps/chosen": -0.848610520362854, | |
| "logps/rejected": -1253.37841796875, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2106543481349945, | |
| "rewards/margins": 12.153672218322754, | |
| "rewards/rejected": -11.94301700592041, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.6892246222999125, | |
| "grad_norm": 0.0078125, | |
| "learning_rate": 1.332034923830199e-06, | |
| "logits/chosen": -2.1199612617492676, | |
| "logits/rejected": -2.3331620693206787, | |
| "logps/chosen": -0.572918176651001, | |
| "logps/rejected": -1314.5574951171875, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21192285418510437, | |
| "rewards/margins": 12.847297668457031, | |
| "rewards/rejected": -12.635372161865234, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.6917218129604196, | |
| "grad_norm": 0.05322265625, | |
| "learning_rate": 1.31280626962067e-06, | |
| "logits/chosen": -2.242522716522217, | |
| "logits/rejected": -2.4255213737487793, | |
| "logps/chosen": -0.6031197905540466, | |
| "logps/rejected": -1176.1724853515625, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21441105008125305, | |
| "rewards/margins": 11.398508071899414, | |
| "rewards/rejected": -11.18409538269043, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.6942190036209265, | |
| "grad_norm": 0.03759765625, | |
| "learning_rate": 1.2936678743168813e-06, | |
| "logits/chosen": -2.1787726879119873, | |
| "logits/rejected": -2.379664659500122, | |
| "logps/chosen": -0.6903184652328491, | |
| "logps/rejected": -1316.2584228515625, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2146139144897461, | |
| "rewards/margins": 12.865110397338867, | |
| "rewards/rejected": -12.650495529174805, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.6967161942814334, | |
| "grad_norm": 0.033935546875, | |
| "learning_rate": 1.2746211929556777e-06, | |
| "logits/chosen": -2.1566481590270996, | |
| "logits/rejected": -2.4140141010284424, | |
| "logps/chosen": -0.8048852682113647, | |
| "logps/rejected": -1725.7884521484375, | |
| "loss": 0.2244, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21056988835334778, | |
| "rewards/margins": 16.911666870117188, | |
| "rewards/rejected": -16.70109748840332, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.6992133849419403, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 1.2556676736011558e-06, | |
| "logits/chosen": -2.1705546379089355, | |
| "logits/rejected": -2.36136531829834, | |
| "logps/chosen": -1.7305570840835571, | |
| "logps/rejected": -1468.9334716796875, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21282191574573517, | |
| "rewards/margins": 14.295863151550293, | |
| "rewards/rejected": -14.083041191101074, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7017105756024472, | |
| "grad_norm": 0.0264892578125, | |
| "learning_rate": 1.2368087572345772e-06, | |
| "logits/chosen": -2.2008700370788574, | |
| "logits/rejected": -2.3622145652770996, | |
| "logps/chosen": -0.9749493598937988, | |
| "logps/rejected": -1153.006103515625, | |
| "loss": 0.2278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20674769580364227, | |
| "rewards/margins": 11.259106636047363, | |
| "rewards/rejected": -11.052358627319336, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.7042077662629542, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 1.2180458776448067e-06, | |
| "logits/chosen": -2.183065891265869, | |
| "logits/rejected": -2.4031364917755127, | |
| "logps/chosen": -1.3278162479400635, | |
| "logps/rejected": -1352.5931396484375, | |
| "loss": 0.2276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21398906409740448, | |
| "rewards/margins": 13.097040176391602, | |
| "rewards/rejected": -12.883050918579102, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.7067049569234611, | |
| "grad_norm": 0.04150390625, | |
| "learning_rate": 1.1993804613193158e-06, | |
| "logits/chosen": -2.166015625, | |
| "logits/rejected": -2.376171827316284, | |
| "logps/chosen": -0.8504392504692078, | |
| "logps/rejected": -1218.2906494140625, | |
| "loss": 0.2242, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2224057912826538, | |
| "rewards/margins": 11.764514923095703, | |
| "rewards/rejected": -11.542108535766602, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.709202147583968, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 1.1808139273357232e-06, | |
| "logits/chosen": -2.1249091625213623, | |
| "logits/rejected": -2.324924945831299, | |
| "logps/chosen": -1.2602803707122803, | |
| "logps/rejected": -1440.6927490234375, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21478672325611115, | |
| "rewards/margins": 13.95526123046875, | |
| "rewards/rejected": -13.740473747253418, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.711699338244475, | |
| "grad_norm": 0.0186767578125, | |
| "learning_rate": 1.1623476872539108e-06, | |
| "logits/chosen": -2.1342732906341553, | |
| "logits/rejected": -2.3520121574401855, | |
| "logps/chosen": -1.0939338207244873, | |
| "logps/rejected": -1569.9661865234375, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.209732323884964, | |
| "rewards/margins": 15.359413146972656, | |
| "rewards/rejected": -15.149681091308594, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.7141965289049819, | |
| "grad_norm": 0.0206298828125, | |
| "learning_rate": 1.1439831450087032e-06, | |
| "logits/chosen": -2.1833555698394775, | |
| "logits/rejected": -2.408240795135498, | |
| "logps/chosen": -1.4031983613967896, | |
| "logps/rejected": -1495.2554931640625, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20904704928398132, | |
| "rewards/margins": 14.592279434204102, | |
| "rewards/rejected": -14.383232116699219, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.7166937195654888, | |
| "grad_norm": 0.048828125, | |
| "learning_rate": 1.1257216968031357e-06, | |
| "logits/chosen": -2.1499791145324707, | |
| "logits/rejected": -2.3467013835906982, | |
| "logps/chosen": -0.6740778684616089, | |
| "logps/rejected": -1315.198486328125, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2084966003894806, | |
| "rewards/margins": 12.877813339233398, | |
| "rewards/rejected": -12.669316291809082, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.7191909102259958, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 1.1075647310022974e-06, | |
| "logits/chosen": -2.293015956878662, | |
| "logits/rejected": -2.477437973022461, | |
| "logps/chosen": -0.6577932238578796, | |
| "logps/rejected": -1144.9639892578125, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21200187504291534, | |
| "rewards/margins": 11.185698509216309, | |
| "rewards/rejected": -10.973695755004883, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.7216881008865027, | |
| "grad_norm": 0.0341796875, | |
| "learning_rate": 1.0895136280277863e-06, | |
| "logits/chosen": -2.1305599212646484, | |
| "logits/rejected": -2.3395919799804688, | |
| "logps/chosen": -0.9710084795951843, | |
| "logps/rejected": -1521.902099609375, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21521084010601044, | |
| "rewards/margins": 14.770858764648438, | |
| "rewards/rejected": -14.555648803710938, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.7241852915470096, | |
| "grad_norm": 0.02685546875, | |
| "learning_rate": 1.0715697602527542e-06, | |
| "logits/chosen": -1.9920504093170166, | |
| "logits/rejected": -2.2198596000671387, | |
| "logps/chosen": -0.49225324392318726, | |
| "logps/rejected": -1440.2822265625, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21176087856292725, | |
| "rewards/margins": 13.888765335083008, | |
| "rewards/rejected": -13.67700481414795, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.7266824822075165, | |
| "grad_norm": 0.05322265625, | |
| "learning_rate": 1.0537344918975708e-06, | |
| "logits/chosen": -2.1923391819000244, | |
| "logits/rejected": -2.3587305545806885, | |
| "logps/chosen": -2.3005270957946777, | |
| "logps/rejected": -1118.677490234375, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22067633271217346, | |
| "rewards/margins": 10.68152141571045, | |
| "rewards/rejected": -10.460844993591309, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.7291796728680234, | |
| "grad_norm": 0.0277099609375, | |
| "learning_rate": 1.036009178926107e-06, | |
| "logits/chosen": -2.162017822265625, | |
| "logits/rejected": -2.350229263305664, | |
| "logps/chosen": -0.4403456151485443, | |
| "logps/rejected": -1365.908203125, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21478745341300964, | |
| "rewards/margins": 13.359176635742188, | |
| "rewards/rejected": -13.144391059875488, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.7316768635285305, | |
| "grad_norm": 0.2041015625, | |
| "learning_rate": 1.0183951689426438e-06, | |
| "logits/chosen": -2.0874218940734863, | |
| "logits/rejected": -2.286980152130127, | |
| "logps/chosen": -1.1334517002105713, | |
| "logps/rejected": -1574.8843994140625, | |
| "loss": 0.2267, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20781561732292175, | |
| "rewards/margins": 15.404953002929688, | |
| "rewards/rejected": -15.197137832641602, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.7341740541890374, | |
| "grad_norm": 0.033203125, | |
| "learning_rate": 1.0008938010894156e-06, | |
| "logits/chosen": -2.05769419670105, | |
| "logits/rejected": -2.291485548019409, | |
| "logps/chosen": -0.6213763356208801, | |
| "logps/rejected": -1545.57421875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21286337077617645, | |
| "rewards/margins": 15.127001762390137, | |
| "rewards/rejected": -14.914140701293945, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.7366712448495443, | |
| "grad_norm": 0.06005859375, | |
| "learning_rate": 9.83506405944804e-07, | |
| "logits/chosen": -2.0132200717926025, | |
| "logits/rejected": -2.2228617668151855, | |
| "logps/chosen": -1.0132176876068115, | |
| "logps/rejected": -1225.736572265625, | |
| "loss": 0.2248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21595139801502228, | |
| "rewards/margins": 11.760801315307617, | |
| "rewards/rejected": -11.544851303100586, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.7391684355100512, | |
| "grad_norm": 0.02099609375, | |
| "learning_rate": 9.662343054221743e-07, | |
| "logits/chosen": -2.038722515106201, | |
| "logits/rejected": -2.254706621170044, | |
| "logps/chosen": -1.0080900192260742, | |
| "logps/rejected": -1486.7254638671875, | |
| "loss": 0.2248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21779987215995789, | |
| "rewards/margins": 14.319659233093262, | |
| "rewards/rejected": -14.101860046386719, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.7416656261705581, | |
| "grad_norm": 0.033447265625, | |
| "learning_rate": 9.490788126693754e-07, | |
| "logits/chosen": -2.05572247505188, | |
| "logits/rejected": -2.270496129989624, | |
| "logps/chosen": -1.580960988998413, | |
| "logps/rejected": -1349.623779296875, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21096165478229523, | |
| "rewards/margins": 13.037325859069824, | |
| "rewards/rejected": -12.826364517211914, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.744162816831065, | |
| "grad_norm": 0.034423828125, | |
| "learning_rate": 9.32041231968904e-07, | |
| "logits/chosen": -2.135493040084839, | |
| "logits/rejected": -2.3431620597839355, | |
| "logps/chosen": -0.692672848701477, | |
| "logps/rejected": -1422.2606201171875, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21278159320354462, | |
| "rewards/margins": 13.839566230773926, | |
| "rewards/rejected": -13.626785278320312, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.746660007491572, | |
| "grad_norm": 0.038330078125, | |
| "learning_rate": 9.151228586387464e-07, | |
| "logits/chosen": -2.1877083778381348, | |
| "logits/rejected": -2.3766164779663086, | |
| "logps/chosen": -0.7439475655555725, | |
| "logps/rejected": -1241.116943359375, | |
| "loss": 0.2292, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2107519656419754, | |
| "rewards/margins": 12.074844360351562, | |
| "rewards/rejected": -11.864092826843262, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.7491571981520789, | |
| "grad_norm": 0.051025390625, | |
| "learning_rate": 8.983249789338941e-07, | |
| "logits/chosen": -2.150568723678589, | |
| "logits/rejected": -2.329155445098877, | |
| "logps/chosen": -0.8139511346817017, | |
| "logps/rejected": -1264.535888671875, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20758719742298126, | |
| "rewards/margins": 12.365687370300293, | |
| "rewards/rejected": -12.158101081848145, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7491571981520789, | |
| "eval_logits/chosen": -2.5715415477752686, | |
| "eval_logits/rejected": -2.65895676612854, | |
| "eval_logps/chosen": -0.12666501104831696, | |
| "eval_logps/rejected": -650.5204467773438, | |
| "eval_loss": 0.22132699191570282, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 0.25786837935447693, | |
| "eval_rewards/margins": 6.314352512359619, | |
| "eval_rewards/rejected": -6.056484222412109, | |
| "eval_runtime": 0.6559, | |
| "eval_samples_per_second": 7.623, | |
| "eval_steps_per_second": 4.574, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7516543888125858, | |
| "grad_norm": 0.0194091796875, | |
| "learning_rate": 8.816488699485593e-07, | |
| "logits/chosen": -2.176842212677002, | |
| "logits/rejected": -2.3571324348449707, | |
| "logps/chosen": -0.4218795895576477, | |
| "logps/rejected": -1318.904052734375, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21097974479198456, | |
| "rewards/margins": 12.884817123413086, | |
| "rewards/rejected": -12.67383861541748, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.7541515794730927, | |
| "grad_norm": 0.041015625, | |
| "learning_rate": 8.650957995190784e-07, | |
| "logits/chosen": -2.1513025760650635, | |
| "logits/rejected": -2.3777430057525635, | |
| "logps/chosen": -1.3863859176635742, | |
| "logps/rejected": -1556.2415771484375, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21383562684059143, | |
| "rewards/margins": 15.212430000305176, | |
| "rewards/rejected": -14.9985933303833, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.7566487701335997, | |
| "grad_norm": 0.029541015625, | |
| "learning_rate": 8.486670261275193e-07, | |
| "logits/chosen": -2.252506732940674, | |
| "logits/rejected": -2.452782392501831, | |
| "logps/chosen": -0.9220790863037109, | |
| "logps/rejected": -1333.130615234375, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20681767165660858, | |
| "rewards/margins": 13.058314323425293, | |
| "rewards/rejected": -12.851496696472168, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.7591459607941067, | |
| "grad_norm": 0.0244140625, | |
| "learning_rate": 8.32363798806011e-07, | |
| "logits/chosen": -2.2259833812713623, | |
| "logits/rejected": -2.4163031578063965, | |
| "logps/chosen": -0.6000443696975708, | |
| "logps/rejected": -1395.551513671875, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21475176513195038, | |
| "rewards/margins": 13.636807441711426, | |
| "rewards/rejected": -13.42205810546875, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.7616431514546136, | |
| "grad_norm": 0.02490234375, | |
| "learning_rate": 8.161873570417742e-07, | |
| "logits/chosen": -2.1769793033599854, | |
| "logits/rejected": -2.3828330039978027, | |
| "logps/chosen": -0.49710139632225037, | |
| "logps/rejected": -1448.161865234375, | |
| "loss": 0.2243, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2149210423231125, | |
| "rewards/margins": 14.16187858581543, | |
| "rewards/rejected": -13.946958541870117, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.7641403421151205, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 8.001389306828897e-07, | |
| "logits/chosen": -2.094914674758911, | |
| "logits/rejected": -2.325759172439575, | |
| "logps/chosen": -1.7604669332504272, | |
| "logps/rejected": -1601.197998046875, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2165949046611786, | |
| "rewards/margins": 15.435786247253418, | |
| "rewards/rejected": -15.219189643859863, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.7666375327756274, | |
| "grad_norm": 0.056884765625, | |
| "learning_rate": 7.842197398447993e-07, | |
| "logits/chosen": -2.119885206222534, | |
| "logits/rejected": -2.3199260234832764, | |
| "logps/chosen": -1.7511274814605713, | |
| "logps/rejected": -1411.588623046875, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2108723670244217, | |
| "rewards/margins": 13.716115951538086, | |
| "rewards/rejected": -13.505243301391602, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.7691347234361343, | |
| "grad_norm": 0.050048828125, | |
| "learning_rate": 7.684309948175414e-07, | |
| "logits/chosen": -2.0922672748565674, | |
| "logits/rejected": -2.2642672061920166, | |
| "logps/chosen": -0.6221259832382202, | |
| "logps/rejected": -1387.583251953125, | |
| "loss": 0.2247, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2143571376800537, | |
| "rewards/margins": 13.555415153503418, | |
| "rewards/rejected": -13.341056823730469, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.7716319140966412, | |
| "grad_norm": 0.0257568359375, | |
| "learning_rate": 7.527738959737371e-07, | |
| "logits/chosen": -2.1526269912719727, | |
| "logits/rejected": -2.3576555252075195, | |
| "logps/chosen": -1.0096280574798584, | |
| "logps/rejected": -1377.4000244140625, | |
| "loss": 0.2246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2141624391078949, | |
| "rewards/margins": 13.408686637878418, | |
| "rewards/rejected": -13.194523811340332, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.7741291047571482, | |
| "grad_norm": 0.054443359375, | |
| "learning_rate": 7.372496336773269e-07, | |
| "logits/chosen": -2.1142802238464355, | |
| "logits/rejected": -2.297616958618164, | |
| "logps/chosen": -0.8569754362106323, | |
| "logps/rejected": -1148.1827392578125, | |
| "loss": 0.2277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21184638142585754, | |
| "rewards/margins": 11.140499114990234, | |
| "rewards/rejected": -10.9286527633667, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.7766262954176552, | |
| "grad_norm": 0.0341796875, | |
| "learning_rate": 7.218593881930744e-07, | |
| "logits/chosen": -2.2074034214019775, | |
| "logits/rejected": -2.391183853149414, | |
| "logps/chosen": -0.8030778765678406, | |
| "logps/rejected": -1229.139404296875, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2128894329071045, | |
| "rewards/margins": 12.000519752502441, | |
| "rewards/rejected": -11.787630081176758, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.7791234860781621, | |
| "grad_norm": 0.040283203125, | |
| "learning_rate": 7.066043295968342e-07, | |
| "logits/chosen": -2.1711983680725098, | |
| "logits/rejected": -2.370105266571045, | |
| "logps/chosen": -1.8718315362930298, | |
| "logps/rejected": -1323.237060546875, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2125745713710785, | |
| "rewards/margins": 12.693571090698242, | |
| "rewards/rejected": -12.480997085571289, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.781620676738669, | |
| "grad_norm": 0.048828125, | |
| "learning_rate": 6.914856176865891e-07, | |
| "logits/chosen": -2.255979537963867, | |
| "logits/rejected": -2.4583592414855957, | |
| "logps/chosen": -1.3440260887145996, | |
| "logps/rejected": -1252.6439208984375, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.205407053232193, | |
| "rewards/margins": 12.165246963500977, | |
| "rewards/rejected": -11.959839820861816, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.7841178673991759, | |
| "grad_norm": 0.05322265625, | |
| "learning_rate": 6.765044018942804e-07, | |
| "logits/chosen": -2.2532248497009277, | |
| "logits/rejected": -2.4564757347106934, | |
| "logps/chosen": -0.6921563744544983, | |
| "logps/rejected": -1213.1630859375, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20914144814014435, | |
| "rewards/margins": 11.834319114685059, | |
| "rewards/rejected": -11.625177383422852, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.7866150580596829, | |
| "grad_norm": 0.034423828125, | |
| "learning_rate": 6.616618211984169e-07, | |
| "logits/chosen": -2.1614041328430176, | |
| "logits/rejected": -2.3516342639923096, | |
| "logps/chosen": -0.3677124083042145, | |
| "logps/rejected": -1380.3824462890625, | |
| "loss": 0.2254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2100847065448761, | |
| "rewards/margins": 13.517425537109375, | |
| "rewards/rejected": -13.307340621948242, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.7891122487201898, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 6.469590040374799e-07, | |
| "logits/chosen": -2.108102560043335, | |
| "logits/rejected": -2.3011136054992676, | |
| "logps/chosen": -0.5627329349517822, | |
| "logps/rejected": -1450.8382568359375, | |
| "loss": 0.2246, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21635802090168, | |
| "rewards/margins": 14.105690002441406, | |
| "rewards/rejected": -13.88933277130127, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.7916094393806967, | |
| "grad_norm": 0.038818359375, | |
| "learning_rate": 6.32397068224136e-07, | |
| "logits/chosen": -2.2220847606658936, | |
| "logits/rejected": -2.4407782554626465, | |
| "logps/chosen": -0.8599641919136047, | |
| "logps/rejected": -1339.21826171875, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21565623581409454, | |
| "rewards/margins": 13.00416088104248, | |
| "rewards/rejected": -12.788503646850586, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.7941066300412036, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 6.17977120860249e-07, | |
| "logits/chosen": -2.208421230316162, | |
| "logits/rejected": -2.4098763465881348, | |
| "logps/chosen": -1.8245439529418945, | |
| "logps/rejected": -1350.473876953125, | |
| "loss": 0.2257, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21073952317237854, | |
| "rewards/margins": 13.214599609375, | |
| "rewards/rejected": -13.003860473632812, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.7966038207017105, | |
| "grad_norm": 0.03857421875, | |
| "learning_rate": 6.037002582527121e-07, | |
| "logits/chosen": -2.1419100761413574, | |
| "logits/rejected": -2.3298497200012207, | |
| "logps/chosen": -0.7109914422035217, | |
| "logps/rejected": -1332.651123046875, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21053273975849152, | |
| "rewards/margins": 12.884849548339844, | |
| "rewards/rejected": -12.67431640625, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.7991010113622175, | |
| "grad_norm": 0.0263671875, | |
| "learning_rate": 5.895675658300981e-07, | |
| "logits/chosen": -2.310133934020996, | |
| "logits/rejected": -2.4916586875915527, | |
| "logps/chosen": -0.809489905834198, | |
| "logps/rejected": -1163.7008056640625, | |
| "loss": 0.2252, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21247372031211853, | |
| "rewards/margins": 11.380758285522461, | |
| "rewards/rejected": -11.168285369873047, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.8015982020227245, | |
| "grad_norm": 0.049072265625, | |
| "learning_rate": 5.755801180601381e-07, | |
| "logits/chosen": -2.2009828090667725, | |
| "logits/rejected": -2.4239349365234375, | |
| "logps/chosen": -0.8167581558227539, | |
| "logps/rejected": -1351.5517578125, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21433226764202118, | |
| "rewards/margins": 13.204305648803711, | |
| "rewards/rejected": -12.989973068237305, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.8040953926832314, | |
| "grad_norm": 0.0390625, | |
| "learning_rate": 5.617389783680307e-07, | |
| "logits/chosen": -2.080115795135498, | |
| "logits/rejected": -2.3226914405822754, | |
| "logps/chosen": -0.7443311810493469, | |
| "logps/rejected": -1530.034423828125, | |
| "loss": 0.224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21728749573230743, | |
| "rewards/margins": 14.894986152648926, | |
| "rewards/rejected": -14.677696228027344, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.8065925833437383, | |
| "grad_norm": 0.232421875, | |
| "learning_rate": 5.48045199055596e-07, | |
| "logits/chosen": -2.1640877723693848, | |
| "logits/rejected": -2.36962628364563, | |
| "logps/chosen": -1.1329087018966675, | |
| "logps/rejected": -1273.104736328125, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20660333335399628, | |
| "rewards/margins": 12.42498779296875, | |
| "rewards/rejected": -12.218384742736816, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.8090897740042452, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 5.344998212212704e-07, | |
| "logits/chosen": -2.091491937637329, | |
| "logits/rejected": -2.327758312225342, | |
| "logps/chosen": -1.4844013452529907, | |
| "logps/rejected": -1502.2427978515625, | |
| "loss": 0.224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2210853397846222, | |
| "rewards/margins": 14.54228687286377, | |
| "rewards/rejected": -14.321202278137207, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.8115869646647521, | |
| "grad_norm": 0.03564453125, | |
| "learning_rate": 5.211038746809551e-07, | |
| "logits/chosen": -2.192322015762329, | |
| "logits/rejected": -2.3808138370513916, | |
| "logps/chosen": -0.5706063508987427, | |
| "logps/rejected": -1285.364013671875, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20710210502147675, | |
| "rewards/margins": 12.564165115356445, | |
| "rewards/rejected": -12.357062339782715, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.8140841553252591, | |
| "grad_norm": 0.07275390625, | |
| "learning_rate": 5.078583778897216e-07, | |
| "logits/chosen": -2.1883485317230225, | |
| "logits/rejected": -2.3633830547332764, | |
| "logps/chosen": -1.4209082126617432, | |
| "logps/rejected": -1214.212890625, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21422162652015686, | |
| "rewards/margins": 11.81783390045166, | |
| "rewards/rejected": -11.603612899780273, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.816581345985766, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 4.94764337864384e-07, | |
| "logits/chosen": -2.2724106311798096, | |
| "logits/rejected": -2.4622254371643066, | |
| "logps/chosen": -0.9480986595153809, | |
| "logps/rejected": -1307.112548828125, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2102733850479126, | |
| "rewards/margins": 12.694864273071289, | |
| "rewards/rejected": -12.484590530395508, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.819078536646273, | |
| "grad_norm": 0.0203857421875, | |
| "learning_rate": 4.818227501069328e-07, | |
| "logits/chosen": -2.2342686653137207, | |
| "logits/rejected": -2.4815187454223633, | |
| "logps/chosen": -1.358782410621643, | |
| "logps/rejected": -1722.28125, | |
| "loss": 0.2233, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21825866401195526, | |
| "rewards/margins": 16.87509536743164, | |
| "rewards/rejected": -16.656835556030273, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.8215757273067799, | |
| "grad_norm": 0.031982421875, | |
| "learning_rate": 4.690345985288572e-07, | |
| "logits/chosen": -2.1274971961975098, | |
| "logits/rejected": -2.328562021255493, | |
| "logps/chosen": -1.1761976480484009, | |
| "logps/rejected": -1403.730224609375, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.213038831949234, | |
| "rewards/margins": 13.633008003234863, | |
| "rewards/rejected": -13.41996955871582, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.8240729179672868, | |
| "grad_norm": 0.01953125, | |
| "learning_rate": 4.5640085537633633e-07, | |
| "logits/chosen": -2.1780600547790527, | |
| "logits/rejected": -2.418370008468628, | |
| "logps/chosen": -1.041684865951538, | |
| "logps/rejected": -1449.9898681640625, | |
| "loss": 0.2235, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21910138428211212, | |
| "rewards/margins": 14.122426986694336, | |
| "rewards/rejected": -13.903326034545898, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.8265701086277938, | |
| "grad_norm": 0.04052734375, | |
| "learning_rate": 4.439224811563211e-07, | |
| "logits/chosen": -2.0584537982940674, | |
| "logits/rejected": -2.258396625518799, | |
| "logps/chosen": -0.6486183404922485, | |
| "logps/rejected": -1476.123291015625, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21381358802318573, | |
| "rewards/margins": 14.360862731933594, | |
| "rewards/rejected": -14.147050857543945, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.8290672992883007, | |
| "grad_norm": 0.04345703125, | |
| "learning_rate": 4.316004245635158e-07, | |
| "logits/chosen": -2.147899866104126, | |
| "logits/rejected": -2.3480546474456787, | |
| "logps/chosen": -1.0383152961730957, | |
| "logps/rejected": -1587.1812744140625, | |
| "loss": 0.2271, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2098073661327362, | |
| "rewards/margins": 15.521720886230469, | |
| "rewards/rejected": -15.311912536621094, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.8315644899488076, | |
| "grad_norm": 0.01336669921875, | |
| "learning_rate": 4.194356224082455e-07, | |
| "logits/chosen": -2.0754525661468506, | |
| "logits/rejected": -2.304088592529297, | |
| "logps/chosen": -0.6566920876502991, | |
| "logps/rejected": -1547.8634033203125, | |
| "loss": 0.227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20915034413337708, | |
| "rewards/margins": 15.037984848022461, | |
| "rewards/rejected": -14.828834533691406, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.8340616806093145, | |
| "grad_norm": 0.0283203125, | |
| "learning_rate": 4.074289995452338e-07, | |
| "logits/chosen": -2.141746997833252, | |
| "logits/rejected": -2.3306097984313965, | |
| "logps/chosen": -0.9173293113708496, | |
| "logps/rejected": -1333.7867431640625, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2176503688097, | |
| "rewards/margins": 13.049924850463867, | |
| "rewards/rejected": -12.832275390625, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.8365588712698214, | |
| "grad_norm": 0.01904296875, | |
| "learning_rate": 3.9558146880329246e-07, | |
| "logits/chosen": -2.1531293392181396, | |
| "logits/rejected": -2.3555006980895996, | |
| "logps/chosen": -1.041372537612915, | |
| "logps/rejected": -1363.6673583984375, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21647608280181885, | |
| "rewards/margins": 13.151887893676758, | |
| "rewards/rejected": -12.935412406921387, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.8390560619303283, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.838939309159187e-07, | |
| "logits/chosen": -2.150744915008545, | |
| "logits/rejected": -2.3291797637939453, | |
| "logps/chosen": -0.6859675645828247, | |
| "logps/rejected": -1347.5732421875, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21168136596679688, | |
| "rewards/margins": 13.16771411895752, | |
| "rewards/rejected": -12.956031799316406, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.8415532525908354, | |
| "grad_norm": 0.0213623046875, | |
| "learning_rate": 3.723672744528162e-07, | |
| "logits/chosen": -2.225355863571167, | |
| "logits/rejected": -2.434971570968628, | |
| "logps/chosen": -0.7719963788986206, | |
| "logps/rejected": -1404.882568359375, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2120278775691986, | |
| "rewards/margins": 13.696516036987305, | |
| "rewards/rejected": -13.484487533569336, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.8440504432513423, | |
| "grad_norm": 0.0220947265625, | |
| "learning_rate": 3.6100237575233647e-07, | |
| "logits/chosen": -2.2835781574249268, | |
| "logits/rejected": -2.459686279296875, | |
| "logps/chosen": -0.8155478239059448, | |
| "logps/rejected": -1183.509033203125, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21422457695007324, | |
| "rewards/margins": 11.59019660949707, | |
| "rewards/rejected": -11.375970840454102, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.8465476339118492, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 3.4980009885486054e-07, | |
| "logits/chosen": -2.2139523029327393, | |
| "logits/rejected": -2.3762905597686768, | |
| "logps/chosen": -0.49865931272506714, | |
| "logps/rejected": -1125.1737060546875, | |
| "loss": 0.2277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20752505958080292, | |
| "rewards/margins": 10.996343612670898, | |
| "rewards/rejected": -10.788819313049316, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.8490448245723561, | |
| "grad_norm": 0.01556396484375, | |
| "learning_rate": 3.3876129543710197e-07, | |
| "logits/chosen": -2.184354305267334, | |
| "logits/rejected": -2.3724493980407715, | |
| "logps/chosen": -0.690311074256897, | |
| "logps/rejected": -1528.7587890625, | |
| "loss": 0.2248, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21172885596752167, | |
| "rewards/margins": 14.938085556030273, | |
| "rewards/rejected": -14.726354598999023, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.851542015232863, | |
| "grad_norm": 0.01495361328125, | |
| "learning_rate": 3.2788680474735687e-07, | |
| "logits/chosen": -2.1705164909362793, | |
| "logits/rejected": -2.373166561126709, | |
| "logps/chosen": -0.5612165927886963, | |
| "logps/rejected": -1317.1187744140625, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20889082551002502, | |
| "rewards/margins": 12.858512878417969, | |
| "rewards/rejected": -12.649621963500977, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.85403920589337, | |
| "grad_norm": 0.00872802734375, | |
| "learning_rate": 3.1717745354170214e-07, | |
| "logits/chosen": -2.071406841278076, | |
| "logits/rejected": -2.2939293384552, | |
| "logps/chosen": -0.8238442540168762, | |
| "logps/rejected": -1532.26513671875, | |
| "loss": 0.2244, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21248868107795715, | |
| "rewards/margins": 15.00029468536377, | |
| "rewards/rejected": -14.787805557250977, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.8565363965538769, | |
| "grad_norm": 0.021484375, | |
| "learning_rate": 3.0663405602113727e-07, | |
| "logits/chosen": -2.24153208732605, | |
| "logits/rejected": -2.467984676361084, | |
| "logps/chosen": -0.9753687977790833, | |
| "logps/rejected": -1389.8427734375, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20957298576831818, | |
| "rewards/margins": 13.579081535339355, | |
| "rewards/rejected": -13.369508743286133, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.8590335872143838, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 2.9625741376968107e-07, | |
| "logits/chosen": -2.060586452484131, | |
| "logits/rejected": -2.3030850887298584, | |
| "logps/chosen": -2.972282886505127, | |
| "logps/rejected": -1365.322265625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21505007147789001, | |
| "rewards/margins": 12.994850158691406, | |
| "rewards/rejected": -12.77979850769043, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.8615307778748907, | |
| "grad_norm": 0.02001953125, | |
| "learning_rate": 2.8604831569343324e-07, | |
| "logits/chosen": -2.2799830436706543, | |
| "logits/rejected": -2.4574227333068848, | |
| "logps/chosen": -0.9521903991699219, | |
| "logps/rejected": -1208.1971435546875, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21667388081550598, | |
| "rewards/margins": 11.710010528564453, | |
| "rewards/rejected": -11.493337631225586, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8640279685353977, | |
| "grad_norm": 0.028564453125, | |
| "learning_rate": 2.760075379605942e-07, | |
| "logits/chosen": -2.1184418201446533, | |
| "logits/rejected": -2.292738199234009, | |
| "logps/chosen": -0.882199764251709, | |
| "logps/rejected": -1400.0753173828125, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20649878680706024, | |
| "rewards/margins": 13.685522079467773, | |
| "rewards/rejected": -13.479023933410645, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.8665251591959046, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 2.661358439424552e-07, | |
| "logits/chosen": -2.1794090270996094, | |
| "logits/rejected": -2.3647027015686035, | |
| "logps/chosen": -0.8141934275627136, | |
| "logps/rejected": -1179.304931640625, | |
| "loss": 0.2276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21081428229808807, | |
| "rewards/margins": 11.469918251037598, | |
| "rewards/rejected": -11.25910472869873, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.8690223498564116, | |
| "grad_norm": 0.0262451171875, | |
| "learning_rate": 2.564339841553615e-07, | |
| "logits/chosen": -2.1696417331695557, | |
| "logits/rejected": -2.341275453567505, | |
| "logps/chosen": -0.6168124675750732, | |
| "logps/rejected": -1255.4180908203125, | |
| "loss": 0.2276, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20439627766609192, | |
| "rewards/margins": 12.246126174926758, | |
| "rewards/rejected": -12.041730880737305, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.8715195405169185, | |
| "grad_norm": 0.212890625, | |
| "learning_rate": 2.469026962036539e-07, | |
| "logits/chosen": -2.155325412750244, | |
| "logits/rejected": -2.346266984939575, | |
| "logps/chosen": -1.7188537120819092, | |
| "logps/rejected": -1198.50634765625, | |
| "loss": 0.2282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21203342080116272, | |
| "rewards/margins": 11.423583984375, | |
| "rewards/rejected": -11.211549758911133, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.8740167311774254, | |
| "grad_norm": 0.033447265625, | |
| "learning_rate": 2.3754270472358786e-07, | |
| "logits/chosen": -2.1500706672668457, | |
| "logits/rejected": -2.346287965774536, | |
| "logps/chosen": -1.2322837114334106, | |
| "logps/rejected": -1203.8701171875, | |
| "loss": 0.2269, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2134041041135788, | |
| "rewards/margins": 11.585506439208984, | |
| "rewards/rejected": -11.372100830078125, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8765139218379323, | |
| "grad_norm": 0.0419921875, | |
| "learning_rate": 2.283547213282458e-07, | |
| "logits/chosen": -2.26165509223938, | |
| "logits/rejected": -2.4591403007507324, | |
| "logps/chosen": -1.2189921140670776, | |
| "logps/rejected": -1291.148193359375, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21497786045074463, | |
| "rewards/margins": 12.45503044128418, | |
| "rewards/rejected": -12.24005126953125, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.8790111124984392, | |
| "grad_norm": 0.0400390625, | |
| "learning_rate": 2.1933944455343166e-07, | |
| "logits/chosen": -1.9996531009674072, | |
| "logits/rejected": -2.232881784439087, | |
| "logps/chosen": -1.0685181617736816, | |
| "logps/rejected": -1328.8775634765625, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21257129311561584, | |
| "rewards/margins": 12.904253959655762, | |
| "rewards/rejected": -12.691683769226074, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.8815083031589462, | |
| "grad_norm": 0.01324462890625, | |
| "learning_rate": 2.104975598045647e-07, | |
| "logits/chosen": -2.1279807090759277, | |
| "logits/rejected": -2.3155367374420166, | |
| "logps/chosen": -0.7418814897537231, | |
| "logps/rejected": -1234.801025390625, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21231353282928467, | |
| "rewards/margins": 12.064626693725586, | |
| "rewards/rejected": -11.852312088012695, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.8840054938194531, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 2.018297393045701e-07, | |
| "logits/chosen": -2.169581651687622, | |
| "logits/rejected": -2.334414005279541, | |
| "logps/chosen": -1.1093792915344238, | |
| "logps/rejected": -1281.266357421875, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.209160715341568, | |
| "rewards/margins": 12.48926067352295, | |
| "rewards/rejected": -12.280099868774414, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.8865026844799601, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 1.9333664204277236e-07, | |
| "logits/chosen": -2.0912182331085205, | |
| "logits/rejected": -2.292468309402466, | |
| "logps/chosen": -0.8641373515129089, | |
| "logps/rejected": -1473.767333984375, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21468326449394226, | |
| "rewards/margins": 14.221132278442383, | |
| "rewards/rejected": -14.006446838378906, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.888999875140467, | |
| "grad_norm": 0.017578125, | |
| "learning_rate": 1.8501891372479124e-07, | |
| "logits/chosen": -2.155086040496826, | |
| "logits/rejected": -2.3607256412506104, | |
| "logps/chosen": -1.0332701206207275, | |
| "logps/rejected": -1407.046875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21279795467853546, | |
| "rewards/margins": 13.675623893737793, | |
| "rewards/rejected": -13.4628267288208, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.8914970658009739, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 1.7687718672345533e-07, | |
| "logits/chosen": -2.1115050315856934, | |
| "logits/rejected": -2.295365810394287, | |
| "logps/chosen": -1.0766206979751587, | |
| "logps/rejected": -1537.2154541015625, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21170708537101746, | |
| "rewards/margins": 15.0249662399292, | |
| "rewards/rejected": -14.813258171081543, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.8939942564614808, | |
| "grad_norm": 0.03515625, | |
| "learning_rate": 1.689120800307212e-07, | |
| "logits/chosen": -2.010655403137207, | |
| "logits/rejected": -2.2343146800994873, | |
| "logps/chosen": -0.6959076523780823, | |
| "logps/rejected": -1583.5645751953125, | |
| "loss": 0.2245, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2187151461839676, | |
| "rewards/margins": 15.251518249511719, | |
| "rewards/rejected": -15.032801628112793, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.8964914471219878, | |
| "grad_norm": 0.041259765625, | |
| "learning_rate": 1.6112419921061357e-07, | |
| "logits/chosen": -2.149298906326294, | |
| "logits/rejected": -2.3335325717926025, | |
| "logps/chosen": -1.0091092586517334, | |
| "logps/rejected": -1295.9100341796875, | |
| "loss": 0.2272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2126895934343338, | |
| "rewards/margins": 12.669659614562988, | |
| "rewards/rejected": -12.456971168518066, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.8989886377824947, | |
| "grad_norm": 0.05615234375, | |
| "learning_rate": 1.5351413635318807e-07, | |
| "logits/chosen": -2.2476723194122314, | |
| "logits/rejected": -2.4481379985809326, | |
| "logps/chosen": -1.025138258934021, | |
| "logps/rejected": -1300.3070068359375, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21075649559497833, | |
| "rewards/margins": 12.60840892791748, | |
| "rewards/rejected": -12.397652626037598, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.9014858284430016, | |
| "grad_norm": 0.0279541015625, | |
| "learning_rate": 1.460824700295138e-07, | |
| "logits/chosen": -2.246796131134033, | |
| "logits/rejected": -2.438882350921631, | |
| "logps/chosen": -1.5276464223861694, | |
| "logps/rejected": -1376.2548828125, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21834063529968262, | |
| "rewards/margins": 13.44012451171875, | |
| "rewards/rejected": -13.221783638000488, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.9039830191035085, | |
| "grad_norm": 0.0157470703125, | |
| "learning_rate": 1.3882976524768694e-07, | |
| "logits/chosen": -2.2246479988098145, | |
| "logits/rejected": -2.397996425628662, | |
| "logps/chosen": -1.2670552730560303, | |
| "logps/rejected": -1179.010986328125, | |
| "loss": 0.2277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21077945828437805, | |
| "rewards/margins": 11.500974655151367, | |
| "rewards/rejected": -11.290196418762207, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.9064802097640154, | |
| "grad_norm": 0.044677734375, | |
| "learning_rate": 1.3175657340987664e-07, | |
| "logits/chosen": -2.1487388610839844, | |
| "logits/rejected": -2.334177255630493, | |
| "logps/chosen": -0.5317996740341187, | |
| "logps/rejected": -1380.195556640625, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2111264169216156, | |
| "rewards/margins": 13.50303840637207, | |
| "rewards/rejected": -13.291911125183105, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.9089774004245225, | |
| "grad_norm": 0.052978515625, | |
| "learning_rate": 1.2486343227040122e-07, | |
| "logits/chosen": -2.2575807571411133, | |
| "logits/rejected": -2.471717357635498, | |
| "logps/chosen": -1.4988012313842773, | |
| "logps/rejected": -1318.032470703125, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.22440342605113983, | |
| "rewards/margins": 12.804231643676758, | |
| "rewards/rejected": -12.579828262329102, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.9114745910850294, | |
| "grad_norm": 0.0654296875, | |
| "learning_rate": 1.181508658948452e-07, | |
| "logits/chosen": -2.189079999923706, | |
| "logits/rejected": -2.372708559036255, | |
| "logps/chosen": -0.8293665051460266, | |
| "logps/rejected": -1286.6043701171875, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21110188961029053, | |
| "rewards/margins": 12.531554222106934, | |
| "rewards/rejected": -12.320451736450195, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.9139717817455363, | |
| "grad_norm": 0.018798828125, | |
| "learning_rate": 1.1161938462021627e-07, | |
| "logits/chosen": -2.082040309906006, | |
| "logits/rejected": -2.2717125415802, | |
| "logps/chosen": -1.0598349571228027, | |
| "logps/rejected": -1245.3990478515625, | |
| "loss": 0.228, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21748106181621552, | |
| "rewards/margins": 12.090206146240234, | |
| "rewards/rejected": -11.872724533081055, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.9164689724060432, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 1.0526948501614536e-07, | |
| "logits/chosen": -2.103464126586914, | |
| "logits/rejected": -2.3152968883514404, | |
| "logps/chosen": -1.075402021408081, | |
| "logps/rejected": -1461.2308349609375, | |
| "loss": 0.2268, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21564142405986786, | |
| "rewards/margins": 14.216550827026367, | |
| "rewards/rejected": -14.000910758972168, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.9189661630665501, | |
| "grad_norm": 0.06982421875, | |
| "learning_rate": 9.910164984713477e-08, | |
| "logits/chosen": -2.1121301651000977, | |
| "logits/rejected": -2.327693223953247, | |
| "logps/chosen": -1.3442718982696533, | |
| "logps/rejected": -1471.4466552734375, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21234098076820374, | |
| "rewards/margins": 14.340484619140625, | |
| "rewards/rejected": -14.128143310546875, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.921463353727057, | |
| "grad_norm": 0.06005859375, | |
| "learning_rate": 9.311634803585323e-08, | |
| "logits/chosen": -2.1561217308044434, | |
| "logits/rejected": -2.3662197589874268, | |
| "logps/chosen": -0.8007721900939941, | |
| "logps/rejected": -1469.8878173828125, | |
| "loss": 0.2286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2085859775543213, | |
| "rewards/margins": 14.383298873901367, | |
| "rewards/rejected": -14.174713134765625, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.923960544387564, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 8.7314034627487e-08, | |
| "logits/chosen": -2.203339099884033, | |
| "logits/rejected": -2.40441632270813, | |
| "logps/chosen": -0.5535265207290649, | |
| "logps/rejected": -1442.5123291015625, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20969831943511963, | |
| "rewards/margins": 14.11566162109375, | |
| "rewards/rejected": -13.905962944030762, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.9264577350480709, | |
| "grad_norm": 0.04833984375, | |
| "learning_rate": 8.16951507551439e-08, | |
| "logits/chosen": -2.2100465297698975, | |
| "logits/rejected": -2.394742250442505, | |
| "logps/chosen": -1.0725539922714233, | |
| "logps/rejected": -1294.114990234375, | |
| "loss": 0.2255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21020770072937012, | |
| "rewards/margins": 12.512723922729492, | |
| "rewards/rejected": -12.302515029907227, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.9289549257085778, | |
| "grad_norm": 0.01544189453125, | |
| "learning_rate": 7.626012360631291e-08, | |
| "logits/chosen": -2.2372231483459473, | |
| "logits/rejected": -2.4310178756713867, | |
| "logps/chosen": -1.1079142093658447, | |
| "logps/rejected": -1298.0045166015625, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21124926209449768, | |
| "rewards/margins": 12.63310718536377, | |
| "rewards/rejected": -12.421857833862305, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.9314521163690848, | |
| "grad_norm": 0.03466796875, | |
| "learning_rate": 7.100936639038936e-08, | |
| "logits/chosen": -2.0344414710998535, | |
| "logits/rejected": -2.2667644023895264, | |
| "logps/chosen": -1.0317838191986084, | |
| "logps/rejected": -1655.9033203125, | |
| "loss": 0.224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2157924920320511, | |
| "rewards/margins": 16.17062759399414, | |
| "rewards/rejected": -15.954833984375, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.9339493070295917, | |
| "grad_norm": 0.00531005859375, | |
| "learning_rate": 6.594327830725916e-08, | |
| "logits/chosen": -2.162308931350708, | |
| "logits/rejected": -2.371338367462158, | |
| "logps/chosen": -0.7821828722953796, | |
| "logps/rejected": -1442.607177734375, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21128495037555695, | |
| "rewards/margins": 14.142976760864258, | |
| "rewards/rejected": -13.931692123413086, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.9364464976900987, | |
| "grad_norm": 0.043701171875, | |
| "learning_rate": 6.106224451694592e-08, | |
| "logits/chosen": -2.1930558681488037, | |
| "logits/rejected": -2.386634111404419, | |
| "logps/chosen": -0.6907114386558533, | |
| "logps/rejected": -1420.8463134765625, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21480241417884827, | |
| "rewards/margins": 13.8462495803833, | |
| "rewards/rejected": -13.631448745727539, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.9389436883506056, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 5.636663611033266e-08, | |
| "logits/chosen": -2.058790683746338, | |
| "logits/rejected": -2.274880886077881, | |
| "logps/chosen": -0.41397613286972046, | |
| "logps/rejected": -1431.2852783203125, | |
| "loss": 0.2266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20900818705558777, | |
| "rewards/margins": 14.00958251953125, | |
| "rewards/rejected": -13.800572395324707, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.9414408790111125, | |
| "grad_norm": 0.019287109375, | |
| "learning_rate": 5.185681008094579e-08, | |
| "logits/chosen": -2.251438617706299, | |
| "logits/rejected": -2.4458415508270264, | |
| "logps/chosen": -1.0221302509307861, | |
| "logps/rejected": -1385.7362060546875, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21662044525146484, | |
| "rewards/margins": 13.469772338867188, | |
| "rewards/rejected": -13.253152847290039, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.9439380696716194, | |
| "grad_norm": 0.03857421875, | |
| "learning_rate": 4.753310929781513e-08, | |
| "logits/chosen": -2.206300973892212, | |
| "logits/rejected": -2.3716189861297607, | |
| "logps/chosen": -0.6498397588729858, | |
| "logps/rejected": -1291.197021484375, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21270425617694855, | |
| "rewards/margins": 12.625164985656738, | |
| "rewards/rejected": -12.412460327148438, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.9464352603321263, | |
| "grad_norm": 0.033447265625, | |
| "learning_rate": 4.3395862479405914e-08, | |
| "logits/chosen": -2.1362087726593018, | |
| "logits/rejected": -2.332123041152954, | |
| "logps/chosen": -1.0763086080551147, | |
| "logps/rejected": -1387.7713623046875, | |
| "loss": 0.2244, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2146589756011963, | |
| "rewards/margins": 13.39326286315918, | |
| "rewards/rejected": -13.178604125976562, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.9489324509926332, | |
| "grad_norm": 0.0155029296875, | |
| "learning_rate": 3.9445384168628474e-08, | |
| "logits/chosen": -2.291581869125366, | |
| "logits/rejected": -2.500275135040283, | |
| "logps/chosen": -1.0031490325927734, | |
| "logps/rejected": -1276.675537109375, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20909054577350616, | |
| "rewards/margins": 12.3539457321167, | |
| "rewards/rejected": -12.144854545593262, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.9514296416531403, | |
| "grad_norm": 0.0284423828125, | |
| "learning_rate": 3.5681974708923484e-08, | |
| "logits/chosen": -2.1034350395202637, | |
| "logits/rejected": -2.2940239906311035, | |
| "logps/chosen": -0.8783596158027649, | |
| "logps/rejected": -1220.660888671875, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21768565475940704, | |
| "rewards/margins": 11.806703567504883, | |
| "rewards/rejected": -11.589017868041992, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.9539268323136472, | |
| "grad_norm": 0.047119140625, | |
| "learning_rate": 3.210592022142717e-08, | |
| "logits/chosen": -2.1330649852752686, | |
| "logits/rejected": -2.2985074520111084, | |
| "logps/chosen": -0.7123221158981323, | |
| "logps/rejected": -1336.329833984375, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20608916878700256, | |
| "rewards/margins": 12.982263565063477, | |
| "rewards/rejected": -12.776172637939453, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.9564240229741541, | |
| "grad_norm": 0.021484375, | |
| "learning_rate": 2.8717492583220095e-08, | |
| "logits/chosen": -2.225675106048584, | |
| "logits/rejected": -2.428712844848633, | |
| "logps/chosen": -0.8774779438972473, | |
| "logps/rejected": -1398.2039794921875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20993375778198242, | |
| "rewards/margins": 13.676666259765625, | |
| "rewards/rejected": -13.4667329788208, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.958921213634661, | |
| "grad_norm": 0.03271484375, | |
| "learning_rate": 2.551694940665539e-08, | |
| "logits/chosen": -2.163163423538208, | |
| "logits/rejected": -2.351386070251465, | |
| "logps/chosen": -0.9975617527961731, | |
| "logps/rejected": -1255.6383056640625, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21191437542438507, | |
| "rewards/margins": 12.254827499389648, | |
| "rewards/rejected": -12.042913436889648, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.9614184042951679, | |
| "grad_norm": 0.055908203125, | |
| "learning_rate": 2.2504534019774092e-08, | |
| "logits/chosen": -2.3171262741088867, | |
| "logits/rejected": -2.492202043533325, | |
| "logps/chosen": -0.872540295124054, | |
| "logps/rejected": -1181.1051025390625, | |
| "loss": 0.2264, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21220049262046814, | |
| "rewards/margins": 11.461995124816895, | |
| "rewards/rejected": -11.24979305267334, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9639155949556749, | |
| "grad_norm": 0.032958984375, | |
| "learning_rate": 1.9680475447805826e-08, | |
| "logits/chosen": -2.1993744373321533, | |
| "logits/rejected": -2.380159378051758, | |
| "logps/chosen": -0.721504807472229, | |
| "logps/rejected": -1297.561767578125, | |
| "loss": 0.2275, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20789849758148193, | |
| "rewards/margins": 12.683464050292969, | |
| "rewards/rejected": -12.475565910339355, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.9664127856161818, | |
| "grad_norm": 0.0201416015625, | |
| "learning_rate": 1.70449883957563e-08, | |
| "logits/chosen": -2.232905626296997, | |
| "logits/rejected": -2.4287447929382324, | |
| "logps/chosen": -2.1762092113494873, | |
| "logps/rejected": -1314.664794921875, | |
| "loss": 0.2253, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21345773339271545, | |
| "rewards/margins": 12.700533866882324, | |
| "rewards/rejected": -12.487077713012695, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.9689099762766887, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 1.4598273232083182e-08, | |
| "logits/chosen": -2.198019027709961, | |
| "logits/rejected": -2.3671329021453857, | |
| "logps/chosen": -0.9621660113334656, | |
| "logps/rejected": -1280.2039794921875, | |
| "loss": 0.2287, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20697855949401855, | |
| "rewards/margins": 12.497517585754395, | |
| "rewards/rejected": -12.29054069519043, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.9714071669371956, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 1.2340515973464917e-08, | |
| "logits/chosen": -2.1526544094085693, | |
| "logits/rejected": -2.3664348125457764, | |
| "logps/chosen": -1.546007752418518, | |
| "logps/rejected": -1401.54638671875, | |
| "loss": 0.2249, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20804882049560547, | |
| "rewards/margins": 13.68072509765625, | |
| "rewards/rejected": -13.472674369812012, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.9739043575977026, | |
| "grad_norm": 0.016357421875, | |
| "learning_rate": 1.0271888270655118e-08, | |
| "logits/chosen": -2.043034076690674, | |
| "logits/rejected": -2.229666233062744, | |
| "logps/chosen": -0.9901046752929688, | |
| "logps/rejected": -1281.4815673828125, | |
| "loss": 0.2265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2106485813856125, | |
| "rewards/margins": 12.358712196350098, | |
| "rewards/rejected": -12.148063659667969, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9764015482582095, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 8.392547395435769e-09, | |
| "logits/chosen": -2.374267101287842, | |
| "logits/rejected": -2.551339626312256, | |
| "logps/chosen": -1.2009716033935547, | |
| "logps/rejected": -1176.1605224609375, | |
| "loss": 0.2263, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20642951130867004, | |
| "rewards/margins": 11.431352615356445, | |
| "rewards/rejected": -11.224924087524414, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.9788987389187165, | |
| "grad_norm": 0.0732421875, | |
| "learning_rate": 6.702636228657911e-09, | |
| "logits/chosen": -2.262585163116455, | |
| "logits/rejected": -2.4511070251464844, | |
| "logps/chosen": -0.7528651356697083, | |
| "logps/rejected": -1265.910400390625, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21303972601890564, | |
| "rewards/margins": 12.352733612060547, | |
| "rewards/rejected": -12.139693260192871, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.9813959295792234, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 5.2022832493800465e-09, | |
| "logits/chosen": -2.3309874534606934, | |
| "logits/rejected": -2.5094618797302246, | |
| "logps/chosen": -0.8482611775398254, | |
| "logps/rejected": -1145.723876953125, | |
| "loss": 0.225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2170940339565277, | |
| "rewards/margins": 11.18010139465332, | |
| "rewards/rejected": -10.963006973266602, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.9838931202397303, | |
| "grad_norm": 0.0174560546875, | |
| "learning_rate": 3.891602525100124e-09, | |
| "logits/chosen": -2.202822208404541, | |
| "logits/rejected": -2.4167404174804688, | |
| "logps/chosen": -0.8022462725639343, | |
| "logps/rejected": -1359.097412109375, | |
| "loss": 0.2251, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21217508614063263, | |
| "rewards/margins": 13.16607666015625, | |
| "rewards/rejected": -12.953901290893555, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.9863903109002372, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 2.7706937030827495e-09, | |
| "logits/chosen": -2.245856285095215, | |
| "logits/rejected": -2.436892032623291, | |
| "logps/chosen": -1.236242651939392, | |
| "logps/rejected": -1134.9066162109375, | |
| "loss": 0.226, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20883974432945251, | |
| "rewards/margins": 10.997222900390625, | |
| "rewards/rejected": -10.788381576538086, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.9888875015607441, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 1.839642002783859e-09, | |
| "logits/chosen": -2.1721549034118652, | |
| "logits/rejected": -2.3608601093292236, | |
| "logps/chosen": -0.9914839863777161, | |
| "logps/rejected": -1147.5926513671875, | |
| "loss": 0.2273, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21253648400306702, | |
| "rewards/margins": 11.109753608703613, | |
| "rewards/rejected": -10.897214889526367, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.9913846922212511, | |
| "grad_norm": 0.0308837890625, | |
| "learning_rate": 1.0985182093714574e-09, | |
| "logits/chosen": -2.2215476036071777, | |
| "logits/rejected": -2.3835880756378174, | |
| "logps/chosen": -0.42377692461013794, | |
| "logps/rejected": -1237.712646484375, | |
| "loss": 0.2256, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20775683224201202, | |
| "rewards/margins": 12.072611808776855, | |
| "rewards/rejected": -11.86485481262207, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.993881882881758, | |
| "grad_norm": 0.1875, | |
| "learning_rate": 5.473786683440896e-10, | |
| "logits/chosen": -2.119377613067627, | |
| "logits/rejected": -2.3185195922851562, | |
| "logps/chosen": -1.0564239025115967, | |
| "logps/rejected": -1471.339111328125, | |
| "loss": 0.2259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.21374066174030304, | |
| "rewards/margins": 14.393136978149414, | |
| "rewards/rejected": -14.17939567565918, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.996379073542265, | |
| "grad_norm": 0.0311279296875, | |
| "learning_rate": 1.862652812467669e-10, | |
| "logits/chosen": -2.1754400730133057, | |
| "logits/rejected": -2.3970232009887695, | |
| "logps/chosen": -1.259765863418579, | |
| "logps/rejected": -1448.65576171875, | |
| "loss": 0.2242, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.2176629602909088, | |
| "rewards/margins": 13.863238334655762, | |
| "rewards/rejected": -13.645576477050781, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.9988762642027719, | |
| "grad_norm": 0.035400390625, | |
| "learning_rate": 1.5205502486292932e-11, | |
| "logits/chosen": -2.143209934234619, | |
| "logits/rejected": -2.34411883354187, | |
| "logps/chosen": -0.6734473705291748, | |
| "logps/rejected": -1441.0018310546875, | |
| "loss": 0.2262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.20581206679344177, | |
| "rewards/margins": 14.11164379119873, | |
| "rewards/rejected": -13.905832290649414, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.9988762642027719, | |
| "eval_logits/chosen": -2.571059465408325, | |
| "eval_logits/rejected": -2.6589972972869873, | |
| "eval_logps/chosen": -0.11967950314283371, | |
| "eval_logps/rejected": -652.1184692382812, | |
| "eval_loss": 0.22132086753845215, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 0.2579382359981537, | |
| "eval_rewards/margins": 6.330402374267578, | |
| "eval_rewards/rejected": -6.072464466094971, | |
| "eval_runtime": 0.656, | |
| "eval_samples_per_second": 7.622, | |
| "eval_steps_per_second": 4.573, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.9998751404669747, | |
| "step": 4004, | |
| "total_flos": 0.0, | |
| "train_loss": 0.2426841035559699, | |
| "train_runtime": 8271.4989, | |
| "train_samples_per_second": 1.936, | |
| "train_steps_per_second": 0.484 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4004, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |