| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.3333333333333334e-09, | |
| "logits/chosen": -1.9057868719100952, | |
| "logits/rejected": -1.3045780658721924, | |
| "logps/chosen": -0.3248765170574188, | |
| "logps/rejected": -209.01206970214844, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.3333333333333334e-08, | |
| "logits/chosen": -0.6534053087234497, | |
| "logits/rejected": -0.6231400370597839, | |
| "logps/chosen": -1.4383400678634644, | |
| "logps/rejected": -9.363475799560547, | |
| "loss": 0.6957, | |
| "rewards/accuracies": 0.2222222238779068, | |
| "rewards/chosen": 0.0005377347115427256, | |
| "rewards/margins": -0.004949397407472134, | |
| "rewards/rejected": 0.005487131420522928, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.666666666666667e-08, | |
| "logits/chosen": -0.7646675705909729, | |
| "logits/rejected": -0.6648472547531128, | |
| "logps/chosen": -1.4389005899429321, | |
| "logps/rejected": -24.582592010498047, | |
| "loss": 0.6987, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.0026078899390995502, | |
| "rewards/margins": -0.010636803694069386, | |
| "rewards/rejected": 0.008028914220631123, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -1.079425573348999, | |
| "logits/rejected": -0.8333357572555542, | |
| "logps/chosen": -1.2763736248016357, | |
| "logps/rejected": -20.987844467163086, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.004916798323392868, | |
| "rewards/margins": 0.0008920803666114807, | |
| "rewards/rejected": -0.005808879155665636, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "logits/chosen": -0.7393895983695984, | |
| "logits/rejected": -0.7093493342399597, | |
| "logps/chosen": -1.157975196838379, | |
| "logps/rejected": -20.361637115478516, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.006825856864452362, | |
| "rewards/margins": 0.002141424920409918, | |
| "rewards/rejected": -0.008967281319200993, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -0.7778738141059875, | |
| "logits/rejected": -0.6960525512695312, | |
| "logps/chosen": -1.0992166996002197, | |
| "logps/rejected": -8.134437561035156, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.007767821196466684, | |
| "rewards/margins": 0.005042984150350094, | |
| "rewards/rejected": -0.012810803949832916, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -1.124731421470642, | |
| "logits/rejected": -1.046034574508667, | |
| "logps/chosen": -1.1876769065856934, | |
| "logps/rejected": -18.648529052734375, | |
| "loss": 0.6264, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.01165817491710186, | |
| "rewards/margins": 0.294198215007782, | |
| "rewards/rejected": -0.3058564066886902, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.3333333333333333e-07, | |
| "logits/chosen": -0.9541412591934204, | |
| "logits/rejected": -0.9094465374946594, | |
| "logps/chosen": -1.2173497676849365, | |
| "logps/rejected": -9.718514442443848, | |
| "loss": 0.68, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.014510683715343475, | |
| "rewards/margins": 0.02750353142619133, | |
| "rewards/rejected": -0.042014218866825104, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "logits/chosen": -0.8979532122612, | |
| "logits/rejected": -0.771776556968689, | |
| "logps/chosen": -1.3160572052001953, | |
| "logps/rejected": -39.639564514160156, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.001878797309473157, | |
| "rewards/margins": 0.04782485216856003, | |
| "rewards/rejected": -0.04970364645123482, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3e-07, | |
| "logits/chosen": -0.8290653228759766, | |
| "logits/rejected": -0.7559410333633423, | |
| "logps/chosen": -1.5210120677947998, | |
| "logps/rejected": -10.581136703491211, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.004562065005302429, | |
| "rewards/margins": 0.01130376011133194, | |
| "rewards/rejected": -0.01586582511663437, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.333333333333333e-07, | |
| "logits/chosen": -0.8234152793884277, | |
| "logits/rejected": -0.597020149230957, | |
| "logps/chosen": -1.6208550930023193, | |
| "logps/rejected": -23.56104278564453, | |
| "loss": 0.6735, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.006368436850607395, | |
| "rewards/margins": 0.04360882192850113, | |
| "rewards/rejected": -0.03724038228392601, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.666666666666666e-07, | |
| "logits/chosen": -0.7968894243240356, | |
| "logits/rejected": -0.6932646036148071, | |
| "logps/chosen": -1.2827363014221191, | |
| "logps/rejected": -8.872964859008789, | |
| "loss": 0.6971, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.010955500416457653, | |
| "rewards/margins": -0.007888413965702057, | |
| "rewards/rejected": -0.003067085286602378, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -0.7554408311843872, | |
| "logits/rejected": -0.7240918278694153, | |
| "logps/chosen": -2.111884355545044, | |
| "logps/rejected": -13.839550971984863, | |
| "loss": 0.6801, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0014948326861485839, | |
| "rewards/margins": 0.028023576363921165, | |
| "rewards/rejected": -0.026528745889663696, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.3333333333333335e-07, | |
| "logits/chosen": -0.785883903503418, | |
| "logits/rejected": -0.7439943552017212, | |
| "logps/chosen": -0.9020355343818665, | |
| "logps/rejected": -6.26843786239624, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.012480726465582848, | |
| "rewards/margins": -0.00418872619047761, | |
| "rewards/rejected": -0.00829199980944395, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6666666666666666e-07, | |
| "logits/chosen": -0.7403808832168579, | |
| "logits/rejected": -0.7405279874801636, | |
| "logps/chosen": -0.5109063386917114, | |
| "logps/rejected": -0.5015324354171753, | |
| "loss": 0.6947, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.009772956371307373, | |
| "rewards/margins": -0.0031128614209592342, | |
| "rewards/rejected": 0.012885818257927895, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -0.9348798990249634, | |
| "logits/rejected": -0.7525736689567566, | |
| "logps/chosen": -1.8914287090301514, | |
| "logps/rejected": -51.92854690551758, | |
| "loss": 0.6319, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.06059448793530464, | |
| "rewards/margins": 0.13546349108219147, | |
| "rewards/rejected": -0.07486900687217712, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.972972972972973e-07, | |
| "logits/chosen": -1.309621810913086, | |
| "logits/rejected": -0.8352526426315308, | |
| "logps/chosen": -1.6440341472625732, | |
| "logps/rejected": -77.83134460449219, | |
| "loss": 0.4626, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.07928337156772614, | |
| "rewards/margins": 0.8833033442497253, | |
| "rewards/rejected": -0.8040200471878052, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.945945945945945e-07, | |
| "logits/chosen": -0.8511091470718384, | |
| "logits/rejected": -0.8510860204696655, | |
| "logps/chosen": -1.1958461999893188, | |
| "logps/rejected": -1.1916860342025757, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.06808782368898392, | |
| "rewards/margins": -0.00024626366212032735, | |
| "rewards/rejected": 0.06833408772945404, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.918918918918919e-07, | |
| "logits/chosen": -0.5700015425682068, | |
| "logits/rejected": -0.5157918930053711, | |
| "logps/chosen": -0.31525278091430664, | |
| "logps/rejected": -9.227466583251953, | |
| "loss": 0.6653, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.042024269700050354, | |
| "rewards/margins": 0.06360773742198944, | |
| "rewards/rejected": -0.021583477035164833, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.891891891891891e-07, | |
| "logits/chosen": -0.6965602040290833, | |
| "logits/rejected": -0.639516294002533, | |
| "logps/chosen": -0.856187641620636, | |
| "logps/rejected": -11.494100570678711, | |
| "loss": 0.6298, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.06196604296565056, | |
| "rewards/margins": 0.2772708833217621, | |
| "rewards/rejected": -0.21530480682849884, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.864864864864865e-07, | |
| "logits/chosen": -0.6798317432403564, | |
| "logits/rejected": -0.5887733101844788, | |
| "logps/chosen": -0.7568815350532532, | |
| "logps/rejected": -19.734209060668945, | |
| "loss": 0.6161, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.07841428369283676, | |
| "rewards/margins": 0.2028009593486786, | |
| "rewards/rejected": -0.12438668310642242, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.837837837837838e-07, | |
| "logits/chosen": -0.5506168007850647, | |
| "logits/rejected": -0.5255534052848816, | |
| "logps/chosen": -0.594875693321228, | |
| "logps/rejected": -10.898618698120117, | |
| "loss": 0.616, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.08072157204151154, | |
| "rewards/margins": 0.20680299401283264, | |
| "rewards/rejected": -0.12608139216899872, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.810810810810811e-07, | |
| "logits/chosen": -0.6420431137084961, | |
| "logits/rejected": -0.4758230149745941, | |
| "logps/chosen": -0.9624983072280884, | |
| "logps/rejected": -15.536343574523926, | |
| "loss": 0.6356, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.15020258724689484, | |
| "rewards/margins": 0.19687768816947937, | |
| "rewards/rejected": -0.04667510837316513, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.783783783783784e-07, | |
| "logits/chosen": -0.47034144401550293, | |
| "logits/rejected": -0.4152582287788391, | |
| "logps/chosen": -0.6653767824172974, | |
| "logps/rejected": -26.184194564819336, | |
| "loss": 0.591, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.08964937925338745, | |
| "rewards/margins": 0.5100451707839966, | |
| "rewards/rejected": -0.42039579153060913, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.7567567567567566e-07, | |
| "logits/chosen": -0.5280557870864868, | |
| "logits/rejected": -0.5208911299705505, | |
| "logps/chosen": -1.561173915863037, | |
| "logps/rejected": -4.852164268493652, | |
| "loss": 0.6755, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.08790391683578491, | |
| "rewards/margins": 0.03883642703294754, | |
| "rewards/rejected": 0.04906748980283737, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.7297297297297294e-07, | |
| "logits/chosen": -0.6650221347808838, | |
| "logits/rejected": -0.4887104630470276, | |
| "logps/chosen": -0.8397138714790344, | |
| "logps/rejected": -24.225984573364258, | |
| "loss": 0.581, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.036707792431116104, | |
| "rewards/margins": 0.3958829939365387, | |
| "rewards/rejected": -0.3591752350330353, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.702702702702703e-07, | |
| "logits/chosen": -0.6431881785392761, | |
| "logits/rejected": -0.5451701879501343, | |
| "logps/chosen": -0.8626500368118286, | |
| "logps/rejected": -18.065229415893555, | |
| "loss": 0.6263, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": 0.05334913730621338, | |
| "rewards/margins": 0.4245104193687439, | |
| "rewards/rejected": -0.3711613118648529, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.6756756756756757e-07, | |
| "logits/chosen": -0.9750626683235168, | |
| "logits/rejected": -0.6229163408279419, | |
| "logps/chosen": -1.2636909484863281, | |
| "logps/rejected": -39.48912811279297, | |
| "loss": 0.5592, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.07849131524562836, | |
| "rewards/margins": 0.8403557538986206, | |
| "rewards/rejected": -0.7618645429611206, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.6486486486486485e-07, | |
| "logits/chosen": -0.6725428700447083, | |
| "logits/rejected": -0.5742911100387573, | |
| "logps/chosen": -0.5612740516662598, | |
| "logps/rejected": -11.10429573059082, | |
| "loss": 0.6343, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.05452188849449158, | |
| "rewards/margins": 0.2184288203716278, | |
| "rewards/rejected": -0.16390694677829742, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.6216216216216214e-07, | |
| "logits/chosen": -0.5694271922111511, | |
| "logits/rejected": -0.5494506359100342, | |
| "logps/chosen": -0.36877548694610596, | |
| "logps/rejected": -2.7846176624298096, | |
| "loss": 0.6649, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.057922400534152985, | |
| "rewards/margins": 0.06819172948598862, | |
| "rewards/rejected": -0.010269328020513058, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.594594594594595e-07, | |
| "logits/chosen": -0.43670493364334106, | |
| "logits/rejected": -0.4366675019264221, | |
| "logps/chosen": -0.8144774436950684, | |
| "logps/rejected": -0.8205119967460632, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.049146492034196854, | |
| "rewards/margins": -0.0007481955690309405, | |
| "rewards/rejected": 0.04989469051361084, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.567567567567567e-07, | |
| "logits/chosen": -1.080672264099121, | |
| "logits/rejected": -0.5540364980697632, | |
| "logps/chosen": -1.4554121494293213, | |
| "logps/rejected": -44.3469352722168, | |
| "loss": 0.5069, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.10377122461795807, | |
| "rewards/margins": 1.340423583984375, | |
| "rewards/rejected": -1.2366522550582886, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.5405405405405405e-07, | |
| "logits/chosen": -0.7922333478927612, | |
| "logits/rejected": -0.4999925494194031, | |
| "logps/chosen": -1.0405428409576416, | |
| "logps/rejected": -32.01726531982422, | |
| "loss": 0.5538, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.07706712186336517, | |
| "rewards/margins": 1.2412185668945312, | |
| "rewards/rejected": -1.1641514301300049, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.5135135135135134e-07, | |
| "logits/chosen": -0.5424150228500366, | |
| "logits/rejected": -0.5425628423690796, | |
| "logps/chosen": -1.4353896379470825, | |
| "logps/rejected": -1.433706521987915, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0135975182056427, | |
| "rewards/margins": 0.0010272532235831022, | |
| "rewards/rejected": 0.012570266611874104, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.486486486486487e-07, | |
| "logits/chosen": -0.6280439496040344, | |
| "logits/rejected": -0.43740400671958923, | |
| "logps/chosen": -1.5716569423675537, | |
| "logps/rejected": -42.92607116699219, | |
| "loss": 0.5532, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.10536029189825058, | |
| "rewards/margins": 1.679062843322754, | |
| "rewards/rejected": -1.5737024545669556, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.459459459459459e-07, | |
| "logits/chosen": -0.6212655305862427, | |
| "logits/rejected": -0.5467087030410767, | |
| "logps/chosen": -1.4269088506698608, | |
| "logps/rejected": -15.58141040802002, | |
| "loss": 0.5713, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.02300642430782318, | |
| "rewards/margins": 0.6028021574020386, | |
| "rewards/rejected": -0.5797957181930542, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.4324324324324325e-07, | |
| "logits/chosen": -0.5765206217765808, | |
| "logits/rejected": -0.5639607310295105, | |
| "logps/chosen": -1.3736517429351807, | |
| "logps/rejected": -7.530215263366699, | |
| "loss": 0.6255, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.014159053564071655, | |
| "rewards/margins": 0.3620375394821167, | |
| "rewards/rejected": -0.37619656324386597, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.4054054054054053e-07, | |
| "logits/chosen": -0.774685263633728, | |
| "logits/rejected": -0.48308247327804565, | |
| "logps/chosen": -0.7153536677360535, | |
| "logps/rejected": -31.57724952697754, | |
| "loss": 0.5592, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.09816861152648926, | |
| "rewards/margins": 1.5147247314453125, | |
| "rewards/rejected": -1.4165561199188232, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.378378378378378e-07, | |
| "logits/chosen": -0.5976985692977905, | |
| "logits/rejected": -0.4839654862880707, | |
| "logps/chosen": -0.9786246418952942, | |
| "logps/rejected": -32.14298629760742, | |
| "loss": 0.4226, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.043240711092948914, | |
| "rewards/margins": 2.0043439865112305, | |
| "rewards/rejected": -2.0475847721099854, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.351351351351351e-07, | |
| "logits/chosen": -0.40646108984947205, | |
| "logits/rejected": -0.40619462728500366, | |
| "logps/chosen": -1.4551324844360352, | |
| "logps/rejected": -1.436767339706421, | |
| "loss": 0.6951, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.09133030474185944, | |
| "rewards/margins": -0.0038097582291811705, | |
| "rewards/rejected": -0.087520532310009, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.3243243243243244e-07, | |
| "logits/chosen": -0.8727982640266418, | |
| "logits/rejected": -0.4593069553375244, | |
| "logps/chosen": -1.0557596683502197, | |
| "logps/rejected": -44.42220687866211, | |
| "loss": 0.4219, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.11892731487751007, | |
| "rewards/margins": 2.8284239768981934, | |
| "rewards/rejected": -2.70949649810791, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2972972972972973e-07, | |
| "logits/chosen": -0.7421806454658508, | |
| "logits/rejected": -0.5064016580581665, | |
| "logps/chosen": -1.2260805368423462, | |
| "logps/rejected": -38.49885940551758, | |
| "loss": 0.5559, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.0014935005456209183, | |
| "rewards/margins": 2.3735568523406982, | |
| "rewards/rejected": -2.372063636779785, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.27027027027027e-07, | |
| "logits/chosen": -0.5848358869552612, | |
| "logits/rejected": -0.37570345401763916, | |
| "logps/chosen": -0.7797711491584778, | |
| "logps/rejected": -33.012638092041016, | |
| "loss": 0.5546, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.05262111499905586, | |
| "rewards/margins": 1.6432182788848877, | |
| "rewards/rejected": -1.5905970335006714, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.243243243243243e-07, | |
| "logits/chosen": -0.5585634708404541, | |
| "logits/rejected": -0.5583919286727905, | |
| "logps/chosen": -0.8610560297966003, | |
| "logps/rejected": -0.8612043261528015, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.048935629427433014, | |
| "rewards/margins": -0.0010301045840606093, | |
| "rewards/rejected": 0.04996573179960251, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.2162162162162164e-07, | |
| "logits/chosen": -0.4769681394100189, | |
| "logits/rejected": -0.45251044631004333, | |
| "logps/chosen": -1.607731819152832, | |
| "logps/rejected": -8.222475051879883, | |
| "loss": 0.6243, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.004343940410763025, | |
| "rewards/margins": 0.5505796670913696, | |
| "rewards/rejected": -0.5549236536026001, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.189189189189189e-07, | |
| "logits/chosen": -0.4478166997432709, | |
| "logits/rejected": -0.42751985788345337, | |
| "logps/chosen": -1.0469465255737305, | |
| "logps/rejected": -5.765268325805664, | |
| "loss": 0.626, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.046685151755809784, | |
| "rewards/margins": 0.36600273847579956, | |
| "rewards/rejected": -0.4126877784729004, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.162162162162162e-07, | |
| "logits/chosen": -0.5552384257316589, | |
| "logits/rejected": -0.4869106709957123, | |
| "logps/chosen": -0.8934662938117981, | |
| "logps/rejected": -15.432965278625488, | |
| "loss": 0.6253, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.07785534113645554, | |
| "rewards/margins": 0.6028213500976562, | |
| "rewards/rejected": -0.5249660611152649, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.135135135135135e-07, | |
| "logits/chosen": -0.636600911617279, | |
| "logits/rejected": -0.3652159571647644, | |
| "logps/chosen": -0.6702336668968201, | |
| "logps/rejected": -65.19319915771484, | |
| "loss": 0.4183, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.059565335512161255, | |
| "rewards/margins": 4.7467474937438965, | |
| "rewards/rejected": -4.6871819496154785, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.1081081081081084e-07, | |
| "logits/chosen": -0.27527302503585815, | |
| "logits/rejected": -0.2753751873970032, | |
| "logps/chosen": -0.7251207232475281, | |
| "logps/rejected": -0.7327331900596619, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.09307748824357986, | |
| "rewards/margins": 0.0015917860437184572, | |
| "rewards/rejected": 0.09148569405078888, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0810810810810807e-07, | |
| "logits/chosen": -0.2706758379936218, | |
| "logits/rejected": -0.2708207964897156, | |
| "logps/chosen": -0.31038787961006165, | |
| "logps/rejected": -0.30583518743515015, | |
| "loss": 0.6945, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.030269015580415726, | |
| "rewards/margins": -0.002708534011617303, | |
| "rewards/rejected": 0.032977551221847534, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.054054054054054e-07, | |
| "logits/chosen": -0.5894482135772705, | |
| "logits/rejected": -0.48265695571899414, | |
| "logps/chosen": -0.7016115188598633, | |
| "logps/rejected": -17.208364486694336, | |
| "loss": 0.5006, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.13988427817821503, | |
| "rewards/margins": 0.8726360201835632, | |
| "rewards/rejected": -0.732751727104187, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_logits/chosen": -0.48661941289901733, | |
| "eval_logits/rejected": -0.3604813516139984, | |
| "eval_logps/chosen": -1.2089859247207642, | |
| "eval_logps/rejected": -21.482067108154297, | |
| "eval_loss": 0.5730764269828796, | |
| "eval_rewards/accuracies": 0.46000000834465027, | |
| "eval_rewards/chosen": 0.09127917140722275, | |
| "eval_rewards/margins": 1.2398022413253784, | |
| "eval_rewards/rejected": -1.148523211479187, | |
| "eval_runtime": 26.6494, | |
| "eval_samples_per_second": 3.752, | |
| "eval_steps_per_second": 3.752, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.027027027027027e-07, | |
| "logits/chosen": -0.5123504400253296, | |
| "logits/rejected": -0.37626415491104126, | |
| "logps/chosen": -0.7230352163314819, | |
| "logps/rejected": -32.59217071533203, | |
| "loss": 0.5533, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.11992353200912476, | |
| "rewards/margins": 2.84232497215271, | |
| "rewards/rejected": -2.7224011421203613, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -0.5709089636802673, | |
| "logits/rejected": -0.4344344735145569, | |
| "logps/chosen": -1.6505239009857178, | |
| "logps/rejected": -29.4504337310791, | |
| "loss": 0.5534, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.012776079587638378, | |
| "rewards/margins": 2.109302043914795, | |
| "rewards/rejected": -2.1220779418945312, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.9729729729729727e-07, | |
| "logits/chosen": -0.7366935014724731, | |
| "logits/rejected": -0.2669216990470886, | |
| "logps/chosen": -1.2830209732055664, | |
| "logps/rejected": -63.51800537109375, | |
| "loss": 0.4195, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.121388278901577, | |
| "rewards/margins": 2.739515542984009, | |
| "rewards/rejected": -2.6181273460388184, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.945945945945946e-07, | |
| "logits/chosen": -0.43380504846572876, | |
| "logits/rejected": -0.3422037661075592, | |
| "logps/chosen": -1.0721886157989502, | |
| "logps/rejected": -18.663774490356445, | |
| "loss": 0.5626, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.05732693150639534, | |
| "rewards/margins": 1.6029354333877563, | |
| "rewards/rejected": -1.5456085205078125, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.918918918918919e-07, | |
| "logits/chosen": -0.5525709986686707, | |
| "logits/rejected": -0.41818103194236755, | |
| "logps/chosen": -1.293927788734436, | |
| "logps/rejected": -8.750811576843262, | |
| "loss": 0.5738, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.009553834795951843, | |
| "rewards/margins": 0.4876217246055603, | |
| "rewards/rejected": -0.4971756041049957, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.891891891891892e-07, | |
| "logits/chosen": -0.27140697836875916, | |
| "logits/rejected": -0.27138635516166687, | |
| "logps/chosen": -1.5451117753982544, | |
| "logps/rejected": -1.539139986038208, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.05115029215812683, | |
| "rewards/margins": -0.0012699353974312544, | |
| "rewards/rejected": -0.04988035187125206, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8648648648648646e-07, | |
| "logits/chosen": -0.3724200129508972, | |
| "logits/rejected": -0.37271934747695923, | |
| "logps/chosen": -1.388579249382019, | |
| "logps/rejected": -1.4015172719955444, | |
| "loss": 0.6919, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.030899781733751297, | |
| "rewards/margins": 0.0024078444112092257, | |
| "rewards/rejected": -0.033307623118162155, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.837837837837838e-07, | |
| "logits/chosen": -0.48146286606788635, | |
| "logits/rejected": -0.3344786763191223, | |
| "logps/chosen": -1.6303869485855103, | |
| "logps/rejected": -16.107934951782227, | |
| "loss": 0.604, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.04100670665502548, | |
| "rewards/margins": 1.0195186138153076, | |
| "rewards/rejected": -1.0605252981185913, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.8108108108108104e-07, | |
| "logits/chosen": -0.7203977108001709, | |
| "logits/rejected": -0.3984030783176422, | |
| "logps/chosen": -1.4085242748260498, | |
| "logps/rejected": -33.98643493652344, | |
| "loss": 0.5603, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.022265303879976273, | |
| "rewards/margins": 2.3924202919006348, | |
| "rewards/rejected": -2.4146857261657715, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.783783783783784e-07, | |
| "logits/chosen": -0.58781498670578, | |
| "logits/rejected": -0.2599487900733948, | |
| "logps/chosen": -0.7738112211227417, | |
| "logps/rejected": -37.13945770263672, | |
| "loss": 0.4929, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.011140326038002968, | |
| "rewards/margins": 1.930899977684021, | |
| "rewards/rejected": -1.919759750366211, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.7567567567567566e-07, | |
| "logits/chosen": -0.17637184262275696, | |
| "logits/rejected": -0.17617428302764893, | |
| "logps/chosen": -1.59823477268219, | |
| "logps/rejected": -1.589928150177002, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.006097626406699419, | |
| "rewards/margins": -0.0003702353569678962, | |
| "rewards/rejected": 0.0064678615890443325, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.72972972972973e-07, | |
| "logits/chosen": -0.2997194230556488, | |
| "logits/rejected": -0.22588582336902618, | |
| "logps/chosen": -1.5014773607254028, | |
| "logps/rejected": -19.09648323059082, | |
| "loss": 0.6233, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.14116178452968597, | |
| "rewards/margins": 1.9240531921386719, | |
| "rewards/rejected": -2.0652148723602295, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.7027027027027023e-07, | |
| "logits/chosen": -0.48020777106285095, | |
| "logits/rejected": -0.2105187177658081, | |
| "logps/chosen": -1.2748075723648071, | |
| "logps/rejected": -30.414684295654297, | |
| "loss": 0.6246, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.05854244902729988, | |
| "rewards/margins": 1.8600146770477295, | |
| "rewards/rejected": -1.9185569286346436, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.6756756756756757e-07, | |
| "logits/chosen": -0.32584524154663086, | |
| "logits/rejected": -0.325616717338562, | |
| "logps/chosen": -1.2563741207122803, | |
| "logps/rejected": -1.2441630363464355, | |
| "loss": 0.6948, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.052650950849056244, | |
| "rewards/margins": -0.003209482878446579, | |
| "rewards/rejected": -0.04944147169589996, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.6486486486486486e-07, | |
| "logits/chosen": -0.5669493675231934, | |
| "logits/rejected": -0.4724608063697815, | |
| "logps/chosen": -1.0622951984405518, | |
| "logps/rejected": -32.84709930419922, | |
| "loss": 0.5534, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.011947070248425007, | |
| "rewards/margins": 2.908785581588745, | |
| "rewards/rejected": -2.896839141845703, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.6216216216216214e-07, | |
| "logits/chosen": -0.771874725818634, | |
| "logits/rejected": -0.4653758108615875, | |
| "logps/chosen": -2.2719671726226807, | |
| "logps/rejected": -45.466407775878906, | |
| "loss": 0.5563, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.012174086645245552, | |
| "rewards/margins": 2.240072727203369, | |
| "rewards/rejected": -2.252246856689453, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.5945945945945943e-07, | |
| "logits/chosen": -0.5078957080841064, | |
| "logits/rejected": -0.30749645829200745, | |
| "logps/chosen": -1.5645476579666138, | |
| "logps/rejected": -26.516870498657227, | |
| "loss": 0.5576, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.032517604529857635, | |
| "rewards/margins": 2.3935961723327637, | |
| "rewards/rejected": -2.426114082336426, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.5675675675675677e-07, | |
| "logits/chosen": -0.4348447918891907, | |
| "logits/rejected": -0.30584046244621277, | |
| "logps/chosen": -1.2801238298416138, | |
| "logps/rejected": -25.644485473632812, | |
| "loss": 0.5972, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.03776795417070389, | |
| "rewards/margins": 1.9652618169784546, | |
| "rewards/rejected": -2.0030298233032227, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.5405405405405406e-07, | |
| "logits/chosen": -0.28920015692710876, | |
| "logits/rejected": -0.28904658555984497, | |
| "logps/chosen": -1.448441743850708, | |
| "logps/rejected": -1.4275623559951782, | |
| "loss": 0.6958, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -0.08941696584224701, | |
| "rewards/margins": -0.005217382218688726, | |
| "rewards/rejected": -0.08419958502054214, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.5135135135135134e-07, | |
| "logits/chosen": -0.182106152176857, | |
| "logits/rejected": -0.18192948400974274, | |
| "logps/chosen": -1.501230239868164, | |
| "logps/rejected": -1.5124905109405518, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.03814355283975601, | |
| "rewards/margins": 0.0013828824739903212, | |
| "rewards/rejected": -0.03952643647789955, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4864864864864863e-07, | |
| "logits/chosen": -0.669059157371521, | |
| "logits/rejected": -0.36879947781562805, | |
| "logps/chosen": -1.125857949256897, | |
| "logps/rejected": -40.65812301635742, | |
| "loss": 0.5525, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.04682355001568794, | |
| "rewards/margins": 2.6082589626312256, | |
| "rewards/rejected": -2.5614356994628906, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.4594594594594597e-07, | |
| "logits/chosen": -0.3450705409049988, | |
| "logits/rejected": -0.3405342400074005, | |
| "logps/chosen": -1.4364194869995117, | |
| "logps/rejected": -5.2330427169799805, | |
| "loss": 0.6319, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.12864340841770172, | |
| "rewards/margins": 0.24519333243370056, | |
| "rewards/rejected": -0.3738367259502411, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.432432432432432e-07, | |
| "logits/chosen": -0.19730427861213684, | |
| "logits/rejected": -0.19721275568008423, | |
| "logps/chosen": -1.455517053604126, | |
| "logps/rejected": -1.468201756477356, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.07263887673616409, | |
| "rewards/margins": 0.0021636115852743387, | |
| "rewards/rejected": -0.07480248063802719, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.4054054054054054e-07, | |
| "logits/chosen": -0.6894451975822449, | |
| "logits/rejected": -0.3585297465324402, | |
| "logps/chosen": -2.602849245071411, | |
| "logps/rejected": -59.2165641784668, | |
| "loss": 0.4853, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.09771852195262909, | |
| "rewards/margins": 3.6492016315460205, | |
| "rewards/rejected": -3.746920347213745, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.378378378378378e-07, | |
| "logits/chosen": -0.48982128500938416, | |
| "logits/rejected": -0.19518586993217468, | |
| "logps/chosen": -1.4529268741607666, | |
| "logps/rejected": -40.79501724243164, | |
| "loss": 0.6238, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.05402202159166336, | |
| "rewards/margins": 2.7485110759735107, | |
| "rewards/rejected": -2.802532911300659, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.3513513513513516e-07, | |
| "logits/chosen": -0.3830093741416931, | |
| "logits/rejected": -0.38310927152633667, | |
| "logps/chosen": -1.994246482849121, | |
| "logps/rejected": -1.9973366260528564, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.09356460720300674, | |
| "rewards/margins": 0.0009627247345633805, | |
| "rewards/rejected": -0.09452733397483826, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.324324324324324e-07, | |
| "logits/chosen": -0.5786948204040527, | |
| "logits/rejected": -0.29584842920303345, | |
| "logps/chosen": -1.9216482639312744, | |
| "logps/rejected": -53.21647262573242, | |
| "loss": 0.5528, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.14687001705169678, | |
| "rewards/margins": 3.7029500007629395, | |
| "rewards/rejected": -3.8498198986053467, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2972972972972973e-07, | |
| "logits/chosen": -0.44832101464271545, | |
| "logits/rejected": -0.3366854190826416, | |
| "logps/chosen": -1.6166212558746338, | |
| "logps/rejected": -20.03157615661621, | |
| "loss": 0.5547, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.16137336194515228, | |
| "rewards/margins": 1.7396749258041382, | |
| "rewards/rejected": -1.9010480642318726, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.27027027027027e-07, | |
| "logits/chosen": -0.5321040153503418, | |
| "logits/rejected": -0.2713443636894226, | |
| "logps/chosen": -2.4121830463409424, | |
| "logps/rejected": -41.183345794677734, | |
| "loss": 0.518, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.1596633493900299, | |
| "rewards/margins": 2.9636120796203613, | |
| "rewards/rejected": -3.1232752799987793, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.243243243243243e-07, | |
| "logits/chosen": -0.23825044929981232, | |
| "logits/rejected": -0.238026425242424, | |
| "logps/chosen": -1.5677788257598877, | |
| "logps/rejected": -1.5716334581375122, | |
| "loss": 0.6941, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.07762937247753143, | |
| "rewards/margins": -0.0019604426342993975, | |
| "rewards/rejected": -0.07566893845796585, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.216216216216216e-07, | |
| "logits/chosen": -0.22825762629508972, | |
| "logits/rejected": -0.2077532261610031, | |
| "logps/chosen": -2.0755527019500732, | |
| "logps/rejected": -17.001359939575195, | |
| "loss": 0.6234, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.23694956302642822, | |
| "rewards/margins": 1.708189606666565, | |
| "rewards/rejected": -1.9451383352279663, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.1891891891891893e-07, | |
| "logits/chosen": -0.12132171541452408, | |
| "logits/rejected": -0.12116007506847382, | |
| "logps/chosen": -2.348914623260498, | |
| "logps/rejected": -2.347259044647217, | |
| "loss": 0.6945, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.19036361575126648, | |
| "rewards/margins": -0.0027313902974128723, | |
| "rewards/rejected": -0.1876322329044342, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.162162162162162e-07, | |
| "logits/chosen": -0.26363319158554077, | |
| "logits/rejected": -0.16369813680648804, | |
| "logps/chosen": -1.8039945363998413, | |
| "logps/rejected": -29.438343048095703, | |
| "loss": 0.5547, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.18878988921642303, | |
| "rewards/margins": 2.6108524799346924, | |
| "rewards/rejected": -2.799642562866211, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.135135135135135e-07, | |
| "logits/chosen": -0.3847021460533142, | |
| "logits/rejected": -0.19058382511138916, | |
| "logps/chosen": -1.9469951391220093, | |
| "logps/rejected": -23.151546478271484, | |
| "loss": 0.5567, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.12975777685642242, | |
| "rewards/margins": 1.3818089962005615, | |
| "rewards/rejected": -1.5115668773651123, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.108108108108108e-07, | |
| "logits/chosen": -0.5285392999649048, | |
| "logits/rejected": -0.3884163200855255, | |
| "logps/chosen": -2.879500150680542, | |
| "logps/rejected": -37.99626541137695, | |
| "loss": 0.5552, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.22818878293037415, | |
| "rewards/margins": 2.465597629547119, | |
| "rewards/rejected": -2.693786144256592, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0810810810810813e-07, | |
| "logits/chosen": -0.4468708038330078, | |
| "logits/rejected": -0.16887584328651428, | |
| "logps/chosen": -2.016929864883423, | |
| "logps/rejected": -29.296972274780273, | |
| "loss": 0.5575, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.18932850658893585, | |
| "rewards/margins": 1.512830138206482, | |
| "rewards/rejected": -1.7021586894989014, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0540540540540536e-07, | |
| "logits/chosen": -0.6263138055801392, | |
| "logits/rejected": -0.1559181660413742, | |
| "logps/chosen": -1.9883623123168945, | |
| "logps/rejected": -97.72358703613281, | |
| "loss": 0.3479, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.06004463508725166, | |
| "rewards/margins": 7.97985315322876, | |
| "rewards/rejected": -8.039897918701172, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.027027027027027e-07, | |
| "logits/chosen": -0.3427307605743408, | |
| "logits/rejected": -0.11785911023616791, | |
| "logps/chosen": -2.4055051803588867, | |
| "logps/rejected": -50.41362380981445, | |
| "loss": 0.4854, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.08261863887310028, | |
| "rewards/margins": 5.198975563049316, | |
| "rewards/rejected": -5.281594276428223, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3e-07, | |
| "logits/chosen": -0.3482494354248047, | |
| "logits/rejected": -0.2711491584777832, | |
| "logps/chosen": -2.2725043296813965, | |
| "logps/rejected": -20.280826568603516, | |
| "loss": 0.6235, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.32118305563926697, | |
| "rewards/margins": 1.9148486852645874, | |
| "rewards/rejected": -2.236032009124756, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.972972972972973e-07, | |
| "logits/chosen": -0.4881555438041687, | |
| "logits/rejected": -0.4544796049594879, | |
| "logps/chosen": -2.1451969146728516, | |
| "logps/rejected": -12.899765014648438, | |
| "loss": 0.6245, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.20810385048389435, | |
| "rewards/margins": 0.9362422227859497, | |
| "rewards/rejected": -1.144345998764038, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.9459459459459456e-07, | |
| "logits/chosen": -0.5529161095619202, | |
| "logits/rejected": -0.01812545582652092, | |
| "logps/chosen": -2.3933329582214355, | |
| "logps/rejected": -77.99223327636719, | |
| "loss": 0.4831, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.13273164629936218, | |
| "rewards/margins": 5.853229999542236, | |
| "rewards/rejected": -5.985960960388184, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.918918918918919e-07, | |
| "logits/chosen": -0.6067525148391724, | |
| "logits/rejected": -0.22178903222084045, | |
| "logps/chosen": -2.893634080886841, | |
| "logps/rejected": -89.12733459472656, | |
| "loss": 0.4156, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3031526207923889, | |
| "rewards/margins": 6.654090881347656, | |
| "rewards/rejected": -6.9572434425354, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.891891891891892e-07, | |
| "logits/chosen": -0.3381947875022888, | |
| "logits/rejected": -0.2047184258699417, | |
| "logps/chosen": -2.5098400115966797, | |
| "logps/rejected": -36.30571365356445, | |
| "loss": 0.4865, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.22798296809196472, | |
| "rewards/margins": 2.872692108154297, | |
| "rewards/rejected": -3.100675106048584, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8648648648648647e-07, | |
| "logits/chosen": -0.4152565002441406, | |
| "logits/rejected": -0.24672865867614746, | |
| "logps/chosen": -2.5835165977478027, | |
| "logps/rejected": -41.009944915771484, | |
| "loss": 0.4875, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.27744337916374207, | |
| "rewards/margins": 2.9683868885040283, | |
| "rewards/rejected": -3.2458300590515137, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8378378378378376e-07, | |
| "logits/chosen": -0.18132410943508148, | |
| "logits/rejected": -0.08390505611896515, | |
| "logps/chosen": -3.1348986625671387, | |
| "logps/rejected": -34.38957977294922, | |
| "loss": 0.623, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3771377205848694, | |
| "rewards/margins": 2.4244353771209717, | |
| "rewards/rejected": -2.8015732765197754, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.810810810810811e-07, | |
| "logits/chosen": -0.31961002945899963, | |
| "logits/rejected": -0.3197285532951355, | |
| "logps/chosen": -3.2407329082489014, | |
| "logps/rejected": -3.257624864578247, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.42207542061805725, | |
| "rewards/margins": 0.0015475511318072677, | |
| "rewards/rejected": -0.4236229956150055, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7837837837837833e-07, | |
| "logits/chosen": -0.1493210643529892, | |
| "logits/rejected": -0.02894291840493679, | |
| "logps/chosen": -2.7680492401123047, | |
| "logps/rejected": -39.11075210571289, | |
| "loss": 0.4872, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3285685181617737, | |
| "rewards/margins": 2.61509370803833, | |
| "rewards/rejected": -2.943662166595459, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.7567567567567567e-07, | |
| "logits/chosen": -0.2658258378505707, | |
| "logits/rejected": -0.14939609169960022, | |
| "logps/chosen": -3.0067975521087646, | |
| "logps/rejected": -9.233423233032227, | |
| "loss": 0.6251, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3324972093105316, | |
| "rewards/margins": 0.6167054772377014, | |
| "rewards/rejected": -0.9492026567459106, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.7297297297297295e-07, | |
| "logits/chosen": 0.09181342273950577, | |
| "logits/rejected": 0.0917346253991127, | |
| "logps/chosen": -2.657616138458252, | |
| "logps/rejected": -2.649034023284912, | |
| "loss": 0.694, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.4064779281616211, | |
| "rewards/margins": -0.0017143071163445711, | |
| "rewards/rejected": -0.4047636091709137, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.702702702702703e-07, | |
| "logits/chosen": -0.5827860832214355, | |
| "logits/rejected": -0.3403246998786926, | |
| "logps/chosen": -3.277855396270752, | |
| "logps/rejected": -52.3044548034668, | |
| "loss": 0.4839, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.37350228428840637, | |
| "rewards/margins": 4.705965995788574, | |
| "rewards/rejected": -5.079468727111816, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_logits/chosen": -0.248033806681633, | |
| "eval_logits/rejected": -0.10857230424880981, | |
| "eval_logps/chosen": -3.3428871631622314, | |
| "eval_logps/rejected": -28.17331314086914, | |
| "eval_loss": 0.5582190752029419, | |
| "eval_rewards/accuracies": 0.4000000059604645, | |
| "eval_rewards/chosen": -0.335501104593277, | |
| "eval_rewards/margins": 2.151271104812622, | |
| "eval_rewards/rejected": -2.486772060394287, | |
| "eval_runtime": 28.5461, | |
| "eval_samples_per_second": 3.503, | |
| "eval_steps_per_second": 3.503, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.675675675675675e-07, | |
| "logits/chosen": -0.5932222604751587, | |
| "logits/rejected": -0.03573286160826683, | |
| "logps/chosen": -3.360830307006836, | |
| "logps/rejected": -68.9535903930664, | |
| "loss": 0.5535, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.29853901267051697, | |
| "rewards/margins": 6.061123371124268, | |
| "rewards/rejected": -6.3596625328063965, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.6486486486486486e-07, | |
| "logits/chosen": -0.10955256223678589, | |
| "logits/rejected": -0.09704247862100601, | |
| "logps/chosen": -2.8949010372161865, | |
| "logps/rejected": -11.125778198242188, | |
| "loss": 0.6242, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.321120023727417, | |
| "rewards/margins": 0.8519529104232788, | |
| "rewards/rejected": -1.1730728149414062, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.6216216216216215e-07, | |
| "logits/chosen": -0.2371162623167038, | |
| "logits/rejected": -0.02502809837460518, | |
| "logps/chosen": -3.312039613723755, | |
| "logps/rejected": -56.23164749145508, | |
| "loss": 0.4161, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3528529703617096, | |
| "rewards/margins": 5.84485387802124, | |
| "rewards/rejected": -6.197707176208496, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.594594594594595e-07, | |
| "logits/chosen": -0.3305162191390991, | |
| "logits/rejected": -0.12264938652515411, | |
| "logps/chosen": -3.0487217903137207, | |
| "logps/rejected": -31.816059112548828, | |
| "loss": 0.5539, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.4525887966156006, | |
| "rewards/margins": 2.451446056365967, | |
| "rewards/rejected": -2.9040348529815674, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.567567567567567e-07, | |
| "logits/chosen": -0.3112742304801941, | |
| "logits/rejected": -0.14556877315044403, | |
| "logps/chosen": -4.298488140106201, | |
| "logps/rejected": -32.196067810058594, | |
| "loss": 0.625, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.45854702591896057, | |
| "rewards/margins": 1.6213452816009521, | |
| "rewards/rejected": -2.079892635345459, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5405405405405406e-07, | |
| "logits/chosen": -0.47445744276046753, | |
| "logits/rejected": -0.17515473067760468, | |
| "logps/chosen": -3.6740562915802, | |
| "logps/rejected": -53.9892692565918, | |
| "loss": 0.4853, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4956344664096832, | |
| "rewards/margins": 4.100377559661865, | |
| "rewards/rejected": -4.596012115478516, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.5135135135135135e-07, | |
| "logits/chosen": -0.11676591634750366, | |
| "logits/rejected": -0.05013752728700638, | |
| "logps/chosen": -3.1980667114257812, | |
| "logps/rejected": -23.358375549316406, | |
| "loss": 0.5551, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.470308393239975, | |
| "rewards/margins": 2.065474033355713, | |
| "rewards/rejected": -2.5357823371887207, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4864864864864863e-07, | |
| "logits/chosen": -0.08944498002529144, | |
| "logits/rejected": -0.04073537513613701, | |
| "logps/chosen": -3.459454298019409, | |
| "logps/rejected": -17.295406341552734, | |
| "loss": 0.6246, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.4144110083580017, | |
| "rewards/margins": 1.442077875137329, | |
| "rewards/rejected": -1.856488823890686, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.4594594594594597e-07, | |
| "logits/chosen": -0.4133351445198059, | |
| "logits/rejected": -0.11843075603246689, | |
| "logps/chosen": -3.5334014892578125, | |
| "logps/rejected": -50.10197830200195, | |
| "loss": 0.5545, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.49260371923446655, | |
| "rewards/margins": 3.9945976734161377, | |
| "rewards/rejected": -4.487200736999512, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.4324324324324326e-07, | |
| "logits/chosen": -0.017034702003002167, | |
| "logits/rejected": 0.03910530358552933, | |
| "logps/chosen": -3.597899913787842, | |
| "logps/rejected": -19.891422271728516, | |
| "loss": 0.554, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.47992175817489624, | |
| "rewards/margins": 1.7824008464813232, | |
| "rewards/rejected": -2.2623229026794434, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.4054054054054054e-07, | |
| "logits/chosen": -0.16526077687740326, | |
| "logits/rejected": -0.15733735263347626, | |
| "logps/chosen": -3.504206895828247, | |
| "logps/rejected": -6.611302852630615, | |
| "loss": 0.6313, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.473609060049057, | |
| "rewards/margins": 0.266178697347641, | |
| "rewards/rejected": -0.7397876977920532, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.3783783783783783e-07, | |
| "logits/chosen": -0.6425670385360718, | |
| "logits/rejected": -0.21171347796916962, | |
| "logps/chosen": -3.7131621837615967, | |
| "logps/rejected": -65.28227233886719, | |
| "loss": 0.4824, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4607169032096863, | |
| "rewards/margins": 6.263044834136963, | |
| "rewards/rejected": -6.72376012802124, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.3513513513513514e-07, | |
| "logits/chosen": -0.1325923502445221, | |
| "logits/rejected": 0.008936069905757904, | |
| "logps/chosen": -2.75840425491333, | |
| "logps/rejected": -39.773658752441406, | |
| "loss": 0.6259, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.38081198930740356, | |
| "rewards/margins": 3.1747312545776367, | |
| "rewards/rejected": -3.5555434226989746, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.3243243243243243e-07, | |
| "logits/chosen": -0.15440817177295685, | |
| "logits/rejected": -0.15450319647789001, | |
| "logps/chosen": -3.1343817710876465, | |
| "logps/rejected": -3.1313533782958984, | |
| "loss": 0.6944, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.3842015564441681, | |
| "rewards/margins": -0.002351009752601385, | |
| "rewards/rejected": -0.38185054063796997, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.2972972972972974e-07, | |
| "logits/chosen": -0.229770228266716, | |
| "logits/rejected": -0.22977308928966522, | |
| "logps/chosen": -3.290398359298706, | |
| "logps/rejected": -3.267608165740967, | |
| "loss": 0.6948, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.3774813413619995, | |
| "rewards/margins": -0.003159981919452548, | |
| "rewards/rejected": -0.3743213415145874, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2702702702702703e-07, | |
| "logits/chosen": -0.07762424647808075, | |
| "logits/rejected": 0.020208783447742462, | |
| "logps/chosen": -2.1210696697235107, | |
| "logps/rejected": -36.04180908203125, | |
| "loss": 0.4959, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.12996070086956024, | |
| "rewards/margins": 3.2478432655334473, | |
| "rewards/rejected": -3.3778038024902344, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2432432432432434e-07, | |
| "logits/chosen": -0.10635857284069061, | |
| "logits/rejected": -0.10641410201787949, | |
| "logps/chosen": -3.266584873199463, | |
| "logps/rejected": -3.2667083740234375, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.42059874534606934, | |
| "rewards/margins": 0.004667977802455425, | |
| "rewards/rejected": -0.42526674270629883, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.2162162162162162e-07, | |
| "logits/chosen": 0.016759177669882774, | |
| "logits/rejected": 0.04513305425643921, | |
| "logps/chosen": -3.0024590492248535, | |
| "logps/rejected": -15.668512344360352, | |
| "loss": 0.6221, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.4371574819087982, | |
| "rewards/margins": 0.9103776216506958, | |
| "rewards/rejected": -1.347535252571106, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.189189189189189e-07, | |
| "logits/chosen": -0.05008067935705185, | |
| "logits/rejected": 0.06452060490846634, | |
| "logps/chosen": -3.3980178833007812, | |
| "logps/rejected": -43.369049072265625, | |
| "loss": 0.4841, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.45819729566574097, | |
| "rewards/margins": 4.862743854522705, | |
| "rewards/rejected": -5.320940971374512, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.1621621621621622e-07, | |
| "logits/chosen": -0.24912318587303162, | |
| "logits/rejected": -0.09523974359035492, | |
| "logps/chosen": -4.080093860626221, | |
| "logps/rejected": -51.317169189453125, | |
| "loss": 0.4837, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.44981464743614197, | |
| "rewards/margins": 4.76999044418335, | |
| "rewards/rejected": -5.2198052406311035, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.135135135135135e-07, | |
| "logits/chosen": -0.2081013172864914, | |
| "logits/rejected": -0.05113474279642105, | |
| "logps/chosen": -4.021970272064209, | |
| "logps/rejected": -38.43590545654297, | |
| "loss": 0.5536, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3837403953075409, | |
| "rewards/margins": 3.026031017303467, | |
| "rewards/rejected": -3.40977144241333, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.1081081081081082e-07, | |
| "logits/chosen": -0.2219875603914261, | |
| "logits/rejected": -0.051030516624450684, | |
| "logps/chosen": -3.1909682750701904, | |
| "logps/rejected": -46.616905212402344, | |
| "loss": 0.4852, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.50543612241745, | |
| "rewards/margins": 3.5189208984375, | |
| "rewards/rejected": -4.024357318878174, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.081081081081081e-07, | |
| "logits/chosen": 0.03644023835659027, | |
| "logits/rejected": 0.06066631153225899, | |
| "logps/chosen": -3.725661516189575, | |
| "logps/rejected": -23.01262092590332, | |
| "loss": 0.5528, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.4582531452178955, | |
| "rewards/margins": 2.343986988067627, | |
| "rewards/rejected": -2.8022401332855225, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.0540540540540542e-07, | |
| "logits/chosen": -0.15473979711532593, | |
| "logits/rejected": 0.008312966674566269, | |
| "logps/chosen": -2.7111215591430664, | |
| "logps/rejected": -31.44228172302246, | |
| "loss": 0.553, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.30419665575027466, | |
| "rewards/margins": 1.9790928363800049, | |
| "rewards/rejected": -2.2832894325256348, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.027027027027027e-07, | |
| "logits/chosen": -0.0361042395234108, | |
| "logits/rejected": -0.035879164934158325, | |
| "logps/chosen": -3.412536144256592, | |
| "logps/rejected": -3.4127590656280518, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.4676332473754883, | |
| "rewards/margins": -0.0005405143019743264, | |
| "rewards/rejected": -0.46709269285202026, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -0.1826142817735672, | |
| "logits/rejected": -0.11776237189769745, | |
| "logps/chosen": -4.550057411193848, | |
| "logps/rejected": -18.74826431274414, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.7531288266181946, | |
| "rewards/margins": 1.4324684143066406, | |
| "rewards/rejected": -2.1855974197387695, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.972972972972973e-07, | |
| "logits/chosen": -0.1055067628622055, | |
| "logits/rejected": 0.11131813377141953, | |
| "logps/chosen": -2.982210159301758, | |
| "logps/rejected": -58.02173614501953, | |
| "loss": 0.5565, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.366026908159256, | |
| "rewards/margins": 6.08723258972168, | |
| "rewards/rejected": -6.453258514404297, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.945945945945946e-07, | |
| "logits/chosen": -0.038440294563770294, | |
| "logits/rejected": 0.10673608630895615, | |
| "logps/chosen": -3.7921531200408936, | |
| "logps/rejected": -37.6851692199707, | |
| "loss": 0.5568, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.5283007025718689, | |
| "rewards/margins": 3.1157898902893066, | |
| "rewards/rejected": -3.644090175628662, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.918918918918919e-07, | |
| "logits/chosen": 0.040133845061063766, | |
| "logits/rejected": 0.06518431752920151, | |
| "logps/chosen": -3.61901593208313, | |
| "logps/rejected": -14.156512260437012, | |
| "loss": 0.6239, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5208865404129028, | |
| "rewards/margins": 1.1946496963500977, | |
| "rewards/rejected": -1.7155358791351318, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.891891891891892e-07, | |
| "logits/chosen": -0.41648340225219727, | |
| "logits/rejected": -0.07645007967948914, | |
| "logps/chosen": -2.7470812797546387, | |
| "logps/rejected": -100.8885726928711, | |
| "loss": 0.4846, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.22242751717567444, | |
| "rewards/margins": 8.930724143981934, | |
| "rewards/rejected": -9.153151512145996, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.864864864864865e-07, | |
| "logits/chosen": -0.43833446502685547, | |
| "logits/rejected": -0.2219434529542923, | |
| "logps/chosen": -3.622807741165161, | |
| "logps/rejected": -45.591163635253906, | |
| "loss": 0.6216, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5277474522590637, | |
| "rewards/margins": 4.0409955978393555, | |
| "rewards/rejected": -4.568743705749512, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.8378378378378379e-07, | |
| "logits/chosen": -0.1983651965856552, | |
| "logits/rejected": -0.062241751700639725, | |
| "logps/chosen": -4.179381370544434, | |
| "logps/rejected": -50.65184783935547, | |
| "loss": 0.4826, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.6547450423240662, | |
| "rewards/margins": 4.6731157302856445, | |
| "rewards/rejected": -5.327860355377197, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.8108108108108107e-07, | |
| "logits/chosen": -0.16832640767097473, | |
| "logits/rejected": -0.14985333383083344, | |
| "logps/chosen": -3.139037609100342, | |
| "logps/rejected": -7.793992519378662, | |
| "loss": 0.6251, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3424663543701172, | |
| "rewards/margins": 0.5691796541213989, | |
| "rewards/rejected": -0.9116460680961609, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.7837837837837838e-07, | |
| "logits/chosen": -0.06780896335840225, | |
| "logits/rejected": 0.03146328404545784, | |
| "logps/chosen": -4.410508155822754, | |
| "logps/rejected": -24.816577911376953, | |
| "loss": 0.5552, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.5241163969039917, | |
| "rewards/margins": 2.202816963195801, | |
| "rewards/rejected": -2.726933002471924, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7567567567567567e-07, | |
| "logits/chosen": -0.355562299489975, | |
| "logits/rejected": 0.05104954168200493, | |
| "logps/chosen": -3.206317901611328, | |
| "logps/rejected": -77.11846923828125, | |
| "loss": 0.3602, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4161297380924225, | |
| "rewards/margins": 8.3109130859375, | |
| "rewards/rejected": -8.727044105529785, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7297297297297298e-07, | |
| "logits/chosen": -0.04275091364979744, | |
| "logits/rejected": 0.1841900795698166, | |
| "logps/chosen": -4.1488471031188965, | |
| "logps/rejected": -67.05726623535156, | |
| "loss": 0.5551, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.43814602494239807, | |
| "rewards/margins": 5.562375068664551, | |
| "rewards/rejected": -6.000521659851074, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.7027027027027027e-07, | |
| "logits/chosen": -0.02052360214293003, | |
| "logits/rejected": 0.09947922080755234, | |
| "logps/chosen": -2.9535369873046875, | |
| "logps/rejected": -24.08611297607422, | |
| "loss": 0.5545, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.3186715245246887, | |
| "rewards/margins": 2.140519142150879, | |
| "rewards/rejected": -2.459190607070923, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.6756756756756758e-07, | |
| "logits/chosen": -0.12382978200912476, | |
| "logits/rejected": -0.1093042716383934, | |
| "logps/chosen": -4.469671249389648, | |
| "logps/rejected": -20.117874145507812, | |
| "loss": 0.6187, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5799933671951294, | |
| "rewards/margins": 1.2598206996917725, | |
| "rewards/rejected": -1.8398144245147705, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.6486486486486487e-07, | |
| "logits/chosen": 0.18149466812610626, | |
| "logits/rejected": 0.18185070157051086, | |
| "logps/chosen": -4.917318344116211, | |
| "logps/rejected": -4.922450065612793, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.7914124727249146, | |
| "rewards/margins": -0.000482580071548, | |
| "rewards/rejected": -0.790929913520813, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.6216216216216215e-07, | |
| "logits/chosen": -0.1398981809616089, | |
| "logits/rejected": 0.06895492970943451, | |
| "logps/chosen": -2.947361469268799, | |
| "logps/rejected": -44.058815002441406, | |
| "loss": 0.4173, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3960328698158264, | |
| "rewards/margins": 4.381956577301025, | |
| "rewards/rejected": -4.777989387512207, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5945945945945947e-07, | |
| "logits/chosen": 0.006692798342555761, | |
| "logits/rejected": 0.0069901407696306705, | |
| "logps/chosen": -3.508424758911133, | |
| "logps/rejected": -3.4823012351989746, | |
| "loss": 0.696, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -0.5846782922744751, | |
| "rewards/margins": -0.005588895175606012, | |
| "rewards/rejected": -0.5790894031524658, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.5675675675675675e-07, | |
| "logits/chosen": -0.08773870766162872, | |
| "logits/rejected": 0.2178010493516922, | |
| "logps/chosen": -3.085310935974121, | |
| "logps/rejected": -40.300601959228516, | |
| "loss": 0.6219, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.3880263566970825, | |
| "rewards/margins": 3.6542656421661377, | |
| "rewards/rejected": -4.042291164398193, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.5405405405405406e-07, | |
| "logits/chosen": -0.20309165120124817, | |
| "logits/rejected": 0.12440992891788483, | |
| "logps/chosen": -3.6524136066436768, | |
| "logps/rejected": -88.05582427978516, | |
| "loss": 0.4843, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.37993237376213074, | |
| "rewards/margins": 8.410959243774414, | |
| "rewards/rejected": -8.790891647338867, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.5135135135135135e-07, | |
| "logits/chosen": -0.03451851010322571, | |
| "logits/rejected": -0.03453352302312851, | |
| "logps/chosen": -3.138916015625, | |
| "logps/rejected": -3.1397087574005127, | |
| "loss": 0.694, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.5204917788505554, | |
| "rewards/margins": -0.001755397766828537, | |
| "rewards/rejected": -0.5187363624572754, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4864864864864866e-07, | |
| "logits/chosen": 0.10227219760417938, | |
| "logits/rejected": 0.10233476012945175, | |
| "logps/chosen": -4.582298755645752, | |
| "logps/rejected": -4.5724334716796875, | |
| "loss": 0.694, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.6803295612335205, | |
| "rewards/margins": -0.0017308980459347367, | |
| "rewards/rejected": -0.6785987019538879, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.4594594594594595e-07, | |
| "logits/chosen": -0.5597248077392578, | |
| "logits/rejected": 0.06213284283876419, | |
| "logps/chosen": -2.8770649433135986, | |
| "logps/rejected": -96.86759185791016, | |
| "loss": 0.4859, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2279277741909027, | |
| "rewards/margins": 10.135581970214844, | |
| "rewards/rejected": -10.363508224487305, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.4324324324324323e-07, | |
| "logits/chosen": 0.09240862727165222, | |
| "logits/rejected": 0.0921512097120285, | |
| "logps/chosen": -3.724111557006836, | |
| "logps/rejected": -3.7362544536590576, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.5456451773643494, | |
| "rewards/margins": 0.002606689929962158, | |
| "rewards/rejected": -0.5482519268989563, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.4054054054054055e-07, | |
| "logits/chosen": -0.12598402798175812, | |
| "logits/rejected": -0.015654001384973526, | |
| "logps/chosen": -4.451899528503418, | |
| "logps/rejected": -42.00288772583008, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.48525696992874146, | |
| "rewards/margins": 2.573787212371826, | |
| "rewards/rejected": -3.059044361114502, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3783783783783783e-07, | |
| "logits/chosen": -0.4521062970161438, | |
| "logits/rejected": -0.004660460166633129, | |
| "logps/chosen": -2.9012811183929443, | |
| "logps/rejected": -115.5091781616211, | |
| "loss": 0.4862, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.19031697511672974, | |
| "rewards/margins": 10.333425521850586, | |
| "rewards/rejected": -10.523741722106934, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.3513513513513515e-07, | |
| "logits/chosen": -0.0979565680027008, | |
| "logits/rejected": 0.11449885368347168, | |
| "logps/chosen": -3.5851001739501953, | |
| "logps/rejected": -76.48931884765625, | |
| "loss": 0.4849, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.4619313180446625, | |
| "rewards/margins": 5.934266090393066, | |
| "rewards/rejected": -6.396197319030762, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_logits/chosen": -0.11751853674650192, | |
| "eval_logits/rejected": 0.025734370574355125, | |
| "eval_logps/chosen": -3.4931278228759766, | |
| "eval_logps/rejected": -30.210371017456055, | |
| "eval_loss": 0.5550487637519836, | |
| "eval_rewards/accuracies": 0.3799999952316284, | |
| "eval_rewards/chosen": -0.3655491769313812, | |
| "eval_rewards/margins": 2.528634548187256, | |
| "eval_rewards/rejected": -2.89418363571167, | |
| "eval_runtime": 28.6292, | |
| "eval_samples_per_second": 3.493, | |
| "eval_steps_per_second": 3.493, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.3243243243243243e-07, | |
| "logits/chosen": -0.014925278723239899, | |
| "logits/rejected": 0.0027809618040919304, | |
| "logps/chosen": -3.423701524734497, | |
| "logps/rejected": -14.077951431274414, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.4755886495113373, | |
| "rewards/margins": 1.5500478744506836, | |
| "rewards/rejected": -2.025636672973633, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2972972972972974e-07, | |
| "logits/chosen": -0.10386158525943756, | |
| "logits/rejected": 0.07022352516651154, | |
| "logps/chosen": -4.077963829040527, | |
| "logps/rejected": -35.390384674072266, | |
| "loss": 0.6257, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.5015150308609009, | |
| "rewards/margins": 2.290374755859375, | |
| "rewards/rejected": -2.7918896675109863, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.2702702702702703e-07, | |
| "logits/chosen": -0.5414426326751709, | |
| "logits/rejected": 0.043320734053850174, | |
| "logps/chosen": -2.5671865940093994, | |
| "logps/rejected": -73.81044006347656, | |
| "loss": 0.5565, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.255888968706131, | |
| "rewards/margins": 6.815358638763428, | |
| "rewards/rejected": -7.0712480545043945, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.2432432432432432e-07, | |
| "logits/chosen": -0.08797252923250198, | |
| "logits/rejected": -0.08808515965938568, | |
| "logps/chosen": -4.043805122375488, | |
| "logps/rejected": -4.037627696990967, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.5562341213226318, | |
| "rewards/margins": -0.00024047940678428859, | |
| "rewards/rejected": -0.5559936761856079, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.2162162162162163e-07, | |
| "logits/chosen": -0.04218659549951553, | |
| "logits/rejected": -0.036192767322063446, | |
| "logps/chosen": -2.878105401992798, | |
| "logps/rejected": -9.950769424438477, | |
| "loss": 0.6258, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.3870970606803894, | |
| "rewards/margins": 1.0968773365020752, | |
| "rewards/rejected": -1.4839744567871094, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1891891891891891e-07, | |
| "logits/chosen": -0.10892989486455917, | |
| "logits/rejected": 0.07078132778406143, | |
| "logps/chosen": -3.934946060180664, | |
| "logps/rejected": -26.729068756103516, | |
| "loss": 0.6248, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.47404026985168457, | |
| "rewards/margins": 1.411412000656128, | |
| "rewards/rejected": -1.8854520320892334, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.1621621621621621e-07, | |
| "logits/chosen": -0.2891389727592468, | |
| "logits/rejected": 0.12911613285541534, | |
| "logps/chosen": -2.690746545791626, | |
| "logps/rejected": -63.75246047973633, | |
| "loss": 0.4878, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.25007423758506775, | |
| "rewards/margins": 5.411334991455078, | |
| "rewards/rejected": -5.6614089012146, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.1351351351351351e-07, | |
| "logits/chosen": -0.5384889841079712, | |
| "logits/rejected": 0.012323490343987942, | |
| "logps/chosen": -4.0185675621032715, | |
| "logps/rejected": -59.906455993652344, | |
| "loss": 0.4854, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.40213894844055176, | |
| "rewards/margins": 4.404304504394531, | |
| "rewards/rejected": -4.806443691253662, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.1081081081081081e-07, | |
| "logits/chosen": -0.04935307055711746, | |
| "logits/rejected": 0.07298599183559418, | |
| "logps/chosen": -2.5241751670837402, | |
| "logps/rejected": -33.777976989746094, | |
| "loss": 0.4127, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2943554222583771, | |
| "rewards/margins": 3.7618205547332764, | |
| "rewards/rejected": -4.05617618560791, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0810810810810811e-07, | |
| "logits/chosen": -0.17055651545524597, | |
| "logits/rejected": 0.0044966209679841995, | |
| "logps/chosen": -3.5189216136932373, | |
| "logps/rejected": -49.50484848022461, | |
| "loss": 0.4863, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.36419206857681274, | |
| "rewards/margins": 4.198049068450928, | |
| "rewards/rejected": -4.562241077423096, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.0540540540540541e-07, | |
| "logits/chosen": -0.2850159704685211, | |
| "logits/rejected": -0.21129021048545837, | |
| "logps/chosen": -3.473625898361206, | |
| "logps/rejected": -26.81081199645996, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.40565043687820435, | |
| "rewards/margins": 2.5721168518066406, | |
| "rewards/rejected": -2.9777674674987793, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.0270270270270271e-07, | |
| "logits/chosen": -0.19239701330661774, | |
| "logits/rejected": -0.003470667405053973, | |
| "logps/chosen": -3.0670382976531982, | |
| "logps/rejected": -51.9354362487793, | |
| "loss": 0.5542, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.40356960892677307, | |
| "rewards/margins": 4.389127731323242, | |
| "rewards/rejected": -4.792697429656982, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": 0.008682191371917725, | |
| "logits/rejected": 0.008879792876541615, | |
| "logps/chosen": -3.3635425567626953, | |
| "logps/rejected": -3.363541841506958, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -0.4913901686668396, | |
| "rewards/margins": -1.5556812513750629e-06, | |
| "rewards/rejected": -0.4913886487483978, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.72972972972973e-08, | |
| "logits/chosen": -0.10675084590911865, | |
| "logits/rejected": 0.005977548658847809, | |
| "logps/chosen": -3.781205415725708, | |
| "logps/rejected": -25.103981018066406, | |
| "loss": 0.5558, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5226456522941589, | |
| "rewards/margins": 2.3052382469177246, | |
| "rewards/rejected": -2.8278839588165283, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.45945945945946e-08, | |
| "logits/chosen": -0.03640662506222725, | |
| "logits/rejected": 0.03828350454568863, | |
| "logps/chosen": -3.3790955543518066, | |
| "logps/rejected": -10.027456283569336, | |
| "loss": 0.625, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.4088617265224457, | |
| "rewards/margins": 0.5881537199020386, | |
| "rewards/rejected": -0.9970153570175171, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.189189189189189e-08, | |
| "logits/chosen": -0.22357909381389618, | |
| "logits/rejected": -0.22356662154197693, | |
| "logps/chosen": -4.314882755279541, | |
| "logps/rejected": -4.330056190490723, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5269622802734375, | |
| "rewards/margins": 0.0032821507193148136, | |
| "rewards/rejected": -0.5302444100379944, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.918918918918919e-08, | |
| "logits/chosen": 0.0022161633241921663, | |
| "logits/rejected": 0.0022703767754137516, | |
| "logps/chosen": -4.084799289703369, | |
| "logps/rejected": -4.066622734069824, | |
| "loss": 0.6944, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.5763648748397827, | |
| "rewards/margins": -0.0025318176485598087, | |
| "rewards/rejected": -0.5738331079483032, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.648648648648649e-08, | |
| "logits/chosen": 0.12270589172840118, | |
| "logits/rejected": 0.1225275993347168, | |
| "logps/chosen": -2.5021376609802246, | |
| "logps/rejected": -2.52251935005188, | |
| "loss": 0.6911, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2609153687953949, | |
| "rewards/margins": 0.004049187991768122, | |
| "rewards/rejected": -0.26496458053588867, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.378378378378379e-08, | |
| "logits/chosen": -0.11181571334600449, | |
| "logits/rejected": 0.11308126151561737, | |
| "logps/chosen": -2.6813411712646484, | |
| "logps/rejected": -53.58498001098633, | |
| "loss": 0.4154, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3474321663379669, | |
| "rewards/margins": 4.637228965759277, | |
| "rewards/rejected": -4.984661102294922, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.108108108108108e-08, | |
| "logits/chosen": -0.17449359595775604, | |
| "logits/rejected": -0.03248979523777962, | |
| "logps/chosen": -3.2400214672088623, | |
| "logps/rejected": -48.60923767089844, | |
| "loss": 0.5534, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.30690425634384155, | |
| "rewards/margins": 4.303795337677002, | |
| "rewards/rejected": -4.610699653625488, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.837837837837838e-08, | |
| "logits/chosen": -0.5135300755500793, | |
| "logits/rejected": -0.15363462269306183, | |
| "logps/chosen": -3.052196741104126, | |
| "logps/rejected": -54.189735412597656, | |
| "loss": 0.554, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.1840744912624359, | |
| "rewards/margins": 4.594564914703369, | |
| "rewards/rejected": -4.778639316558838, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.567567567567568e-08, | |
| "logits/chosen": -0.30552589893341064, | |
| "logits/rejected": -0.014799046330153942, | |
| "logps/chosen": -2.9994537830352783, | |
| "logps/rejected": -33.19937515258789, | |
| "loss": 0.6267, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.33928194642066956, | |
| "rewards/margins": 2.3723442554473877, | |
| "rewards/rejected": -2.7116260528564453, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.297297297297297e-08, | |
| "logits/chosen": -0.031065676361322403, | |
| "logits/rejected": 0.12822124361991882, | |
| "logps/chosen": -2.5956356525421143, | |
| "logps/rejected": -44.217403411865234, | |
| "loss": 0.557, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.38229799270629883, | |
| "rewards/margins": 4.7165846824646, | |
| "rewards/rejected": -5.098883152008057, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 7.027027027027027e-08, | |
| "logits/chosen": -0.07541604340076447, | |
| "logits/rejected": -0.07454513013362885, | |
| "logps/chosen": -3.0078632831573486, | |
| "logps/rejected": -9.483304977416992, | |
| "loss": 0.6243, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3317088484764099, | |
| "rewards/margins": 0.9091914892196655, | |
| "rewards/rejected": -1.2409002780914307, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.756756756756757e-08, | |
| "logits/chosen": -0.06383942067623138, | |
| "logits/rejected": 0.10376764833927155, | |
| "logps/chosen": -2.433790683746338, | |
| "logps/rejected": -46.35570526123047, | |
| "loss": 0.5548, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.22169184684753418, | |
| "rewards/margins": 4.687034606933594, | |
| "rewards/rejected": -4.908726692199707, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.486486486486487e-08, | |
| "logits/chosen": -0.1200224757194519, | |
| "logits/rejected": -0.01750156842172146, | |
| "logps/chosen": -2.9395155906677246, | |
| "logps/rejected": -21.974435806274414, | |
| "loss": 0.6243, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.30157679319381714, | |
| "rewards/margins": 1.9352340698242188, | |
| "rewards/rejected": -2.2368111610412598, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.216216216216216e-08, | |
| "logits/chosen": -0.05863137170672417, | |
| "logits/rejected": 0.04493208974599838, | |
| "logps/chosen": -2.9632556438446045, | |
| "logps/rejected": -37.81572723388672, | |
| "loss": 0.4845, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.307827889919281, | |
| "rewards/margins": 3.4735076427459717, | |
| "rewards/rejected": -3.7813358306884766, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.945945945945946e-08, | |
| "logits/chosen": 0.08130965381860733, | |
| "logits/rejected": 0.10798033326864243, | |
| "logps/chosen": -3.6858971118927, | |
| "logps/rejected": -11.559499740600586, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4870009422302246, | |
| "rewards/margins": 0.6409335732460022, | |
| "rewards/rejected": -1.127934455871582, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.6756756756756756e-08, | |
| "logits/chosen": -0.023443683981895447, | |
| "logits/rejected": 0.08183663338422775, | |
| "logps/chosen": -2.5851669311523438, | |
| "logps/rejected": -28.7203369140625, | |
| "loss": 0.6222, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3686230778694153, | |
| "rewards/margins": 3.186333417892456, | |
| "rewards/rejected": -3.5549559593200684, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.4054054054054056e-08, | |
| "logits/chosen": -0.03929876536130905, | |
| "logits/rejected": -0.0392637625336647, | |
| "logps/chosen": -3.2762999534606934, | |
| "logps/rejected": -3.265766143798828, | |
| "loss": 0.6936, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -0.44085612893104553, | |
| "rewards/margins": -0.0008075117948465049, | |
| "rewards/rejected": -0.44004860520362854, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.1351351351351355e-08, | |
| "logits/chosen": 0.051630906760692596, | |
| "logits/rejected": 0.051436759531497955, | |
| "logps/chosen": -3.7616355419158936, | |
| "logps/rejected": -3.779416561126709, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5269044041633606, | |
| "rewards/margins": 0.003887352766469121, | |
| "rewards/rejected": -0.530791699886322, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.864864864864865e-08, | |
| "logits/chosen": -0.1325063705444336, | |
| "logits/rejected": -0.011743051931262016, | |
| "logps/chosen": -3.144193649291992, | |
| "logps/rejected": -31.121057510375977, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.45744314789772034, | |
| "rewards/margins": 2.0994107723236084, | |
| "rewards/rejected": -2.556854009628296, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.5945945945945947e-08, | |
| "logits/chosen": -0.032472122460603714, | |
| "logits/rejected": 0.010701514780521393, | |
| "logps/chosen": -3.4038867950439453, | |
| "logps/rejected": -27.584125518798828, | |
| "loss": 0.5532, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.43393245339393616, | |
| "rewards/margins": 2.7115139961242676, | |
| "rewards/rejected": -3.145446300506592, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.3243243243243246e-08, | |
| "logits/chosen": 0.15468727052211761, | |
| "logits/rejected": 0.15615049004554749, | |
| "logps/chosen": -3.311304807662964, | |
| "logps/rejected": -9.414125442504883, | |
| "loss": 0.6202, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4434788227081299, | |
| "rewards/margins": 0.7733428478240967, | |
| "rewards/rejected": -1.2168217897415161, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.054054054054054e-08, | |
| "logits/chosen": -0.24390192329883575, | |
| "logits/rejected": -0.007583351340144873, | |
| "logps/chosen": -3.870515823364258, | |
| "logps/rejected": -39.793636322021484, | |
| "loss": 0.5515, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4439309239387512, | |
| "rewards/margins": 3.1208810806274414, | |
| "rewards/rejected": -3.5648117065429688, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.783783783783784e-08, | |
| "logits/chosen": -0.013895763084292412, | |
| "logits/rejected": -0.014107026159763336, | |
| "logps/chosen": -2.9954867362976074, | |
| "logps/rejected": -2.9771194458007812, | |
| "loss": 0.6946, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.3877726197242737, | |
| "rewards/margins": -0.0028153404127806425, | |
| "rewards/rejected": -0.3849572539329529, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.513513513513514e-08, | |
| "logits/chosen": 0.02430087886750698, | |
| "logits/rejected": 0.03372497111558914, | |
| "logps/chosen": -2.9531288146972656, | |
| "logps/rejected": -9.80555248260498, | |
| "loss": 0.6233, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.42032957077026367, | |
| "rewards/margins": 1.0188238620758057, | |
| "rewards/rejected": -1.4391534328460693, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.2432432432432436e-08, | |
| "logits/chosen": 0.07200212776660919, | |
| "logits/rejected": 0.13575957715511322, | |
| "logps/chosen": -2.400723934173584, | |
| "logps/rejected": -35.662139892578125, | |
| "loss": 0.495, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.2884984314441681, | |
| "rewards/margins": 3.8999791145324707, | |
| "rewards/rejected": -4.188477516174316, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.972972972972973e-08, | |
| "logits/chosen": -0.36185282468795776, | |
| "logits/rejected": 0.017909474670886993, | |
| "logps/chosen": -3.7324061393737793, | |
| "logps/rejected": -76.72266387939453, | |
| "loss": 0.484, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.46223974227905273, | |
| "rewards/margins": 5.9966840744018555, | |
| "rewards/rejected": -6.458924293518066, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.7027027027027028e-08, | |
| "logits/chosen": 0.10856851190328598, | |
| "logits/rejected": 0.156903475522995, | |
| "logps/chosen": -2.646015167236328, | |
| "logps/rejected": -21.1688175201416, | |
| "loss": 0.6246, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.32123029232025146, | |
| "rewards/margins": 2.1456785202026367, | |
| "rewards/rejected": -2.4669089317321777, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.4324324324324324e-08, | |
| "logits/chosen": 0.06192172318696976, | |
| "logits/rejected": 0.17663788795471191, | |
| "logps/chosen": -2.737999677658081, | |
| "logps/rejected": -26.416967391967773, | |
| "loss": 0.5541, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.3163206875324249, | |
| "rewards/margins": 2.440368175506592, | |
| "rewards/rejected": -2.756688117980957, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.1621621621621623e-08, | |
| "logits/chosen": -0.042785413563251495, | |
| "logits/rejected": 0.24969927966594696, | |
| "logps/chosen": -2.8873772621154785, | |
| "logps/rejected": -53.33967208862305, | |
| "loss": 0.5587, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.28660848736763, | |
| "rewards/margins": 4.235617160797119, | |
| "rewards/rejected": -4.522225379943848, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.891891891891892e-08, | |
| "logits/chosen": -0.05043508857488632, | |
| "logits/rejected": -0.047407329082489014, | |
| "logps/chosen": -2.8302414417266846, | |
| "logps/rejected": -8.945648193359375, | |
| "loss": 0.6245, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.37974387407302856, | |
| "rewards/margins": 0.7309183478355408, | |
| "rewards/rejected": -1.1106622219085693, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.6216216216216218e-08, | |
| "logits/chosen": 0.0068864584900438786, | |
| "logits/rejected": 0.09314581006765366, | |
| "logps/chosen": -3.152959108352661, | |
| "logps/rejected": -26.90890121459961, | |
| "loss": 0.5543, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.4128968119621277, | |
| "rewards/margins": 2.2474188804626465, | |
| "rewards/rejected": -2.660315990447998, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.3513513513513514e-08, | |
| "logits/chosen": -0.013911092653870583, | |
| "logits/rejected": 0.13355985283851624, | |
| "logps/chosen": -4.011211395263672, | |
| "logps/rejected": -43.98572540283203, | |
| "loss": 0.6181, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4239436686038971, | |
| "rewards/margins": 3.6849803924560547, | |
| "rewards/rejected": -4.10892391204834, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.0810810810810811e-08, | |
| "logits/chosen": -0.06818331778049469, | |
| "logits/rejected": -0.02771920897066593, | |
| "logps/chosen": -2.947129964828491, | |
| "logps/rejected": -16.071887969970703, | |
| "loss": 0.6201, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.47470030188560486, | |
| "rewards/margins": 1.8438775539398193, | |
| "rewards/rejected": -2.318577766418457, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.108108108108109e-09, | |
| "logits/chosen": -0.07423537969589233, | |
| "logits/rejected": -0.07442188262939453, | |
| "logps/chosen": -3.448483943939209, | |
| "logps/rejected": -3.465106964111328, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.5680142641067505, | |
| "rewards/margins": 0.0010964989196509123, | |
| "rewards/rejected": -0.5691107511520386, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.405405405405406e-09, | |
| "logits/chosen": -0.31879547238349915, | |
| "logits/rejected": 0.06546586751937866, | |
| "logps/chosen": -2.377040386199951, | |
| "logps/rejected": -101.98162078857422, | |
| "loss": 0.4165, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.21218986809253693, | |
| "rewards/margins": 10.473356246948242, | |
| "rewards/rejected": -10.685547828674316, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.702702702702703e-09, | |
| "logits/chosen": 0.08084534108638763, | |
| "logits/rejected": 0.0808691754937172, | |
| "logps/chosen": -4.6396708488464355, | |
| "logps/rejected": -4.617379188537598, | |
| "loss": 0.6956, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.7069979906082153, | |
| "rewards/margins": -0.004753425717353821, | |
| "rewards/rejected": -0.7022445797920227, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.14494428038597107, | |
| "logits/rejected": 0.04603511840105057, | |
| "logps/chosen": -2.9900779724121094, | |
| "logps/rejected": -54.25315475463867, | |
| "loss": 0.4851, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.3263259530067444, | |
| "rewards/margins": 4.676065921783447, | |
| "rewards/rejected": -5.002391815185547, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_logits/chosen": -0.10488395392894745, | |
| "eval_logits/rejected": 0.038251034915447235, | |
| "eval_logps/chosen": -3.3913989067077637, | |
| "eval_logps/rejected": -30.402114868164062, | |
| "eval_loss": 0.5553861260414124, | |
| "eval_rewards/accuracies": 0.4000000059604645, | |
| "eval_rewards/chosen": -0.34520336985588074, | |
| "eval_rewards/margins": 2.5873284339904785, | |
| "eval_rewards/rejected": -2.9325320720672607, | |
| "eval_runtime": 43.9337, | |
| "eval_samples_per_second": 2.276, | |
| "eval_steps_per_second": 2.276, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2000, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |