| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9981298423724285, | |
| "eval_steps": 400, | |
| "global_step": 467, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "abs_diff": 0.043448589742183685, | |
| "all_logps_1": -124.6441650390625, | |
| "all_logps_1_values": -124.64417266845703, | |
| "all_logps_2": 459.15625, | |
| "all_logps_2_values": 459.15625, | |
| "epoch": 0.0021373230029388193, | |
| "grad_norm": 16.66867807446414, | |
| "learning_rate": 2.127659574468085e-08, | |
| "logits/chosen": -1.1381689310073853, | |
| "logits/rejected": -0.9913416504859924, | |
| "logps/chosen": -0.2839311361312866, | |
| "logps/rejected": -0.29555341601371765, | |
| "loss": 1.5077, | |
| "original_losses": 1.5989841222763062, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7098277807235718, | |
| "rewards/margins": 0.029055725783109665, | |
| "rewards/rejected": -0.7388835549354553, | |
| "step": 1, | |
| "weight": 0.9598712921142578 | |
| }, | |
| { | |
| "abs_diff": 0.050563473254442215, | |
| "all_logps_1": -113.89578247070312, | |
| "all_logps_1_values": -113.89578247070312, | |
| "all_logps_2": 426.234375, | |
| "all_logps_2_values": 426.234375, | |
| "epoch": 0.010686615014694095, | |
| "grad_norm": 12.434660441186981, | |
| "learning_rate": 1.0638297872340425e-07, | |
| "logits/chosen": -0.9904537796974182, | |
| "logits/rejected": -0.9189692735671997, | |
| "logps/chosen": -0.2694719731807709, | |
| "logps/rejected": -0.2684631943702698, | |
| "loss": 1.5251, | |
| "original_losses": 1.6255850791931152, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.6736798286437988, | |
| "rewards/margins": -0.0025218012742698193, | |
| "rewards/rejected": -0.6711580753326416, | |
| "step": 5, | |
| "weight": 0.9548923373222351 | |
| }, | |
| { | |
| "abs_diff": 0.06418919563293457, | |
| "all_logps_1": -118.16609191894531, | |
| "all_logps_1_values": -118.16609191894531, | |
| "all_logps_2": 443.21875, | |
| "all_logps_2_values": 443.21875, | |
| "epoch": 0.02137323002938819, | |
| "grad_norm": 11.724962863400911, | |
| "learning_rate": 2.127659574468085e-07, | |
| "logits/chosen": -0.9794756174087524, | |
| "logits/rejected": -0.9353710412979126, | |
| "logps/chosen": -0.2719997763633728, | |
| "logps/rejected": -0.2735568881034851, | |
| "loss": 1.5172, | |
| "original_losses": 1.620931625366211, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.6799993515014648, | |
| "rewards/margins": 0.0038928240537643433, | |
| "rewards/rejected": -0.6838923096656799, | |
| "step": 10, | |
| "weight": 0.9420804977416992 | |
| }, | |
| { | |
| "abs_diff": 0.06552017480134964, | |
| "all_logps_1": -101.9596939086914, | |
| "all_logps_1_values": -101.95967864990234, | |
| "all_logps_2": 370.20001220703125, | |
| "all_logps_2_values": 370.20001220703125, | |
| "epoch": 0.03205984504408229, | |
| "grad_norm": 9.773542967175878, | |
| "learning_rate": 3.1914893617021275e-07, | |
| "logits/chosen": -0.9607246518135071, | |
| "logits/rejected": -0.9163097143173218, | |
| "logps/chosen": -0.29539960622787476, | |
| "logps/rejected": -0.2832711338996887, | |
| "loss": 1.5128, | |
| "original_losses": 1.6492595672607422, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.7384990453720093, | |
| "rewards/margins": -0.030321191996335983, | |
| "rewards/rejected": -0.708177924156189, | |
| "step": 15, | |
| "weight": 0.9420396089553833 | |
| }, | |
| { | |
| "abs_diff": 0.082237109541893, | |
| "all_logps_1": -95.52127075195312, | |
| "all_logps_1_values": -95.52125549316406, | |
| "all_logps_2": 368.6625061035156, | |
| "all_logps_2_values": 368.6625061035156, | |
| "epoch": 0.04274646005877638, | |
| "grad_norm": 14.386337719633973, | |
| "learning_rate": 4.25531914893617e-07, | |
| "logits/chosen": -0.9820459485054016, | |
| "logits/rejected": -0.9820452928543091, | |
| "logps/chosen": -0.26204216480255127, | |
| "logps/rejected": -0.26956799626350403, | |
| "loss": 1.5149, | |
| "original_losses": 1.6124236583709717, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.6551053524017334, | |
| "rewards/margins": 0.018814602866768837, | |
| "rewards/rejected": -0.6739200353622437, | |
| "step": 20, | |
| "weight": 0.9291993379592896 | |
| }, | |
| { | |
| "abs_diff": 0.07468467205762863, | |
| "all_logps_1": -101.43566131591797, | |
| "all_logps_1_values": -101.43565368652344, | |
| "all_logps_2": 359.6499938964844, | |
| "all_logps_2_values": 359.6499938964844, | |
| "epoch": 0.053433075073470476, | |
| "grad_norm": 12.506683302853757, | |
| "learning_rate": 5.319148936170212e-07, | |
| "logits/chosen": -1.0295155048370361, | |
| "logits/rejected": -1.0065571069717407, | |
| "logps/chosen": -0.28278106451034546, | |
| "logps/rejected": -0.2869016230106354, | |
| "loss": 1.5005, | |
| "original_losses": 1.6180095672607422, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.706952691078186, | |
| "rewards/margins": 0.010301386937499046, | |
| "rewards/rejected": -0.7172540426254272, | |
| "step": 25, | |
| "weight": 0.9346221089363098 | |
| }, | |
| { | |
| "abs_diff": 0.07145524024963379, | |
| "all_logps_1": -96.14094543457031, | |
| "all_logps_1_values": -96.14093780517578, | |
| "all_logps_2": 358.6937561035156, | |
| "all_logps_2_values": 358.6937561035156, | |
| "epoch": 0.06411969008816458, | |
| "grad_norm": 17.486598946846197, | |
| "learning_rate": 6.382978723404255e-07, | |
| "logits/chosen": -1.0747442245483398, | |
| "logits/rejected": -0.9867307543754578, | |
| "logps/chosen": -0.27444857358932495, | |
| "logps/rejected": -0.27685946226119995, | |
| "loss": 1.5207, | |
| "original_losses": 1.6215848922729492, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.6861215233802795, | |
| "rewards/margins": 0.006027159281075001, | |
| "rewards/rejected": -0.6921486258506775, | |
| "step": 30, | |
| "weight": 0.9376131296157837 | |
| }, | |
| { | |
| "abs_diff": 0.08128118515014648, | |
| "all_logps_1": -110.31912994384766, | |
| "all_logps_1_values": -110.3191146850586, | |
| "all_logps_2": 396.7250061035156, | |
| "all_logps_2_values": 396.7250061035156, | |
| "epoch": 0.07480630510285867, | |
| "grad_norm": 10.190092324128308, | |
| "learning_rate": 7.446808510638297e-07, | |
| "logits/chosen": -1.0031483173370361, | |
| "logits/rejected": -0.9225772023200989, | |
| "logps/chosen": -0.2776695191860199, | |
| "logps/rejected": -0.3029964566230774, | |
| "loss": 1.5058, | |
| "original_losses": 1.5780258178710938, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.6941738128662109, | |
| "rewards/margins": 0.06331733614206314, | |
| "rewards/rejected": -0.7574911713600159, | |
| "step": 35, | |
| "weight": 0.9304083585739136 | |
| }, | |
| { | |
| "abs_diff": 0.06388907134532928, | |
| "all_logps_1": -94.03665924072266, | |
| "all_logps_1_values": -94.03666687011719, | |
| "all_logps_2": 347.20001220703125, | |
| "all_logps_2_values": 347.20001220703125, | |
| "epoch": 0.08549292011755276, | |
| "grad_norm": 12.383837039803712, | |
| "learning_rate": 8.51063829787234e-07, | |
| "logits/chosen": -0.9180997014045715, | |
| "logits/rejected": -0.9071486592292786, | |
| "logps/chosen": -0.28308817744255066, | |
| "logps/rejected": -0.29446059465408325, | |
| "loss": 1.5141, | |
| "original_losses": 1.6014320850372314, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.7077205181121826, | |
| "rewards/margins": 0.028431018814444542, | |
| "rewards/rejected": -0.7361515760421753, | |
| "step": 40, | |
| "weight": 0.9425530433654785 | |
| }, | |
| { | |
| "abs_diff": 0.09521429240703583, | |
| "all_logps_1": -106.0528793334961, | |
| "all_logps_1_values": -106.0528793334961, | |
| "all_logps_2": 362.95623779296875, | |
| "all_logps_2_values": 362.95623779296875, | |
| "epoch": 0.09617953513224686, | |
| "grad_norm": 9.970613374779385, | |
| "learning_rate": 9.574468085106384e-07, | |
| "logits/chosen": -0.9140686988830566, | |
| "logits/rejected": -0.8324721455574036, | |
| "logps/chosen": -0.33634239435195923, | |
| "logps/rejected": -0.34527257084846497, | |
| "loss": 1.4915, | |
| "original_losses": 1.614324927330017, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.8408559560775757, | |
| "rewards/margins": 0.02232544682919979, | |
| "rewards/rejected": -0.8631814122200012, | |
| "step": 45, | |
| "weight": 0.9211470484733582 | |
| }, | |
| { | |
| "abs_diff": 0.12202360481023788, | |
| "all_logps_1": -105.84830474853516, | |
| "all_logps_1_values": -105.84830474853516, | |
| "all_logps_2": 377.7437438964844, | |
| "all_logps_2_values": 377.7437438964844, | |
| "epoch": 0.10686615014694095, | |
| "grad_norm": 10.765426712830973, | |
| "learning_rate": 9.998741174712533e-07, | |
| "logits/chosen": -0.8902776837348938, | |
| "logits/rejected": -0.8994420766830444, | |
| "logps/chosen": -0.31167787313461304, | |
| "logps/rejected": -0.3589983582496643, | |
| "loss": 1.466, | |
| "original_losses": 1.5521076917648315, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.7791945934295654, | |
| "rewards/margins": 0.11830125004053116, | |
| "rewards/rejected": -0.8974958658218384, | |
| "step": 50, | |
| "weight": 0.9070577621459961 | |
| }, | |
| { | |
| "abs_diff": 0.11367271095514297, | |
| "all_logps_1": -112.1168441772461, | |
| "all_logps_1_values": -112.1168441772461, | |
| "all_logps_2": 420.46875, | |
| "all_logps_2_values": 420.46875, | |
| "epoch": 0.11755276516163506, | |
| "grad_norm": 10.584693183679102, | |
| "learning_rate": 9.991050648838675e-07, | |
| "logits/chosen": -0.8847481608390808, | |
| "logits/rejected": -0.8255330920219421, | |
| "logps/chosen": -0.28891468048095703, | |
| "logps/rejected": -0.3513794541358948, | |
| "loss": 1.465, | |
| "original_losses": 1.557521939277649, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.7222867012023926, | |
| "rewards/margins": 0.15616199374198914, | |
| "rewards/rejected": -0.8784486651420593, | |
| "step": 55, | |
| "weight": 0.9259511828422546 | |
| }, | |
| { | |
| "abs_diff": 0.08213352411985397, | |
| "all_logps_1": -120.3653564453125, | |
| "all_logps_1_values": -120.36537170410156, | |
| "all_logps_2": 451.7250061035156, | |
| "all_logps_2_values": 451.7250061035156, | |
| "epoch": 0.12823938017632916, | |
| "grad_norm": 20.487281254270606, | |
| "learning_rate": 9.97637968732563e-07, | |
| "logits/chosen": -0.9171462059020996, | |
| "logits/rejected": -0.8949100375175476, | |
| "logps/chosen": -0.2980085015296936, | |
| "logps/rejected": -0.32817280292510986, | |
| "loss": 1.4606, | |
| "original_losses": 1.5710750818252563, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.7450211644172668, | |
| "rewards/margins": 0.07541082799434662, | |
| "rewards/rejected": -0.8204320073127747, | |
| "step": 60, | |
| "weight": 0.9325092434883118 | |
| }, | |
| { | |
| "abs_diff": 0.08584319800138474, | |
| "all_logps_1": -115.28419494628906, | |
| "all_logps_1_values": -115.28419494628906, | |
| "all_logps_2": 410.28125, | |
| "all_logps_2_values": 410.28125, | |
| "epoch": 0.13892599519102325, | |
| "grad_norm": 13.268818877197086, | |
| "learning_rate": 9.954748808839674e-07, | |
| "logits/chosen": -0.9003847241401672, | |
| "logits/rejected": -0.9516555666923523, | |
| "logps/chosen": -0.31763237714767456, | |
| "logps/rejected": -0.3270418345928192, | |
| "loss": 1.4586, | |
| "original_losses": 1.614269495010376, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.794080913066864, | |
| "rewards/margins": 0.023523610085248947, | |
| "rewards/rejected": -0.817604660987854, | |
| "step": 65, | |
| "weight": 0.9301543235778809 | |
| }, | |
| { | |
| "abs_diff": 0.23710966110229492, | |
| "all_logps_1": -129.6254119873047, | |
| "all_logps_1_values": -129.6254425048828, | |
| "all_logps_2": 391.6187438964844, | |
| "all_logps_2_values": 391.6187438964844, | |
| "epoch": 0.14961261020571734, | |
| "grad_norm": 19.008527618804656, | |
| "learning_rate": 9.926188266120295e-07, | |
| "logits/chosen": -0.9297588467597961, | |
| "logits/rejected": -0.8964225053787231, | |
| "logps/chosen": -0.4621095657348633, | |
| "logps/rejected": -0.565943717956543, | |
| "loss": 1.4309, | |
| "original_losses": 1.5991542339324951, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -1.1552739143371582, | |
| "rewards/margins": 0.25958532094955444, | |
| "rewards/rejected": -1.414859414100647, | |
| "step": 70, | |
| "weight": 0.8780097961425781 | |
| }, | |
| { | |
| "abs_diff": 0.22396209836006165, | |
| "all_logps_1": -126.3341064453125, | |
| "all_logps_1_values": -126.33412170410156, | |
| "all_logps_2": 375.15625, | |
| "all_logps_2_values": 375.15625, | |
| "epoch": 0.16029922522041143, | |
| "grad_norm": 14.741661325228266, | |
| "learning_rate": 9.890738003669027e-07, | |
| "logits/chosen": -0.88294517993927, | |
| "logits/rejected": -0.8696261644363403, | |
| "logps/chosen": -0.6373583078384399, | |
| "logps/rejected": -0.7649468779563904, | |
| "loss": 1.371, | |
| "original_losses": 1.5059027671813965, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -1.593395709991455, | |
| "rewards/margins": 0.3189714848995209, | |
| "rewards/rejected": -1.9123672246932983, | |
| "step": 75, | |
| "weight": 0.874294102191925 | |
| }, | |
| { | |
| "abs_diff": 0.4753897786140442, | |
| "all_logps_1": -154.002197265625, | |
| "all_logps_1_values": -154.002197265625, | |
| "all_logps_2": 385.40625, | |
| "all_logps_2_values": 385.40625, | |
| "epoch": 0.17098584023510552, | |
| "grad_norm": 10.653088582817368, | |
| "learning_rate": 9.848447601883433e-07, | |
| "logits/chosen": -0.9209216833114624, | |
| "logits/rejected": -0.905800461769104, | |
| "logps/chosen": -0.9318068623542786, | |
| "logps/rejected": -1.1782509088516235, | |
| "loss": 1.3728, | |
| "original_losses": 1.6557430028915405, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -2.329517126083374, | |
| "rewards/margins": 0.61611008644104, | |
| "rewards/rejected": -2.945627212524414, | |
| "step": 80, | |
| "weight": 0.8384539484977722 | |
| }, | |
| { | |
| "abs_diff": 0.4482264518737793, | |
| "all_logps_1": -181.6018829345703, | |
| "all_logps_1_values": -181.6018829345703, | |
| "all_logps_2": 381.91876220703125, | |
| "all_logps_2_values": 381.91876220703125, | |
| "epoch": 0.18167245524979964, | |
| "grad_norm": 8.388730168314039, | |
| "learning_rate": 9.799376207714444e-07, | |
| "logits/chosen": -0.8116687536239624, | |
| "logits/rejected": -0.7630541324615479, | |
| "logps/chosen": -1.007387638092041, | |
| "logps/rejected": -1.0764662027359009, | |
| "loss": 1.3965, | |
| "original_losses": 1.8705193996429443, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.5184690952301025, | |
| "rewards/margins": 0.17269621789455414, | |
| "rewards/rejected": -2.6911654472351074, | |
| "step": 85, | |
| "weight": 0.8248960375785828 | |
| }, | |
| { | |
| "abs_diff": 0.638414204120636, | |
| "all_logps_1": -197.71530151367188, | |
| "all_logps_1_values": -197.7152862548828, | |
| "all_logps_2": 368.6000061035156, | |
| "all_logps_2_values": 368.6000061035156, | |
| "epoch": 0.19235907026449373, | |
| "grad_norm": 12.62143276771947, | |
| "learning_rate": 9.743592451943998e-07, | |
| "logits/chosen": -0.7098425626754761, | |
| "logits/rejected": -0.6454850435256958, | |
| "logps/chosen": -1.299263596534729, | |
| "logps/rejected": -1.3454030752182007, | |
| "loss": 1.3792, | |
| "original_losses": 2.042982578277588, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.2481586933135986, | |
| "rewards/margins": 0.11534923315048218, | |
| "rewards/rejected": -3.3635077476501465, | |
| "step": 90, | |
| "weight": 0.788603663444519 | |
| }, | |
| { | |
| "abs_diff": 0.3771124482154846, | |
| "all_logps_1": -198.22885131835938, | |
| "all_logps_1_values": -198.22885131835938, | |
| "all_logps_2": 307.64373779296875, | |
| "all_logps_2_values": 307.64373779296875, | |
| "epoch": 0.20304568527918782, | |
| "grad_norm": 9.223783777700444, | |
| "learning_rate": 9.681174353198686e-07, | |
| "logits/chosen": -0.7450689077377319, | |
| "logits/rejected": -0.7714122533798218, | |
| "logps/chosen": -1.5162893533706665, | |
| "logps/rejected": -1.538206696510315, | |
| "loss": 1.3537, | |
| "original_losses": 1.7573131322860718, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.7907233238220215, | |
| "rewards/margins": 0.054793525487184525, | |
| "rewards/rejected": -3.8455166816711426, | |
| "step": 95, | |
| "weight": 0.7987316846847534 | |
| }, | |
| { | |
| "abs_diff": 0.531648576259613, | |
| "all_logps_1": -257.82080078125, | |
| "all_logps_1_values": -257.82080078125, | |
| "all_logps_2": 405.08123779296875, | |
| "all_logps_2_values": 405.08123779296875, | |
| "epoch": 0.2137323002938819, | |
| "grad_norm": 13.130824511623645, | |
| "learning_rate": 9.612209208833646e-07, | |
| "logits/chosen": -0.7543559074401855, | |
| "logits/rejected": -0.6947053074836731, | |
| "logps/chosen": -1.3733211755752563, | |
| "logps/rejected": -1.4744349718093872, | |
| "loss": 1.3472, | |
| "original_losses": 1.8884124755859375, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.433303117752075, | |
| "rewards/margins": 0.2527844309806824, | |
| "rewards/rejected": -3.6860873699188232, | |
| "step": 100, | |
| "weight": 0.8195359110832214 | |
| }, | |
| { | |
| "abs_diff": 0.4814772605895996, | |
| "all_logps_1": -285.88824462890625, | |
| "all_logps_1_values": -285.88824462890625, | |
| "all_logps_2": 447.76251220703125, | |
| "all_logps_2_values": 447.76251220703125, | |
| "epoch": 0.224418915308576, | |
| "grad_norm": 15.741233324493118, | |
| "learning_rate": 9.536793472839324e-07, | |
| "logits/chosen": -0.5685318112373352, | |
| "logits/rejected": -0.5175650119781494, | |
| "logps/chosen": -1.1041462421417236, | |
| "logps/rejected": -1.3609198331832886, | |
| "loss": 1.347, | |
| "original_losses": 1.60434091091156, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -2.7603654861450195, | |
| "rewards/margins": 0.6419342756271362, | |
| "rewards/rejected": -3.4022998809814453, | |
| "step": 105, | |
| "weight": 0.8199658393859863 | |
| }, | |
| { | |
| "abs_diff": 0.5063992738723755, | |
| "all_logps_1": -312.87860107421875, | |
| "all_logps_1_values": -312.8785705566406, | |
| "all_logps_2": 410.79998779296875, | |
| "all_logps_2_values": 410.79998779296875, | |
| "epoch": 0.2351055303232701, | |
| "grad_norm": 14.779833008390499, | |
| "learning_rate": 9.455032620941839e-07, | |
| "logits/chosen": -0.3194349706172943, | |
| "logits/rejected": -0.27131232619285583, | |
| "logps/chosen": -1.436680793762207, | |
| "logps/rejected": -1.3837544918060303, | |
| "loss": 1.3485, | |
| "original_losses": 2.0654890537261963, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.5917022228240967, | |
| "rewards/margins": -0.13231578469276428, | |
| "rewards/rejected": -3.459386110305786, | |
| "step": 110, | |
| "weight": 0.8112524151802063 | |
| }, | |
| { | |
| "abs_diff": 0.79926997423172, | |
| "all_logps_1": -352.8046875, | |
| "all_logps_1_values": -352.8046875, | |
| "all_logps_2": 401.26873779296875, | |
| "all_logps_2_values": 401.26873779296875, | |
| "epoch": 0.2457921453379642, | |
| "grad_norm": 17.098670325278757, | |
| "learning_rate": 9.367041003085648e-07, | |
| "logits/chosen": -0.27068907022476196, | |
| "logits/rejected": -0.25977402925491333, | |
| "logps/chosen": -1.8351905345916748, | |
| "logps/rejected": -2.079685688018799, | |
| "loss": 1.2568, | |
| "original_losses": 1.9370386600494385, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -4.587975978851318, | |
| "rewards/margins": 0.6112388968467712, | |
| "rewards/rejected": -5.199214458465576, | |
| "step": 115, | |
| "weight": 0.7355886101722717 | |
| }, | |
| { | |
| "abs_diff": 0.4315846860408783, | |
| "all_logps_1": -371.93505859375, | |
| "all_logps_1_values": -371.93505859375, | |
| "all_logps_2": 397.9624938964844, | |
| "all_logps_2_values": 397.9624938964844, | |
| "epoch": 0.2564787603526583, | |
| "grad_norm": 17.135021647585766, | |
| "learning_rate": 9.272941683504808e-07, | |
| "logits/chosen": -0.18766793608665466, | |
| "logits/rejected": -0.1377825289964676, | |
| "logps/chosen": -1.6060386896133423, | |
| "logps/rejected": -1.7283703088760376, | |
| "loss": 1.2524, | |
| "original_losses": 1.669327974319458, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -4.015096187591553, | |
| "rewards/margins": 0.30582934617996216, | |
| "rewards/rejected": -4.320925712585449, | |
| "step": 120, | |
| "weight": 0.7726086378097534 | |
| }, | |
| { | |
| "abs_diff": 0.8556106686592102, | |
| "all_logps_1": -424.2312927246094, | |
| "all_logps_1_values": -424.2313537597656, | |
| "all_logps_2": 358.1312561035156, | |
| "all_logps_2_values": 358.1312561035156, | |
| "epoch": 0.2671653753673524, | |
| "grad_norm": 18.949047249790798, | |
| "learning_rate": 9.172866268606513e-07, | |
| "logits/chosen": -0.0937797874212265, | |
| "logits/rejected": -0.08780622482299805, | |
| "logps/chosen": -2.3565449714660645, | |
| "logps/rejected": -2.821481227874756, | |
| "loss": 1.2455, | |
| "original_losses": 1.5799314975738525, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -5.891362190246582, | |
| "rewards/margins": 1.1623404026031494, | |
| "rewards/rejected": -7.053703308105469, | |
| "step": 125, | |
| "weight": 0.6997275352478027 | |
| }, | |
| { | |
| "abs_diff": 1.122897982597351, | |
| "all_logps_1": -483.11285400390625, | |
| "all_logps_1_values": -483.11279296875, | |
| "all_logps_2": 356.2250061035156, | |
| "all_logps_2_values": 356.2250061035156, | |
| "epoch": 0.2778519903820465, | |
| "grad_norm": 16.067627857167523, | |
| "learning_rate": 9.066954722907638e-07, | |
| "logits/chosen": 0.18425658345222473, | |
| "logits/rejected": 0.12208795547485352, | |
| "logps/chosen": -2.2584593296051025, | |
| "logps/rejected": -2.747421979904175, | |
| "loss": 1.2378, | |
| "original_losses": 1.9530925750732422, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -5.646147727966309, | |
| "rewards/margins": 1.2224081754684448, | |
| "rewards/rejected": -6.868556022644043, | |
| "step": 130, | |
| "weight": 0.6967185139656067 | |
| }, | |
| { | |
| "abs_diff": 0.5274697542190552, | |
| "all_logps_1": -584.13671875, | |
| "all_logps_1_values": -584.13671875, | |
| "all_logps_2": 443.01873779296875, | |
| "all_logps_2_values": 443.01873779296875, | |
| "epoch": 0.2885386053967406, | |
| "grad_norm": 29.366033343143968, | |
| "learning_rate": 8.955355173281707e-07, | |
| "logits/chosen": 0.3088318705558777, | |
| "logits/rejected": 0.3932690918445587, | |
| "logps/chosen": -2.3267366886138916, | |
| "logps/rejected": -2.385960102081299, | |
| "loss": 1.1916, | |
| "original_losses": 1.8465898036956787, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -5.816841125488281, | |
| "rewards/margins": 0.1480589658021927, | |
| "rewards/rejected": -5.964900016784668, | |
| "step": 135, | |
| "weight": 0.7594529390335083 | |
| }, | |
| { | |
| "abs_diff": 0.9901386499404907, | |
| "all_logps_1": -715.9130859375, | |
| "all_logps_1_values": -715.9131469726562, | |
| "all_logps_2": 402.9312438964844, | |
| "all_logps_2_values": 402.9312438964844, | |
| "epoch": 0.2992252204114347, | |
| "grad_norm": 27.69156284264097, | |
| "learning_rate": 8.838223701790055e-07, | |
| "logits/chosen": 0.5694825649261475, | |
| "logits/rejected": 0.5738533139228821, | |
| "logps/chosen": -3.3967947959899902, | |
| "logps/rejected": -3.4784629344940186, | |
| "loss": 1.1521, | |
| "original_losses": 2.2902231216430664, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -8.491987228393555, | |
| "rewards/margins": 0.20417042076587677, | |
| "rewards/rejected": -8.696157455444336, | |
| "step": 140, | |
| "weight": 0.6874681115150452 | |
| }, | |
| { | |
| "abs_diff": 0.9199058413505554, | |
| "all_logps_1": -995.3132934570312, | |
| "all_logps_1_values": -995.3132934570312, | |
| "all_logps_2": 409.5249938964844, | |
| "all_logps_2_values": 409.5249938964844, | |
| "epoch": 0.30991183542612877, | |
| "grad_norm": 28.11539806786062, | |
| "learning_rate": 8.71572412738697e-07, | |
| "logits/chosen": 0.8747909665107727, | |
| "logits/rejected": 0.9098325967788696, | |
| "logps/chosen": -3.898921251296997, | |
| "logps/rejected": -3.9907355308532715, | |
| "loss": 1.1592, | |
| "original_losses": 2.074253797531128, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -9.74730110168457, | |
| "rewards/margins": 0.22953681647777557, | |
| "rewards/rejected": -9.976838111877441, | |
| "step": 145, | |
| "weight": 0.6336122751235962 | |
| }, | |
| { | |
| "abs_diff": 1.7418813705444336, | |
| "all_logps_1": -1663.3861083984375, | |
| "all_logps_1_values": -1663.3861083984375, | |
| "all_logps_2": 383.75, | |
| "all_logps_2_values": 383.75, | |
| "epoch": 0.32059845044082286, | |
| "grad_norm": 43.30888911111554, | |
| "learning_rate": 8.588027776804058e-07, | |
| "logits/chosen": 1.2933635711669922, | |
| "logits/rejected": 1.2684452533721924, | |
| "logps/chosen": -6.538305759429932, | |
| "logps/rejected": -7.486212253570557, | |
| "loss": 1.0994, | |
| "original_losses": 1.926180124282837, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -16.34576416015625, | |
| "rewards/margins": 2.3697667121887207, | |
| "rewards/rejected": -18.715530395507812, | |
| "step": 150, | |
| "weight": 0.5583394765853882 | |
| }, | |
| { | |
| "abs_diff": 1.5373389720916748, | |
| "all_logps_1": -2462.133056640625, | |
| "all_logps_1_values": -2462.13330078125, | |
| "all_logps_2": 434.73748779296875, | |
| "all_logps_2_values": 434.73748779296875, | |
| "epoch": 0.33128506545551695, | |
| "grad_norm": 47.421884036345716, | |
| "learning_rate": 8.455313244934324e-07, | |
| "logits/chosen": 1.8083369731903076, | |
| "logits/rejected": 1.890794038772583, | |
| "logps/chosen": -8.33267879486084, | |
| "logps/rejected": -9.018165588378906, | |
| "loss": 1.0741, | |
| "original_losses": 2.0032851696014404, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -20.83169937133789, | |
| "rewards/margins": 1.713716745376587, | |
| "rewards/rejected": -22.5454158782959, | |
| "step": 155, | |
| "weight": 0.5593416094779968 | |
| }, | |
| { | |
| "abs_diff": 1.8985588550567627, | |
| "all_logps_1": -2538.660400390625, | |
| "all_logps_1_values": -2538.66064453125, | |
| "all_logps_2": 403.66876220703125, | |
| "all_logps_2_values": 403.66876220703125, | |
| "epoch": 0.34197168047021104, | |
| "grad_norm": 58.88642904599502, | |
| "learning_rate": 8.317766145051057e-07, | |
| "logits/chosen": 2.1515212059020996, | |
| "logits/rejected": 2.141986846923828, | |
| "logps/chosen": -8.633856773376465, | |
| "logps/rejected": -9.374483108520508, | |
| "loss": 1.0769, | |
| "original_losses": 2.3099827766418457, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -21.584644317626953, | |
| "rewards/margins": 1.8515657186508179, | |
| "rewards/rejected": -23.436208724975586, | |
| "step": 160, | |
| "weight": 0.5209288001060486 | |
| }, | |
| { | |
| "abs_diff": 2.2082934379577637, | |
| "all_logps_1": -3570.93603515625, | |
| "all_logps_1_values": -3570.936279296875, | |
| "all_logps_2": 442.4437561035156, | |
| "all_logps_2_values": 442.4437561035156, | |
| "epoch": 0.3526582954849052, | |
| "grad_norm": 32.977138977170775, | |
| "learning_rate": 8.175578849210894e-07, | |
| "logits/chosen": 2.5748469829559326, | |
| "logits/rejected": 2.677804470062256, | |
| "logps/chosen": -9.694478988647461, | |
| "logps/rejected": -10.093037605285645, | |
| "loss": 1.0398, | |
| "original_losses": 3.134640693664551, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -24.236202239990234, | |
| "rewards/margins": 0.9963935017585754, | |
| "rewards/rejected": -25.232593536376953, | |
| "step": 165, | |
| "weight": 0.49304407835006714 | |
| }, | |
| { | |
| "abs_diff": 2.007434129714966, | |
| "all_logps_1": -3220.789794921875, | |
| "all_logps_1_values": -3220.789794921875, | |
| "all_logps_2": 357.3062438964844, | |
| "all_logps_2_values": 357.3062438964844, | |
| "epoch": 0.36334491049959927, | |
| "grad_norm": 44.745926058943496, | |
| "learning_rate": 8.028950219204099e-07, | |
| "logits/chosen": 2.934321641921997, | |
| "logits/rejected": 2.8931219577789307, | |
| "logps/chosen": -11.122208595275879, | |
| "logps/rejected": -11.998506546020508, | |
| "loss": 0.9596, | |
| "original_losses": 2.2297332286834717, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -27.80552101135254, | |
| "rewards/margins": 2.1907458305358887, | |
| "rewards/rejected": -29.996265411376953, | |
| "step": 170, | |
| "weight": 0.49520620703697205 | |
| }, | |
| { | |
| "abs_diff": 2.5391037464141846, | |
| "all_logps_1": -3010.77099609375, | |
| "all_logps_1_values": -3010.77099609375, | |
| "all_logps_2": 336.26251220703125, | |
| "all_logps_2_values": 336.26251220703125, | |
| "epoch": 0.37403152551429336, | |
| "grad_norm": 50.44282847929724, | |
| "learning_rate": 7.878085328428368e-07, | |
| "logits/chosen": 2.6517717838287354, | |
| "logits/rejected": 2.698502779006958, | |
| "logps/chosen": -11.271635055541992, | |
| "logps/rejected": -12.422686576843262, | |
| "loss": 0.953, | |
| "original_losses": 2.4483256340026855, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -28.1790828704834, | |
| "rewards/margins": 2.8776297569274902, | |
| "rewards/rejected": -31.056713104248047, | |
| "step": 175, | |
| "weight": 0.45478373765945435 | |
| }, | |
| { | |
| "abs_diff": 2.311084270477295, | |
| "all_logps_1": -3630.26123046875, | |
| "all_logps_1_values": -3630.26123046875, | |
| "all_logps_2": 367.6937561035156, | |
| "all_logps_2_values": 367.6937561035156, | |
| "epoch": 0.38471814052898745, | |
| "grad_norm": 54.556403188950036, | |
| "learning_rate": 7.723195175075135e-07, | |
| "logits/chosen": 2.640475273132324, | |
| "logits/rejected": 2.6134068965911865, | |
| "logps/chosen": -12.537522315979004, | |
| "logps/rejected": -13.568713188171387, | |
| "loss": 0.9044, | |
| "original_losses": 2.333768844604492, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -31.343807220458984, | |
| "rewards/margins": 2.577979564666748, | |
| "rewards/rejected": -33.921791076660156, | |
| "step": 180, | |
| "weight": 0.46685990691185 | |
| }, | |
| { | |
| "abs_diff": 2.934654951095581, | |
| "all_logps_1": -5179.39404296875, | |
| "all_logps_1_values": -5179.39404296875, | |
| "all_logps_2": 370.9624938964844, | |
| "all_logps_2_values": 370.9624938964844, | |
| "epoch": 0.39540475554368154, | |
| "grad_norm": 57.260252425269734, | |
| "learning_rate": 7.564496387029531e-07, | |
| "logits/chosen": 2.326862096786499, | |
| "logits/rejected": 2.4421494007110596, | |
| "logps/chosen": -15.849513053894043, | |
| "logps/rejected": -17.323734283447266, | |
| "loss": 0.9407, | |
| "original_losses": 2.5988547801971436, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -39.623779296875, | |
| "rewards/margins": 3.6855552196502686, | |
| "rewards/rejected": -43.30933380126953, | |
| "step": 185, | |
| "weight": 0.40877920389175415 | |
| }, | |
| { | |
| "abs_diff": 2.9652016162872314, | |
| "all_logps_1": -5177.00244140625, | |
| "all_logps_1_values": -5177.00244140625, | |
| "all_logps_2": 374.4312438964844, | |
| "all_logps_2_values": 374.4312438964844, | |
| "epoch": 0.40609137055837563, | |
| "grad_norm": 83.2255328888069, | |
| "learning_rate": 7.402210918896689e-07, | |
| "logits/chosen": 2.44303297996521, | |
| "logits/rejected": 2.4873244762420654, | |
| "logps/chosen": -15.580667495727539, | |
| "logps/rejected": -17.045442581176758, | |
| "loss": 0.9238, | |
| "original_losses": 2.6292238235473633, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -38.95166778564453, | |
| "rewards/margins": 3.661935329437256, | |
| "rewards/rejected": -42.61360168457031, | |
| "step": 190, | |
| "weight": 0.40674179792404175 | |
| }, | |
| { | |
| "abs_diff": 2.7273154258728027, | |
| "all_logps_1": -4500.06005859375, | |
| "all_logps_1_values": -4500.06005859375, | |
| "all_logps_2": 380.1312561035156, | |
| "all_logps_2_values": 380.1312561035156, | |
| "epoch": 0.4167779855730697, | |
| "grad_norm": 84.18074257984793, | |
| "learning_rate": 7.236565741578162e-07, | |
| "logits/chosen": 2.6910769939422607, | |
| "logits/rejected": 2.7326107025146484, | |
| "logps/chosen": -13.98046875, | |
| "logps/rejected": -15.500396728515625, | |
| "loss": 0.9048, | |
| "original_losses": 2.1395676136016846, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -34.95117950439453, | |
| "rewards/margins": 3.7998204231262207, | |
| "rewards/rejected": -38.75099182128906, | |
| "step": 195, | |
| "weight": 0.3992369771003723 | |
| }, | |
| { | |
| "abs_diff": 2.3771374225616455, | |
| "all_logps_1": -4996.7001953125, | |
| "all_logps_1_values": -4996.7001953125, | |
| "all_logps_2": 438.8500061035156, | |
| "all_logps_2_values": 438.8500061035156, | |
| "epoch": 0.4274646005877638, | |
| "grad_norm": 51.852682835194706, | |
| "learning_rate": 7.067792524832603e-07, | |
| "logits/chosen": 2.5128085613250732, | |
| "logits/rejected": 2.454047679901123, | |
| "logps/chosen": -13.007303237915039, | |
| "logps/rejected": -13.782841682434082, | |
| "loss": 0.9745, | |
| "original_losses": 2.816681385040283, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -32.51825714111328, | |
| "rewards/margins": 1.9388458728790283, | |
| "rewards/rejected": -34.45710372924805, | |
| "step": 200, | |
| "weight": 0.4336828589439392 | |
| }, | |
| { | |
| "abs_diff": 2.789199113845825, | |
| "all_logps_1": -5606.87744140625, | |
| "all_logps_1_values": -5606.87744140625, | |
| "all_logps_2": 413.7875061035156, | |
| "all_logps_2_values": 413.7875061035156, | |
| "epoch": 0.4381512156024579, | |
| "grad_norm": 82.65919240097834, | |
| "learning_rate": 6.896127313264642e-07, | |
| "logits/chosen": 2.4827866554260254, | |
| "logits/rejected": 2.610020399093628, | |
| "logps/chosen": -15.495327949523926, | |
| "logps/rejected": -16.71689224243164, | |
| "loss": 0.8079, | |
| "original_losses": 2.6531503200531006, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -38.738319396972656, | |
| "rewards/margins": 3.053907871246338, | |
| "rewards/rejected": -41.7922248840332, | |
| "step": 205, | |
| "weight": 0.40062981843948364 | |
| }, | |
| { | |
| "abs_diff": 3.1174449920654297, | |
| "all_logps_1": -6078.1650390625, | |
| "all_logps_1_values": -6078.1650390625, | |
| "all_logps_2": 408.83123779296875, | |
| "all_logps_2_values": 408.83123779296875, | |
| "epoch": 0.448837830617152, | |
| "grad_norm": 66.91462129577006, | |
| "learning_rate": 6.721810196195174e-07, | |
| "logits/chosen": 2.3251194953918457, | |
| "logits/rejected": 2.481720209121704, | |
| "logps/chosen": -15.918850898742676, | |
| "logps/rejected": -17.23949432373047, | |
| "loss": 0.8447, | |
| "original_losses": 2.9006853103637695, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -39.79712677001953, | |
| "rewards/margins": 3.3016059398651123, | |
| "rewards/rejected": -43.09873580932617, | |
| "step": 210, | |
| "weight": 0.37287402153015137 | |
| }, | |
| { | |
| "abs_diff": 3.3388848304748535, | |
| "all_logps_1": -6523.8935546875, | |
| "all_logps_1_values": -6523.8935546875, | |
| "all_logps_2": 405.98748779296875, | |
| "all_logps_2_values": 405.98748779296875, | |
| "epoch": 0.45952444563184613, | |
| "grad_norm": 95.8421548369589, | |
| "learning_rate": 6.545084971874736e-07, | |
| "logits/chosen": 2.866258382797241, | |
| "logits/rejected": 2.9341139793395996, | |
| "logps/chosen": -16.77628517150879, | |
| "logps/rejected": -18.90264320373535, | |
| "loss": 0.8426, | |
| "original_losses": 2.032466411590576, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -41.940711975097656, | |
| "rewards/margins": 5.31589412689209, | |
| "rewards/rejected": -47.25660705566406, | |
| "step": 215, | |
| "weight": 0.35115545988082886 | |
| }, | |
| { | |
| "abs_diff": 2.8094236850738525, | |
| "all_logps_1": -4738.73046875, | |
| "all_logps_1_values": -4738.73046875, | |
| "all_logps_2": 363.98126220703125, | |
| "all_logps_2_values": 363.98126220703125, | |
| "epoch": 0.4702110606465402, | |
| "grad_norm": 112.65545034373879, | |
| "learning_rate": 6.3661988065096e-07, | |
| "logits/chosen": 2.7162396907806396, | |
| "logits/rejected": 2.835710048675537, | |
| "logps/chosen": -15.200531005859375, | |
| "logps/rejected": -15.732034683227539, | |
| "loss": 0.7804, | |
| "original_losses": 3.5092949867248535, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -38.0013313293457, | |
| "rewards/margins": 1.32875394821167, | |
| "rewards/rejected": -39.33008575439453, | |
| "step": 220, | |
| "weight": 0.4126754403114319 | |
| }, | |
| { | |
| "abs_diff": 3.209429979324341, | |
| "all_logps_1": -5642.91943359375, | |
| "all_logps_1_values": -5642.92041015625, | |
| "all_logps_2": 383.92498779296875, | |
| "all_logps_2_values": 383.92498779296875, | |
| "epoch": 0.4808976756612343, | |
| "grad_norm": 37.46832492030243, | |
| "learning_rate": 6.185401888577487e-07, | |
| "logits/chosen": 2.5830130577087402, | |
| "logits/rejected": 2.689384937286377, | |
| "logps/chosen": -15.603918075561523, | |
| "logps/rejected": -16.610340118408203, | |
| "loss": 0.9122, | |
| "original_losses": 3.4112372398376465, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -39.00979995727539, | |
| "rewards/margins": 2.5160529613494873, | |
| "rewards/rejected": -41.525856018066406, | |
| "step": 225, | |
| "weight": 0.3604838252067566 | |
| }, | |
| { | |
| "abs_diff": 3.7125911712646484, | |
| "all_logps_1": -5569.94384765625, | |
| "all_logps_1_values": -5569.94384765625, | |
| "all_logps_2": 361.3125, | |
| "all_logps_2_values": 361.3125, | |
| "epoch": 0.4915842906759284, | |
| "grad_norm": 66.55702343700871, | |
| "learning_rate": 6.002947078916364e-07, | |
| "logits/chosen": 2.3465304374694824, | |
| "logits/rejected": 2.66461181640625, | |
| "logps/chosen": -17.106571197509766, | |
| "logps/rejected": -19.080835342407227, | |
| "loss": 0.8076, | |
| "original_losses": 2.705897808074951, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -42.76642608642578, | |
| "rewards/margins": 4.935657501220703, | |
| "rewards/rejected": -47.70208740234375, | |
| "step": 230, | |
| "weight": 0.3060615658760071 | |
| }, | |
| { | |
| "abs_diff": 2.281184434890747, | |
| "all_logps_1": -3926.673828125, | |
| "all_logps_1_values": -3926.67333984375, | |
| "all_logps_2": 311.42498779296875, | |
| "all_logps_2_values": 311.42498779296875, | |
| "epoch": 0.5022709056906225, | |
| "grad_norm": 52.12193473626352, | |
| "learning_rate": 5.819089557075688e-07, | |
| "logits/chosen": 2.5927655696868896, | |
| "logits/rejected": 2.721041679382324, | |
| "logps/chosen": -14.924860000610352, | |
| "logps/rejected": -15.577176094055176, | |
| "loss": 0.8527, | |
| "original_losses": 2.7761876583099365, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -37.3121452331543, | |
| "rewards/margins": 1.6307960748672485, | |
| "rewards/rejected": -38.94294357299805, | |
| "step": 235, | |
| "weight": 0.42170318961143494 | |
| }, | |
| { | |
| "abs_diff": 2.7382442951202393, | |
| "all_logps_1": -5511.8671875, | |
| "all_logps_1_values": -5511.8671875, | |
| "all_logps_2": 424.04376220703125, | |
| "all_logps_2_values": 424.04376220703125, | |
| "epoch": 0.5129575207053166, | |
| "grad_norm": 59.31175783914156, | |
| "learning_rate": 5.634086464424742e-07, | |
| "logits/chosen": 2.750415086746216, | |
| "logits/rejected": 2.8377902507781982, | |
| "logps/chosen": -15.228363037109375, | |
| "logps/rejected": -16.618165969848633, | |
| "loss": 0.8222, | |
| "original_losses": 2.4813647270202637, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -38.07091522216797, | |
| "rewards/margins": 3.474503993988037, | |
| "rewards/rejected": -41.545413970947266, | |
| "step": 240, | |
| "weight": 0.43811964988708496 | |
| }, | |
| { | |
| "abs_diff": 3.254149913787842, | |
| "all_logps_1": -5742.85595703125, | |
| "all_logps_1_values": -5742.85546875, | |
| "all_logps_2": 412.4624938964844, | |
| "all_logps_2_values": 412.4624938964844, | |
| "epoch": 0.5236441357200107, | |
| "grad_norm": 54.9226284927014, | |
| "learning_rate": 5.448196544517167e-07, | |
| "logits/chosen": 2.565314531326294, | |
| "logits/rejected": 2.691755533218384, | |
| "logps/chosen": -15.381324768066406, | |
| "logps/rejected": -17.23483657836914, | |
| "loss": 0.7997, | |
| "original_losses": 2.286261558532715, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -38.45330810546875, | |
| "rewards/margins": 4.633780479431152, | |
| "rewards/rejected": -43.08708953857422, | |
| "step": 245, | |
| "weight": 0.3790872097015381 | |
| }, | |
| { | |
| "abs_diff": 3.364607334136963, | |
| "all_logps_1": -5477.4482421875, | |
| "all_logps_1_values": -5477.4482421875, | |
| "all_logps_2": 341.70623779296875, | |
| "all_logps_2_values": 341.70623779296875, | |
| "epoch": 0.5343307507347048, | |
| "grad_norm": 75.70050581279018, | |
| "learning_rate": 5.26167978121472e-07, | |
| "logits/chosen": 2.6822657585144043, | |
| "logits/rejected": 2.7521121501922607, | |
| "logps/chosen": -16.76608657836914, | |
| "logps/rejected": -19.173168182373047, | |
| "loss": 0.8369, | |
| "original_losses": 1.7328109741210938, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -41.91521453857422, | |
| "rewards/margins": 6.017703056335449, | |
| "rewards/rejected": -47.932918548583984, | |
| "step": 250, | |
| "weight": 0.3458004593849182 | |
| }, | |
| { | |
| "abs_diff": 3.0323586463928223, | |
| "all_logps_1": -6443.626953125, | |
| "all_logps_1_values": -6443.62646484375, | |
| "all_logps_2": 363.6000061035156, | |
| "all_logps_2_values": 363.6000061035156, | |
| "epoch": 0.5450173657493989, | |
| "grad_norm": 39.687795704366174, | |
| "learning_rate": 5.074797035076318e-07, | |
| "logits/chosen": 2.954530954360962, | |
| "logits/rejected": 2.9405295848846436, | |
| "logps/chosen": -18.485279083251953, | |
| "logps/rejected": -19.909687042236328, | |
| "loss": 0.7436, | |
| "original_losses": 2.6259872913360596, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -46.21319580078125, | |
| "rewards/margins": 3.561020612716675, | |
| "rewards/rejected": -49.77421569824219, | |
| "step": 255, | |
| "weight": 0.37864193320274353 | |
| }, | |
| { | |
| "abs_diff": 3.276740312576294, | |
| "all_logps_1": -8267.275390625, | |
| "all_logps_1_values": -8267.275390625, | |
| "all_logps_2": 393.79376220703125, | |
| "all_logps_2_values": 393.79376220703125, | |
| "epoch": 0.555703980764093, | |
| "grad_norm": 82.01295751328553, | |
| "learning_rate": 4.887809678520975e-07, | |
| "logits/chosen": 3.072216749191284, | |
| "logits/rejected": 3.1636574268341064, | |
| "logps/chosen": -20.29796600341797, | |
| "logps/rejected": -22.58323860168457, | |
| "loss": 0.767, | |
| "original_losses": 1.7455909252166748, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -50.74491882324219, | |
| "rewards/margins": 5.713181972503662, | |
| "rewards/rejected": -56.458106994628906, | |
| "step": 260, | |
| "weight": 0.35711461305618286 | |
| }, | |
| { | |
| "abs_diff": 3.182936429977417, | |
| "all_logps_1": -9216.587890625, | |
| "all_logps_1_values": -9216.5869140625, | |
| "all_logps_2": 407.7562561035156, | |
| "all_logps_2_values": 407.7562561035156, | |
| "epoch": 0.566390595778787, | |
| "grad_norm": 52.240919124363245, | |
| "learning_rate": 4.700979230274829e-07, | |
| "logits/chosen": 3.0337119102478027, | |
| "logits/rejected": 3.0206868648529053, | |
| "logps/chosen": -23.038707733154297, | |
| "logps/rejected": -23.99751091003418, | |
| "loss": 0.807, | |
| "original_losses": 3.4282360076904297, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -57.596778869628906, | |
| "rewards/margins": 2.3969998359680176, | |
| "rewards/rejected": -59.9937744140625, | |
| "step": 265, | |
| "weight": 0.34545254707336426 | |
| }, | |
| { | |
| "abs_diff": 3.006873607635498, | |
| "all_logps_1": -10153.31640625, | |
| "all_logps_1_values": -10153.3154296875, | |
| "all_logps_2": 477.38751220703125, | |
| "all_logps_2_values": 477.38751220703125, | |
| "epoch": 0.5770772107934812, | |
| "grad_norm": 59.52695646189972, | |
| "learning_rate": 4.514566989613559e-07, | |
| "logits/chosen": 2.972503185272217, | |
| "logits/rejected": 2.9690961837768555, | |
| "logps/chosen": -22.136503219604492, | |
| "logps/rejected": -23.38858413696289, | |
| "loss": 0.8091, | |
| "original_losses": 2.8519082069396973, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -55.34125900268555, | |
| "rewards/margins": 3.1301934719085693, | |
| "rewards/rejected": -58.47145462036133, | |
| "step": 270, | |
| "weight": 0.36154988408088684 | |
| }, | |
| { | |
| "abs_diff": 2.563995599746704, | |
| "all_logps_1": -7391.75, | |
| "all_logps_1_values": -7391.75, | |
| "all_logps_2": 375.40625, | |
| "all_logps_2_values": 375.40625, | |
| "epoch": 0.5877638258081752, | |
| "grad_norm": 59.32000543668621, | |
| "learning_rate": 4.328833670911724e-07, | |
| "logits/chosen": 3.481792449951172, | |
| "logits/rejected": 3.5533995628356934, | |
| "logps/chosen": -21.077594757080078, | |
| "logps/rejected": -22.37049674987793, | |
| "loss": 0.7438, | |
| "original_losses": 2.241507053375244, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -52.6939811706543, | |
| "rewards/margins": 3.2322616577148438, | |
| "rewards/rejected": -55.926246643066406, | |
| "step": 275, | |
| "weight": 0.41661015152931213 | |
| }, | |
| { | |
| "abs_diff": 3.315411329269409, | |
| "all_logps_1": -7719.34619140625, | |
| "all_logps_1_values": -7719.34521484375, | |
| "all_logps_2": 439.35626220703125, | |
| "all_logps_2_values": 439.35626220703125, | |
| "epoch": 0.5984504408228694, | |
| "grad_norm": 53.61685628912313, | |
| "learning_rate": 4.144039039010124e-07, | |
| "logits/chosen": 2.6844732761383057, | |
| "logits/rejected": 2.87386417388916, | |
| "logps/chosen": -17.859844207763672, | |
| "logps/rejected": -19.173076629638672, | |
| "loss": 0.7914, | |
| "original_losses": 3.27254056930542, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -44.64960861206055, | |
| "rewards/margins": 3.283079147338867, | |
| "rewards/rejected": -47.93268966674805, | |
| "step": 280, | |
| "weight": 0.38690507411956787 | |
| }, | |
| { | |
| "abs_diff": 2.917543649673462, | |
| "all_logps_1": -6426.8310546875, | |
| "all_logps_1_values": -6426.8310546875, | |
| "all_logps_2": 355.16876220703125, | |
| "all_logps_2_values": 355.16876220703125, | |
| "epoch": 0.6091370558375635, | |
| "grad_norm": 55.70128923603701, | |
| "learning_rate": 3.960441545911204e-07, | |
| "logits/chosen": 3.0214133262634277, | |
| "logits/rejected": 3.1276047229766846, | |
| "logps/chosen": -20.0152530670166, | |
| "logps/rejected": -20.51242446899414, | |
| "loss": 0.8001, | |
| "original_losses": 3.778569459915161, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -50.03813552856445, | |
| "rewards/margins": 1.2429269552230835, | |
| "rewards/rejected": -51.28105926513672, | |
| "step": 285, | |
| "weight": 0.4056159555912018 | |
| }, | |
| { | |
| "abs_diff": 3.5806915760040283, | |
| "all_logps_1": -6845.4326171875, | |
| "all_logps_1_values": -6845.4326171875, | |
| "all_logps_2": 341.95001220703125, | |
| "all_logps_2_values": 341.95001220703125, | |
| "epoch": 0.6198236708522575, | |
| "grad_norm": 67.43658438729601, | |
| "learning_rate": 3.778297969310529e-07, | |
| "logits/chosen": 2.87160325050354, | |
| "logits/rejected": 2.953885555267334, | |
| "logps/chosen": -19.99938201904297, | |
| "logps/rejected": -22.214576721191406, | |
| "loss": 0.8043, | |
| "original_losses": 2.356289863586426, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -49.99845504760742, | |
| "rewards/margins": 5.537986755371094, | |
| "rewards/rejected": -55.53644561767578, | |
| "step": 290, | |
| "weight": 0.3736080527305603 | |
| }, | |
| { | |
| "abs_diff": 3.2340712547302246, | |
| "all_logps_1": -7549.24755859375, | |
| "all_logps_1_values": -7549.24755859375, | |
| "all_logps_2": 351.07501220703125, | |
| "all_logps_2_values": 351.07501220703125, | |
| "epoch": 0.6305102858669517, | |
| "grad_norm": 48.7199759637811, | |
| "learning_rate": 3.5978630534699865e-07, | |
| "logits/chosen": 2.5181379318237305, | |
| "logits/rejected": 2.6238226890563965, | |
| "logps/chosen": -21.65777587890625, | |
| "logps/rejected": -23.368385314941406, | |
| "loss": 0.8187, | |
| "original_losses": 2.4767355918884277, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -54.144432067871094, | |
| "rewards/margins": 4.276528835296631, | |
| "rewards/rejected": -58.42096710205078, | |
| "step": 295, | |
| "weight": 0.35286107659339905 | |
| }, | |
| { | |
| "abs_diff": 3.32385516166687, | |
| "all_logps_1": -8850.7470703125, | |
| "all_logps_1_values": -8850.748046875, | |
| "all_logps_2": 415.8999938964844, | |
| "all_logps_2_values": 415.8999938964844, | |
| "epoch": 0.6411969008816457, | |
| "grad_norm": 40.177069639353974, | |
| "learning_rate": 3.4193891529348795e-07, | |
| "logits/chosen": 2.7022032737731934, | |
| "logits/rejected": 2.7918949127197266, | |
| "logps/chosen": -22.0867862701416, | |
| "logps/rejected": -24.649303436279297, | |
| "loss": 0.7237, | |
| "original_losses": 1.4938082695007324, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -55.21696090698242, | |
| "rewards/margins": 6.4062957763671875, | |
| "rewards/rejected": -61.623252868652344, | |
| "step": 300, | |
| "weight": 0.38917768001556396 | |
| }, | |
| { | |
| "abs_diff": 2.9255619049072266, | |
| "all_logps_1": -7401.28662109375, | |
| "all_logps_1_values": -7401.2861328125, | |
| "all_logps_2": 407.0562438964844, | |
| "all_logps_2_values": 407.0562438964844, | |
| "epoch": 0.6518835158963399, | |
| "grad_norm": 48.44087105424344, | |
| "learning_rate": 3.243125879593286e-07, | |
| "logits/chosen": 2.6635046005249023, | |
| "logits/rejected": 2.777791976928711, | |
| "logps/chosen": -18.8937931060791, | |
| "logps/rejected": -20.48404312133789, | |
| "loss": 0.8144, | |
| "original_losses": 2.4159512519836426, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -47.23448181152344, | |
| "rewards/margins": 3.975621461868286, | |
| "rewards/rejected": -51.21010208129883, | |
| "step": 305, | |
| "weight": 0.4257276952266693 | |
| }, | |
| { | |
| "abs_diff": 3.013671875, | |
| "all_logps_1": -7221.7607421875, | |
| "all_logps_1_values": -7221.76171875, | |
| "all_logps_2": 377.16876220703125, | |
| "all_logps_2_values": 377.16876220703125, | |
| "epoch": 0.6625701309110339, | |
| "grad_norm": 59.965292421288716, | |
| "learning_rate": 3.069319753571269e-07, | |
| "logits/chosen": 2.7733490467071533, | |
| "logits/rejected": 2.600106954574585, | |
| "logps/chosen": -19.796558380126953, | |
| "logps/rejected": -20.72552490234375, | |
| "loss": 0.8117, | |
| "original_losses": 3.373765230178833, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -49.491390228271484, | |
| "rewards/margins": 2.3224196434020996, | |
| "rewards/rejected": -51.813812255859375, | |
| "step": 310, | |
| "weight": 0.3705739974975586 | |
| }, | |
| { | |
| "abs_diff": 3.0523111820220947, | |
| "all_logps_1": -7815.6552734375, | |
| "all_logps_1_values": -7815.65478515625, | |
| "all_logps_2": 449.16876220703125, | |
| "all_logps_2_values": 449.16876220703125, | |
| "epoch": 0.673256745925728, | |
| "grad_norm": 52.38266043751792, | |
| "learning_rate": 2.898213858452173e-07, | |
| "logits/chosen": 2.1578516960144043, | |
| "logits/rejected": 2.247980833053589, | |
| "logps/chosen": -17.26466941833496, | |
| "logps/rejected": -18.508235931396484, | |
| "loss": 0.7937, | |
| "original_losses": 2.871872901916504, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -43.16167449951172, | |
| "rewards/margins": 3.1089208126068115, | |
| "rewards/rejected": -46.270591735839844, | |
| "step": 315, | |
| "weight": 0.3665739893913269 | |
| }, | |
| { | |
| "abs_diff": 3.318554639816284, | |
| "all_logps_1": -6473.89013671875, | |
| "all_logps_1_values": -6473.890625, | |
| "all_logps_2": 359.54376220703125, | |
| "all_logps_2_values": 359.54376220703125, | |
| "epoch": 0.6839433609404221, | |
| "grad_norm": 87.91813204561389, | |
| "learning_rate": 2.730047501302266e-07, | |
| "logits/chosen": 2.3339014053344727, | |
| "logits/rejected": 2.4213125705718994, | |
| "logps/chosen": -17.509052276611328, | |
| "logps/rejected": -19.367351531982422, | |
| "loss": 0.7705, | |
| "original_losses": 2.49141263961792, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -43.77263259887695, | |
| "rewards/margins": 4.645747184753418, | |
| "rewards/rejected": -48.41838455200195, | |
| "step": 320, | |
| "weight": 0.37379634380340576 | |
| }, | |
| { | |
| "abs_diff": 4.029627799987793, | |
| "all_logps_1": -8260.576171875, | |
| "all_logps_1_values": -8260.576171875, | |
| "all_logps_2": 420.7562561035156, | |
| "all_logps_2_values": 420.7562561035156, | |
| "epoch": 0.6946299759551162, | |
| "grad_norm": 56.877689091030156, | |
| "learning_rate": 2.5650558779781635e-07, | |
| "logits/chosen": 2.8901479244232178, | |
| "logits/rejected": 2.8577167987823486, | |
| "logps/chosen": -18.898571014404297, | |
| "logps/rejected": -21.253376007080078, | |
| "loss": 0.7293, | |
| "original_losses": 2.648833751678467, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -47.24642562866211, | |
| "rewards/margins": 5.887020111083984, | |
| "rewards/rejected": -53.133445739746094, | |
| "step": 325, | |
| "weight": 0.299586683511734 | |
| }, | |
| { | |
| "abs_diff": 3.5890209674835205, | |
| "all_logps_1": -8075.91650390625, | |
| "all_logps_1_values": -8075.91552734375, | |
| "all_logps_2": 370.53125, | |
| "all_logps_2_values": 370.53125, | |
| "epoch": 0.7053165909698104, | |
| "grad_norm": 51.745088875170836, | |
| "learning_rate": 2.403469744184154e-07, | |
| "logits/chosen": 2.560868978500366, | |
| "logits/rejected": 2.73579740524292, | |
| "logps/chosen": -20.837478637695312, | |
| "logps/rejected": -23.191274642944336, | |
| "loss": 0.8048, | |
| "original_losses": 2.0950331687927246, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -52.09369659423828, | |
| "rewards/margins": 5.884491443634033, | |
| "rewards/rejected": -57.978187561035156, | |
| "step": 330, | |
| "weight": 0.35120078921318054 | |
| }, | |
| { | |
| "abs_diff": 3.3162055015563965, | |
| "all_logps_1": -8510.986328125, | |
| "all_logps_1_values": -8510.9873046875, | |
| "all_logps_2": 404.1875, | |
| "all_logps_2_values": 404.1875, | |
| "epoch": 0.7160032059845044, | |
| "grad_norm": 69.38405615517823, | |
| "learning_rate": 2.2455150927394878e-07, | |
| "logits/chosen": 2.6478374004364014, | |
| "logits/rejected": 2.565058946609497, | |
| "logps/chosen": -20.254060745239258, | |
| "logps/rejected": -22.03819465637207, | |
| "loss": 0.7845, | |
| "original_losses": 2.5941619873046875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -50.63515090942383, | |
| "rewards/margins": 4.460334300994873, | |
| "rewards/rejected": -55.095489501953125, | |
| "step": 335, | |
| "weight": 0.3666679263114929 | |
| }, | |
| { | |
| "abs_diff": 3.677370548248291, | |
| "all_logps_1": -8691.5263671875, | |
| "all_logps_1_values": -8691.5263671875, | |
| "all_logps_2": 381.01873779296875, | |
| "all_logps_2_values": 381.01873779296875, | |
| "epoch": 0.7266898209991985, | |
| "grad_norm": 84.25483121877998, | |
| "learning_rate": 2.0914128375069722e-07, | |
| "logits/chosen": 2.709319829940796, | |
| "logits/rejected": 2.7781405448913574, | |
| "logps/chosen": -21.70474624633789, | |
| "logps/rejected": -23.93856430053711, | |
| "loss": 0.7836, | |
| "original_losses": 2.420710802078247, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -54.261871337890625, | |
| "rewards/margins": 5.5845465660095215, | |
| "rewards/rejected": -59.84641647338867, | |
| "step": 340, | |
| "weight": 0.35216349363327026 | |
| }, | |
| { | |
| "abs_diff": 2.6053452491760254, | |
| "all_logps_1": -8825.68359375, | |
| "all_logps_1_values": -8825.68359375, | |
| "all_logps_2": 365.8812561035156, | |
| "all_logps_2_values": 365.8812561035156, | |
| "epoch": 0.7373764360138926, | |
| "grad_norm": 70.75060453919657, | |
| "learning_rate": 1.9413785044249676e-07, | |
| "logits/chosen": 2.845489501953125, | |
| "logits/rejected": 2.95839262008667, | |
| "logps/chosen": -24.01942253112793, | |
| "logps/rejected": -25.074626922607422, | |
| "loss": 0.7906, | |
| "original_losses": 2.7006657123565674, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -60.048553466796875, | |
| "rewards/margins": 2.63801908493042, | |
| "rewards/rejected": -62.68656539916992, | |
| "step": 345, | |
| "weight": 0.4165709912776947 | |
| }, | |
| { | |
| "abs_diff": 3.481846570968628, | |
| "all_logps_1": -9110.2353515625, | |
| "all_logps_1_values": -9110.2353515625, | |
| "all_logps_2": 392.26251220703125, | |
| "all_logps_2_values": 392.26251220703125, | |
| "epoch": 0.7480630510285867, | |
| "grad_norm": 50.64757547547787, | |
| "learning_rate": 1.7956219300748792e-07, | |
| "logits/chosen": 2.579031467437744, | |
| "logits/rejected": 2.5901365280151367, | |
| "logps/chosen": -21.98320198059082, | |
| "logps/rejected": -24.78140640258789, | |
| "loss": 0.7388, | |
| "original_losses": 1.231533408164978, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -54.9580078125, | |
| "rewards/margins": 6.995513916015625, | |
| "rewards/rejected": -61.953514099121094, | |
| "step": 350, | |
| "weight": 0.3367912769317627 | |
| }, | |
| { | |
| "abs_diff": 3.284003496170044, | |
| "all_logps_1": -9058.169921875, | |
| "all_logps_1_values": -9058.169921875, | |
| "all_logps_2": 396.1812438964844, | |
| "all_logps_2_values": 396.1812438964844, | |
| "epoch": 0.7587496660432808, | |
| "grad_norm": 74.67147548055407, | |
| "learning_rate": 1.6543469682057104e-07, | |
| "logits/chosen": 2.3805794715881348, | |
| "logits/rejected": 2.5762991905212402, | |
| "logps/chosen": -21.627700805664062, | |
| "logps/rejected": -23.67769432067871, | |
| "loss": 0.7775, | |
| "original_losses": 2.081150531768799, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -54.06926345825195, | |
| "rewards/margins": 5.124981880187988, | |
| "rewards/rejected": -59.194244384765625, | |
| "step": 355, | |
| "weight": 0.333683043718338 | |
| }, | |
| { | |
| "abs_diff": 3.9802608489990234, | |
| "all_logps_1": -8140.62646484375, | |
| "all_logps_1_values": -8140.625, | |
| "all_logps_2": 368.1812438964844, | |
| "all_logps_2_values": 368.1812438964844, | |
| "epoch": 0.7694362810579749, | |
| "grad_norm": 58.567962370545146, | |
| "learning_rate": 1.5177512046261666e-07, | |
| "logits/chosen": 2.5346484184265137, | |
| "logits/rejected": 2.3816428184509277, | |
| "logps/chosen": -22.101619720458984, | |
| "logps/rejected": -24.49993896484375, | |
| "loss": 0.6993, | |
| "original_losses": 2.540489435195923, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -55.254051208496094, | |
| "rewards/margins": 5.995795249938965, | |
| "rewards/rejected": -61.249847412109375, | |
| "step": 360, | |
| "weight": 0.34169501066207886 | |
| }, | |
| { | |
| "abs_diff": 3.0081470012664795, | |
| "all_logps_1": -7452.68115234375, | |
| "all_logps_1_values": -7452.68115234375, | |
| "all_logps_2": 344.38751220703125, | |
| "all_logps_2_values": 344.38751220703125, | |
| "epoch": 0.7801228960726689, | |
| "grad_norm": 83.23124267198439, | |
| "learning_rate": 1.3860256808630427e-07, | |
| "logits/chosen": 2.4369776248931885, | |
| "logits/rejected": 2.584667682647705, | |
| "logps/chosen": -21.06991195678711, | |
| "logps/rejected": -22.77521324157715, | |
| "loss": 0.7695, | |
| "original_losses": 2.1701793670654297, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -52.674774169921875, | |
| "rewards/margins": 4.263253211975098, | |
| "rewards/rejected": -56.93803024291992, | |
| "step": 365, | |
| "weight": 0.345781534910202 | |
| }, | |
| { | |
| "abs_diff": 3.4343185424804688, | |
| "all_logps_1": -9116.8271484375, | |
| "all_logps_1_values": -9116.826171875, | |
| "all_logps_2": 410.375, | |
| "all_logps_2_values": 410.375, | |
| "epoch": 0.7908095110873631, | |
| "grad_norm": 70.00940117238335, | |
| "learning_rate": 1.2593546269723647e-07, | |
| "logits/chosen": 2.4547030925750732, | |
| "logits/rejected": 2.5984954833984375, | |
| "logps/chosen": -21.283931732177734, | |
| "logps/rejected": -23.039413452148438, | |
| "loss": 0.7116, | |
| "original_losses": 2.7036542892456055, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -53.20983123779297, | |
| "rewards/margins": 4.388695240020752, | |
| "rewards/rejected": -57.59852981567383, | |
| "step": 370, | |
| "weight": 0.3490845561027527 | |
| }, | |
| { | |
| "abs_diff": 3.5221400260925293, | |
| "all_logps_1": -8307.474609375, | |
| "all_logps_1_values": -8307.4755859375, | |
| "all_logps_2": 382.3999938964844, | |
| "all_logps_2_values": 382.3999938964844, | |
| "epoch": 0.8014961261020572, | |
| "grad_norm": 46.47990793449235, | |
| "learning_rate": 1.1379152038770029e-07, | |
| "logits/chosen": 2.5157063007354736, | |
| "logits/rejected": 2.4793992042541504, | |
| "logps/chosen": -20.3429012298584, | |
| "logps/rejected": -21.467952728271484, | |
| "loss": 0.836, | |
| "original_losses": 3.6247520446777344, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -50.85725784301758, | |
| "rewards/margins": 2.8126296997070312, | |
| "rewards/rejected": -53.669883728027344, | |
| "step": 375, | |
| "weight": 0.3106473684310913 | |
| }, | |
| { | |
| "abs_diff": 3.1884102821350098, | |
| "all_logps_1": -7604.51953125, | |
| "all_logps_1_values": -7604.5185546875, | |
| "all_logps_2": 386.5625, | |
| "all_logps_2_values": 386.5625, | |
| "epoch": 0.8121827411167513, | |
| "grad_norm": 53.33016210631404, | |
| "learning_rate": 1.0218772555910954e-07, | |
| "logits/chosen": 2.299121141433716, | |
| "logits/rejected": 2.4894156455993652, | |
| "logps/chosen": -18.67618179321289, | |
| "logps/rejected": -20.802087783813477, | |
| "loss": 0.7253, | |
| "original_losses": 1.8635917901992798, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -46.690452575683594, | |
| "rewards/margins": 5.314764499664307, | |
| "rewards/rejected": -52.005226135253906, | |
| "step": 380, | |
| "weight": 0.36687955260276794 | |
| }, | |
| { | |
| "abs_diff": 3.728355884552002, | |
| "all_logps_1": -6403.2841796875, | |
| "all_logps_1_values": -6403.2841796875, | |
| "all_logps_2": 352.4937438964844, | |
| "all_logps_2_values": 352.4937438964844, | |
| "epoch": 0.8228693561314454, | |
| "grad_norm": 51.015747481657996, | |
| "learning_rate": 9.114030716778432e-08, | |
| "logits/chosen": 2.5289080142974854, | |
| "logits/rejected": 2.568324565887451, | |
| "logps/chosen": -17.892498016357422, | |
| "logps/rejected": -20.8332462310791, | |
| "loss": 0.6978, | |
| "original_losses": 1.4315834045410156, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -44.73124694824219, | |
| "rewards/margins": 7.351869106292725, | |
| "rewards/rejected": -52.0831184387207, | |
| "step": 385, | |
| "weight": 0.3369835317134857 | |
| }, | |
| { | |
| "abs_diff": 3.4457297325134277, | |
| "all_logps_1": -7086.70947265625, | |
| "all_logps_1_values": -7086.70849609375, | |
| "all_logps_2": 400.7562561035156, | |
| "all_logps_2_values": 400.7562561035156, | |
| "epoch": 0.8335559711461394, | |
| "grad_norm": 68.15400742768429, | |
| "learning_rate": 8.066471602728803e-08, | |
| "logits/chosen": 2.300518751144409, | |
| "logits/rejected": 2.432492256164551, | |
| "logps/chosen": -17.285266876220703, | |
| "logps/rejected": -19.280744552612305, | |
| "loss": 0.6869, | |
| "original_losses": 2.3130502700805664, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -43.213172912597656, | |
| "rewards/margins": 4.9886932373046875, | |
| "rewards/rejected": -48.201866149902344, | |
| "step": 390, | |
| "weight": 0.337992399930954 | |
| }, | |
| { | |
| "abs_diff": 2.9501354694366455, | |
| "all_logps_1": -7602.40478515625, | |
| "all_logps_1_values": -7602.40380859375, | |
| "all_logps_2": 396.3125, | |
| "all_logps_2_values": 396.3125, | |
| "epoch": 0.8442425861608336, | |
| "grad_norm": 72.89829906287879, | |
| "learning_rate": 7.077560319906694e-08, | |
| "logits/chosen": 2.815917491912842, | |
| "logits/rejected": 3.0646049976348877, | |
| "logps/chosen": -17.960046768188477, | |
| "logps/rejected": -19.63981056213379, | |
| "loss": 0.7686, | |
| "original_losses": 2.1916909217834473, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -44.900108337402344, | |
| "rewards/margins": 4.1994123458862305, | |
| "rewards/rejected": -49.099525451660156, | |
| "step": 395, | |
| "weight": 0.36895015835762024 | |
| }, | |
| { | |
| "abs_diff": 3.306037187576294, | |
| "all_logps_1": -6128.6064453125, | |
| "all_logps_1_values": -6128.6064453125, | |
| "all_logps_2": 348.07501220703125, | |
| "all_logps_2_values": 348.07501220703125, | |
| "epoch": 0.8549292011755276, | |
| "grad_norm": 49.93516351014214, | |
| "learning_rate": 6.148679950161672e-08, | |
| "logits/chosen": 2.5622057914733887, | |
| "logits/rejected": 2.715359926223755, | |
| "logps/chosen": -18.067874908447266, | |
| "logps/rejected": -20.04085922241211, | |
| "loss": 0.7506, | |
| "original_losses": 2.2437596321105957, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -45.169681549072266, | |
| "rewards/margins": 4.932468891143799, | |
| "rewards/rejected": -50.10215377807617, | |
| "step": 400, | |
| "weight": 0.34681177139282227 | |
| }, | |
| { | |
| "epoch": 0.8549292011755276, | |
| "eval_abs_diff": 3.175931930541992, | |
| "eval_all_logps_1": -7614.6904296875, | |
| "eval_all_logps_1_values": -7614.69091796875, | |
| "eval_all_logps_2": 414.86090087890625, | |
| "eval_all_logps_2_values": 414.86090087890625, | |
| "eval_logits/chosen": 1.7177369594573975, | |
| "eval_logits/rejected": 1.830857753753662, | |
| "eval_logps/chosen": -18.158353805541992, | |
| "eval_logps/rejected": -20.146547317504883, | |
| "eval_loss": 0.752778172492981, | |
| "eval_original_losses": 2.049124002456665, | |
| "eval_rewards/accuracies": 0.6975806355476379, | |
| "eval_rewards/chosen": -45.3958854675293, | |
| "eval_rewards/margins": 4.970486640930176, | |
| "eval_rewards/rejected": -50.36636734008789, | |
| "eval_runtime": 70.2236, | |
| "eval_samples_per_second": 27.925, | |
| "eval_steps_per_second": 0.883, | |
| "eval_weight": 0.37132638692855835, | |
| "step": 400 | |
| }, | |
| { | |
| "abs_diff": 3.7374179363250732, | |
| "all_logps_1": -6704.875, | |
| "all_logps_1_values": -6704.875, | |
| "all_logps_2": 385.4375, | |
| "all_logps_2_values": 385.4375, | |
| "epoch": 0.8656158161902218, | |
| "grad_norm": 69.66297582257639, | |
| "learning_rate": 5.2811296166831666e-08, | |
| "logits/chosen": 2.536898612976074, | |
| "logits/rejected": 2.8442349433898926, | |
| "logps/chosen": -17.0179443359375, | |
| "logps/rejected": -19.512527465820312, | |
| "loss": 0.6907, | |
| "original_losses": 1.9623138904571533, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -42.544864654541016, | |
| "rewards/margins": 6.236458778381348, | |
| "rewards/rejected": -48.78131866455078, | |
| "step": 405, | |
| "weight": 0.320218563079834 | |
| }, | |
| { | |
| "abs_diff": 3.427241802215576, | |
| "all_logps_1": -6360.1455078125, | |
| "all_logps_1_values": -6360.1455078125, | |
| "all_logps_2": 339.8062438964844, | |
| "all_logps_2_values": 339.8062438964844, | |
| "epoch": 0.8763024312049158, | |
| "grad_norm": 61.75715741585555, | |
| "learning_rate": 4.4761226670592066e-08, | |
| "logits/chosen": 2.682762861251831, | |
| "logits/rejected": 2.7268879413604736, | |
| "logps/chosen": -18.33367347717285, | |
| "logps/rejected": -20.431079864501953, | |
| "loss": 0.7588, | |
| "original_losses": 2.1594674587249756, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -45.83418273925781, | |
| "rewards/margins": 5.243517875671387, | |
| "rewards/rejected": -51.07769775390625, | |
| "step": 410, | |
| "weight": 0.32417041063308716 | |
| }, | |
| { | |
| "abs_diff": 3.5729141235351562, | |
| "all_logps_1": -8468.05078125, | |
| "all_logps_1_values": -8468.05078125, | |
| "all_logps_2": 414.2124938964844, | |
| "all_logps_2_values": 414.2124938964844, | |
| "epoch": 0.88698904621961, | |
| "grad_norm": 40.72517812799497, | |
| "learning_rate": 3.734784976300165e-08, | |
| "logits/chosen": 2.8361315727233887, | |
| "logits/rejected": 2.8616833686828613, | |
| "logps/chosen": -19.978229522705078, | |
| "logps/rejected": -22.11844825744629, | |
| "loss": 0.7242, | |
| "original_losses": 2.3812079429626465, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -49.94557189941406, | |
| "rewards/margins": 5.3505539894104, | |
| "rewards/rejected": -55.29612350463867, | |
| "step": 415, | |
| "weight": 0.3600180447101593 | |
| }, | |
| { | |
| "abs_diff": 2.8872458934783936, | |
| "all_logps_1": -8678.85546875, | |
| "all_logps_1_values": -8678.85546875, | |
| "all_logps_2": 427.64373779296875, | |
| "all_logps_2_values": 427.64373779296875, | |
| "epoch": 0.897675661234304, | |
| "grad_norm": 40.225954100303696, | |
| "learning_rate": 3.058153372200695e-08, | |
| "logits/chosen": 2.452263355255127, | |
| "logits/rejected": 2.515206813812256, | |
| "logps/chosen": -20.152559280395508, | |
| "logps/rejected": -21.298845291137695, | |
| "loss": 0.7959, | |
| "original_losses": 2.8674798011779785, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -50.38140106201172, | |
| "rewards/margins": 2.865709066390991, | |
| "rewards/rejected": -53.247108459472656, | |
| "step": 420, | |
| "weight": 0.39061683416366577 | |
| }, | |
| { | |
| "abs_diff": 4.009498119354248, | |
| "all_logps_1": -7007.01708984375, | |
| "all_logps_1_values": -7007.01708984375, | |
| "all_logps_2": 359.6187438964844, | |
| "all_logps_2_values": 359.6187438964844, | |
| "epoch": 0.9083622762489981, | |
| "grad_norm": 54.351457754994804, | |
| "learning_rate": 2.4471741852423233e-08, | |
| "logits/chosen": 2.617743968963623, | |
| "logits/rejected": 2.7704989910125732, | |
| "logps/chosen": -19.43728256225586, | |
| "logps/rejected": -21.93575668334961, | |
| "loss": 0.7422, | |
| "original_losses": 2.389147996902466, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -48.59320831298828, | |
| "rewards/margins": 6.246188163757324, | |
| "rewards/rejected": -54.83939743041992, | |
| "step": 425, | |
| "weight": 0.31762319803237915 | |
| }, | |
| { | |
| "abs_diff": 2.755589723587036, | |
| "all_logps_1": -8104.44921875, | |
| "all_logps_1_values": -8104.44921875, | |
| "all_logps_2": 428.79376220703125, | |
| "all_logps_2_values": 428.79376220703125, | |
| "epoch": 0.9190488912636923, | |
| "grad_norm": 45.34333002111428, | |
| "learning_rate": 1.9027019250647036e-08, | |
| "logits/chosen": 2.6327333450317383, | |
| "logits/rejected": 2.7319021224975586, | |
| "logps/chosen": -18.6940975189209, | |
| "logps/rejected": -20.32192039489746, | |
| "loss": 0.6933, | |
| "original_losses": 2.010368824005127, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -46.735252380371094, | |
| "rewards/margins": 4.069557189941406, | |
| "rewards/rejected": -50.80480194091797, | |
| "step": 430, | |
| "weight": 0.38695794343948364 | |
| }, | |
| { | |
| "abs_diff": 3.834909439086914, | |
| "all_logps_1": -7406.4482421875, | |
| "all_logps_1_values": -7406.44775390625, | |
| "all_logps_2": 382.15625, | |
| "all_logps_2_values": 382.15625, | |
| "epoch": 0.9297355062783863, | |
| "grad_norm": 103.89987589364694, | |
| "learning_rate": 1.4254980853566246e-08, | |
| "logits/chosen": 2.688000440597534, | |
| "logits/rejected": 2.763110399246216, | |
| "logps/chosen": -19.011985778808594, | |
| "logps/rejected": -21.563823699951172, | |
| "loss": 0.7401, | |
| "original_losses": 2.1428942680358887, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -47.52996063232422, | |
| "rewards/margins": 6.379598617553711, | |
| "rewards/rejected": -53.90956497192383, | |
| "step": 435, | |
| "weight": 0.35418570041656494 | |
| }, | |
| { | |
| "abs_diff": 3.49601411819458, | |
| "all_logps_1": -7640.515625, | |
| "all_logps_1_values": -7640.515625, | |
| "all_logps_2": 394.25, | |
| "all_logps_2_values": 394.25, | |
| "epoch": 0.9404221212930804, | |
| "grad_norm": 66.9604311531267, | |
| "learning_rate": 1.016230078838226e-08, | |
| "logits/chosen": 2.6405506134033203, | |
| "logits/rejected": 2.7150299549102783, | |
| "logps/chosen": -18.938282012939453, | |
| "logps/rejected": -21.01675796508789, | |
| "loss": 0.7279, | |
| "original_losses": 2.3662502765655518, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -47.345703125, | |
| "rewards/margins": 5.196188449859619, | |
| "rewards/rejected": -52.541893005371094, | |
| "step": 440, | |
| "weight": 0.35034170746803284 | |
| }, | |
| { | |
| "abs_diff": 3.1276192665100098, | |
| "all_logps_1": -9211.677734375, | |
| "all_logps_1_values": -9211.6787109375, | |
| "all_logps_2": 462.4624938964844, | |
| "all_logps_2_values": 462.4624938964844, | |
| "epoch": 0.9511087363077745, | |
| "grad_norm": 62.83164635980714, | |
| "learning_rate": 6.754703038239329e-09, | |
| "logits/chosen": 2.502159357070923, | |
| "logits/rejected": 2.6519925594329834, | |
| "logps/chosen": -18.46548080444336, | |
| "logps/rejected": -20.194454193115234, | |
| "loss": 0.6978, | |
| "original_losses": 2.2807629108428955, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -46.16370391845703, | |
| "rewards/margins": 4.322434902191162, | |
| "rewards/rejected": -50.48613739013672, | |
| "step": 445, | |
| "weight": 0.35711297392845154 | |
| }, | |
| { | |
| "abs_diff": 3.5259463787078857, | |
| "all_logps_1": -7040.4140625, | |
| "all_logps_1_values": -7040.4140625, | |
| "all_logps_2": 358.57501220703125, | |
| "all_logps_2_values": 358.57501220703125, | |
| "epoch": 0.9617953513224686, | |
| "grad_norm": 58.22216553617623, | |
| "learning_rate": 4.036953436716895e-09, | |
| "logits/chosen": 2.841308832168579, | |
| "logits/rejected": 2.788696050643921, | |
| "logps/chosen": -19.402172088623047, | |
| "logps/rejected": -21.435121536254883, | |
| "loss": 0.6648, | |
| "original_losses": 2.3494513034820557, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -48.505435943603516, | |
| "rewards/margins": 5.082365989685059, | |
| "rewards/rejected": -53.587799072265625, | |
| "step": 450, | |
| "weight": 0.32446950674057007 | |
| }, | |
| { | |
| "abs_diff": 2.527660369873047, | |
| "all_logps_1": -7083.1259765625, | |
| "all_logps_1_values": -7083.1259765625, | |
| "all_logps_2": 354.76873779296875, | |
| "all_logps_2_values": 354.76873779296875, | |
| "epoch": 0.9724819663371627, | |
| "grad_norm": 63.64964025419041, | |
| "learning_rate": 2.0128530023804656e-09, | |
| "logits/chosen": 2.5407052040100098, | |
| "logits/rejected": 2.6334285736083984, | |
| "logps/chosen": -19.97518539428711, | |
| "logps/rejected": -21.144289016723633, | |
| "loss": 0.773, | |
| "original_losses": 2.366673469543457, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -49.937965393066406, | |
| "rewards/margins": 2.922760486602783, | |
| "rewards/rejected": -52.86072540283203, | |
| "step": 455, | |
| "weight": 0.4015112519264221 | |
| }, | |
| { | |
| "abs_diff": 3.792357921600342, | |
| "all_logps_1": -6872.0908203125, | |
| "all_logps_1_values": -6872.0908203125, | |
| "all_logps_2": 352.35626220703125, | |
| "all_logps_2_values": 352.35626220703125, | |
| "epoch": 0.9831685813518568, | |
| "grad_norm": 70.18502240580426, | |
| "learning_rate": 6.852326227130833e-10, | |
| "logits/chosen": 2.659250020980835, | |
| "logits/rejected": 2.507812976837158, | |
| "logps/chosen": -20.08974266052246, | |
| "logps/rejected": -22.00864028930664, | |
| "loss": 0.7596, | |
| "original_losses": 3.0322279930114746, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -50.22435760498047, | |
| "rewards/margins": 4.797248840332031, | |
| "rewards/rejected": -55.0216064453125, | |
| "step": 460, | |
| "weight": 0.3797384202480316 | |
| }, | |
| { | |
| "abs_diff": 3.1223583221435547, | |
| "all_logps_1": -7477.0185546875, | |
| "all_logps_1_values": -7477.0185546875, | |
| "all_logps_2": 386.9937438964844, | |
| "all_logps_2_values": 386.9937438964844, | |
| "epoch": 0.9938551963665508, | |
| "grad_norm": 70.11026953873642, | |
| "learning_rate": 5.594909486328348e-11, | |
| "logits/chosen": 2.367159366607666, | |
| "logits/rejected": 2.6166296005249023, | |
| "logps/chosen": -18.468345642089844, | |
| "logps/rejected": -20.6806697845459, | |
| "loss": 0.6765, | |
| "original_losses": 1.560880422592163, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -46.17086410522461, | |
| "rewards/margins": 5.5308074951171875, | |
| "rewards/rejected": -51.7016716003418, | |
| "step": 465, | |
| "weight": 0.36222249269485474 | |
| }, | |
| { | |
| "epoch": 0.9981298423724285, | |
| "step": 467, | |
| "total_flos": 0.0, | |
| "train_loss": 0.9884350126254227, | |
| "train_runtime": 7236.0008, | |
| "train_samples_per_second": 8.275, | |
| "train_steps_per_second": 0.065 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 467, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |