| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.93687707641196, | |
| "eval_steps": 500, | |
| "global_step": 1500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0664451827242525, | |
| "grad_norm": 40.48354721069336, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "logits/chosen": -0.12491344660520554, | |
| "logits/rejected": -0.18818695843219757, | |
| "logps/chosen": -1117.441650390625, | |
| "logps/rejected": -1311.48095703125, | |
| "loss": 0.726, | |
| "rewards/accuracies": 0.3765625059604645, | |
| "rewards/chosen": -0.022913634777069092, | |
| "rewards/margins": -0.004669740330427885, | |
| "rewards/rejected": -0.018243903294205666, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.132890365448505, | |
| "grad_norm": 68.69932556152344, | |
| "learning_rate": 6.666666666666667e-07, | |
| "logits/chosen": -0.12522226572036743, | |
| "logits/rejected": -0.18426118791103363, | |
| "logps/chosen": -1105.445068359375, | |
| "logps/rejected": -1318.8582763671875, | |
| "loss": 0.7146, | |
| "rewards/accuracies": 0.504687488079071, | |
| "rewards/chosen": -0.014212612062692642, | |
| "rewards/margins": 0.031459741294384, | |
| "rewards/rejected": -0.04567235708236694, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.19933554817275748, | |
| "grad_norm": 32.92807388305664, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "logits/chosen": -0.13368168473243713, | |
| "logits/rejected": -0.19534170627593994, | |
| "logps/chosen": -1098.4427490234375, | |
| "logps/rejected": -1286.0972900390625, | |
| "loss": 0.7014, | |
| "rewards/accuracies": 0.5453125238418579, | |
| "rewards/chosen": -0.14931853115558624, | |
| "rewards/margins": 0.06388694047927856, | |
| "rewards/rejected": -0.2132054567337036, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.26578073089701, | |
| "grad_norm": 35.1468391418457, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "logits/chosen": -0.13665804266929626, | |
| "logits/rejected": -0.19470006227493286, | |
| "logps/chosen": -1057.49560546875, | |
| "logps/rejected": -1253.0081787109375, | |
| "loss": 0.6609, | |
| "rewards/accuracies": 0.5296874642372131, | |
| "rewards/chosen": -0.42790666222572327, | |
| "rewards/margins": 0.21245712041854858, | |
| "rewards/rejected": -0.6403638124465942, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.33222591362126247, | |
| "grad_norm": 23.31159210205078, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "logits/chosen": -0.1325100213289261, | |
| "logits/rejected": -0.19247327744960785, | |
| "logps/chosen": -1068.646484375, | |
| "logps/rejected": -1261.2486572265625, | |
| "loss": 0.6518, | |
| "rewards/accuracies": 0.596875011920929, | |
| "rewards/chosen": -0.8173043131828308, | |
| "rewards/margins": 0.3520071506500244, | |
| "rewards/rejected": -1.1693115234375, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.39867109634551495, | |
| "grad_norm": 21.35710334777832, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "logits/chosen": -0.1356111764907837, | |
| "logits/rejected": -0.19784294068813324, | |
| "logps/chosen": -1058.8770751953125, | |
| "logps/rejected": -1271.4691162109375, | |
| "loss": 0.6231, | |
| "rewards/accuracies": 0.6468750238418579, | |
| "rewards/chosen": -0.9412558078765869, | |
| "rewards/margins": 0.5320442914962769, | |
| "rewards/rejected": -1.4733000993728638, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 23.317035675048828, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "logits/chosen": -0.13605119287967682, | |
| "logits/rejected": -0.19660170376300812, | |
| "logps/chosen": -1103.0458984375, | |
| "logps/rejected": -1314.48291015625, | |
| "loss": 0.5443, | |
| "rewards/accuracies": 0.7125000357627869, | |
| "rewards/chosen": -0.8000861406326294, | |
| "rewards/margins": 0.6938734650611877, | |
| "rewards/rejected": -1.493959665298462, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.53156146179402, | |
| "grad_norm": 22.587018966674805, | |
| "learning_rate": 2.666666666666667e-06, | |
| "logits/chosen": -0.11189720779657364, | |
| "logits/rejected": -0.18983830511569977, | |
| "logps/chosen": -1066.6973876953125, | |
| "logps/rejected": -1319.9337158203125, | |
| "loss": 0.4977, | |
| "rewards/accuracies": 0.7718749642372131, | |
| "rewards/chosen": -0.792549729347229, | |
| "rewards/margins": 1.0354700088500977, | |
| "rewards/rejected": -1.8280198574066162, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5980066445182725, | |
| "grad_norm": 20.043182373046875, | |
| "learning_rate": 3e-06, | |
| "logits/chosen": -0.1244162917137146, | |
| "logits/rejected": -0.188711017370224, | |
| "logps/chosen": -1076.73779296875, | |
| "logps/rejected": -1331.7142333984375, | |
| "loss": 0.4294, | |
| "rewards/accuracies": 0.7984374761581421, | |
| "rewards/chosen": -0.42743515968322754, | |
| "rewards/margins": 1.2227166891098022, | |
| "rewards/rejected": -1.6501517295837402, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6644518272425249, | |
| "grad_norm": 10.029542922973633, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "logits/chosen": -0.12235234677791595, | |
| "logits/rejected": -0.1966520994901657, | |
| "logps/chosen": -1058.425537109375, | |
| "logps/rejected": -1334.389404296875, | |
| "loss": 0.3671, | |
| "rewards/accuracies": 0.871874988079071, | |
| "rewards/chosen": -0.5026114583015442, | |
| "rewards/margins": 1.8994375467300415, | |
| "rewards/rejected": -2.4020493030548096, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7308970099667774, | |
| "grad_norm": 11.281858444213867, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "logits/chosen": -0.13173305988311768, | |
| "logits/rejected": -0.19946981966495514, | |
| "logps/chosen": -1074.1378173828125, | |
| "logps/rejected": -1311.911865234375, | |
| "loss": 0.2999, | |
| "rewards/accuracies": 0.895312488079071, | |
| "rewards/chosen": -0.37171292304992676, | |
| "rewards/margins": 2.6020548343658447, | |
| "rewards/rejected": -2.9737677574157715, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.7973421926910299, | |
| "grad_norm": 11.228713989257812, | |
| "learning_rate": 4.000000000000001e-06, | |
| "logits/chosen": -0.12718108296394348, | |
| "logits/rejected": -0.19788110256195068, | |
| "logps/chosen": -1091.0115966796875, | |
| "logps/rejected": -1348.0794677734375, | |
| "loss": 0.2278, | |
| "rewards/accuracies": 0.9249999523162842, | |
| "rewards/chosen": -0.0037933550775051117, | |
| "rewards/margins": 4.05342435836792, | |
| "rewards/rejected": -4.057217597961426, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8637873754152824, | |
| "grad_norm": 7.068265438079834, | |
| "learning_rate": 4.333333333333334e-06, | |
| "logits/chosen": -0.12988580763339996, | |
| "logits/rejected": -0.19472359120845795, | |
| "logps/chosen": -1101.286865234375, | |
| "logps/rejected": -1364.6812744140625, | |
| "loss": 0.162, | |
| "rewards/accuracies": 0.9515625834465027, | |
| "rewards/chosen": 0.24117425084114075, | |
| "rewards/margins": 5.719590663909912, | |
| "rewards/rejected": -5.4784159660339355, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 2.4044954776763916, | |
| "learning_rate": 4.666666666666667e-06, | |
| "logits/chosen": -0.13687601685523987, | |
| "logits/rejected": -0.20584635436534882, | |
| "logps/chosen": -1033.876708984375, | |
| "logps/rejected": -1361.149658203125, | |
| "loss": 0.1267, | |
| "rewards/accuracies": 0.9468749761581421, | |
| "rewards/chosen": 1.0959956645965576, | |
| "rewards/margins": 7.263400077819824, | |
| "rewards/rejected": -6.1674041748046875, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9966777408637874, | |
| "grad_norm": 23.234800338745117, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": -0.14618705213069916, | |
| "logits/rejected": -0.2165984809398651, | |
| "logps/chosen": -1056.0648193359375, | |
| "logps/rejected": -1392.6947021484375, | |
| "loss": 0.1071, | |
| "rewards/accuracies": 0.9390624761581421, | |
| "rewards/chosen": 0.6555425524711609, | |
| "rewards/margins": 9.398031234741211, | |
| "rewards/rejected": -8.742487907409668, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.0598006644518272, | |
| "grad_norm": 5.546257495880127, | |
| "learning_rate": 4.999323102948655e-06, | |
| "logits/chosen": -0.1447768211364746, | |
| "logits/rejected": -0.2160787135362625, | |
| "logps/chosen": -1052.21728515625, | |
| "logps/rejected": -1390.3724365234375, | |
| "loss": 0.084, | |
| "rewards/accuracies": 0.9391447901725769, | |
| "rewards/chosen": 1.6069414615631104, | |
| "rewards/margins": 11.616178512573242, | |
| "rewards/rejected": -10.009236335754395, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.1262458471760797, | |
| "grad_norm": 29.2595157623291, | |
| "learning_rate": 4.997292778346312e-06, | |
| "logits/chosen": -0.15423689782619476, | |
| "logits/rejected": -0.21319061517715454, | |
| "logps/chosen": -1064.402587890625, | |
| "logps/rejected": -1432.095458984375, | |
| "loss": 0.061, | |
| "rewards/accuracies": 0.9578124284744263, | |
| "rewards/chosen": 1.6962934732437134, | |
| "rewards/margins": 15.001029968261719, | |
| "rewards/rejected": -13.304736137390137, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.1926910299003322, | |
| "grad_norm": 3.240455150604248, | |
| "learning_rate": 4.993910125649561e-06, | |
| "logits/chosen": -0.13828535377979279, | |
| "logits/rejected": -0.21751590073108673, | |
| "logps/chosen": -998.13134765625, | |
| "logps/rejected": -1392.8687744140625, | |
| "loss": 0.0494, | |
| "rewards/accuracies": 0.9625000357627869, | |
| "rewards/chosen": 2.9092648029327393, | |
| "rewards/margins": 15.800436019897461, | |
| "rewards/rejected": -12.891172409057617, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.2591362126245846, | |
| "grad_norm": 0.4213032126426697, | |
| "learning_rate": 4.989176976624511e-06, | |
| "logits/chosen": -0.16740064322948456, | |
| "logits/rejected": -0.23276962339878082, | |
| "logps/chosen": -1051.3272705078125, | |
| "logps/rejected": -1436.19189453125, | |
| "loss": 0.0566, | |
| "rewards/accuracies": 0.942187488079071, | |
| "rewards/chosen": 3.340867757797241, | |
| "rewards/margins": 18.916629791259766, | |
| "rewards/rejected": -15.575761795043945, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.3255813953488373, | |
| "grad_norm": 33.59416961669922, | |
| "learning_rate": 4.983095894354858e-06, | |
| "logits/chosen": -0.17280790209770203, | |
| "logits/rejected": -0.23289522528648376, | |
| "logps/chosen": -1036.917236328125, | |
| "logps/rejected": -1430.266357421875, | |
| "loss": 0.0552, | |
| "rewards/accuracies": 0.9453125, | |
| "rewards/chosen": 4.887056827545166, | |
| "rewards/margins": 18.91439437866211, | |
| "rewards/rejected": -14.027338027954102, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.3920265780730898, | |
| "grad_norm": 0.12995536625385284, | |
| "learning_rate": 4.975670171853926e-06, | |
| "logits/chosen": -0.16127409040927887, | |
| "logits/rejected": -0.22987647354602814, | |
| "logps/chosen": -1064.2008056640625, | |
| "logps/rejected": -1541.5230712890625, | |
| "loss": 0.0741, | |
| "rewards/accuracies": 0.9515624642372131, | |
| "rewards/chosen": 1.9763206243515015, | |
| "rewards/margins": 24.5360107421875, | |
| "rewards/rejected": -22.5596923828125, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.4584717607973423, | |
| "grad_norm": 125.04955291748047, | |
| "learning_rate": 4.966903830281449e-06, | |
| "logits/chosen": -0.16986006498336792, | |
| "logits/rejected": -0.23232412338256836, | |
| "logps/chosen": -1011.2138061523438, | |
| "logps/rejected": -1458.410888671875, | |
| "loss": 0.0702, | |
| "rewards/accuracies": 0.9468750357627869, | |
| "rewards/chosen": 4.205624580383301, | |
| "rewards/margins": 22.20413589477539, | |
| "rewards/rejected": -17.998512268066406, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.5249169435215948, | |
| "grad_norm": 1.0301318168640137, | |
| "learning_rate": 4.956801616766033e-06, | |
| "logits/chosen": -0.17181265354156494, | |
| "logits/rejected": -0.22897139191627502, | |
| "logps/chosen": -1064.4287109375, | |
| "logps/rejected": -1548.607177734375, | |
| "loss": 0.0419, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 4.201747894287109, | |
| "rewards/margins": 26.632787704467773, | |
| "rewards/rejected": -22.43103790283203, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.5913621262458473, | |
| "grad_norm": 1.9554957151412964, | |
| "learning_rate": 4.9453690018345144e-06, | |
| "logits/chosen": -0.17581965029239655, | |
| "logits/rejected": -0.23298974335193634, | |
| "logps/chosen": -1037.4566650390625, | |
| "logps/rejected": -1497.9283447265625, | |
| "loss": 0.0354, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 4.2116312980651855, | |
| "rewards/margins": 25.39874267578125, | |
| "rewards/rejected": -21.18711280822754, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.6578073089700998, | |
| "grad_norm": 0.35193875432014465, | |
| "learning_rate": 4.93261217644956e-06, | |
| "logits/chosen": -0.16469064354896545, | |
| "logits/rejected": -0.23437215387821198, | |
| "logps/chosen": -1052.7650146484375, | |
| "logps/rejected": -1553.04833984375, | |
| "loss": 0.0423, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 4.150611877441406, | |
| "rewards/margins": 26.79295539855957, | |
| "rewards/rejected": -22.642345428466797, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.7242524916943522, | |
| "grad_norm": 0.6141932606697083, | |
| "learning_rate": 4.91853804865716e-06, | |
| "logits/chosen": -0.18150699138641357, | |
| "logits/rejected": -0.2411101907491684, | |
| "logps/chosen": -972.6925048828125, | |
| "logps/rejected": -1363.6278076171875, | |
| "loss": 0.0335, | |
| "rewards/accuracies": 0.9609375, | |
| "rewards/chosen": 6.711765289306641, | |
| "rewards/margins": 22.288022994995117, | |
| "rewards/rejected": -15.576257705688477, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.7906976744186047, | |
| "grad_norm": 36.097991943359375, | |
| "learning_rate": 4.903154239845798e-06, | |
| "logits/chosen": -0.17819088697433472, | |
| "logits/rejected": -0.2480602264404297, | |
| "logps/chosen": -1043.45751953125, | |
| "logps/rejected": -1562.9696044921875, | |
| "loss": 0.0432, | |
| "rewards/accuracies": 0.9500000476837158, | |
| "rewards/chosen": 4.969973087310791, | |
| "rewards/margins": 29.227781295776367, | |
| "rewards/rejected": -24.25780487060547, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 0.804478645324707, | |
| "learning_rate": 4.88646908061933e-06, | |
| "logits/chosen": -0.18135662376880646, | |
| "logits/rejected": -0.2500635087490082, | |
| "logps/chosen": -973.140869140625, | |
| "logps/rejected": -1519.1575927734375, | |
| "loss": 0.0387, | |
| "rewards/accuracies": 0.948437511920929, | |
| "rewards/chosen": 6.344993591308594, | |
| "rewards/margins": 28.498210906982422, | |
| "rewards/rejected": -22.15321922302246, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.9235880398671097, | |
| "grad_norm": 0.21806667745113373, | |
| "learning_rate": 4.868491606285823e-06, | |
| "logits/chosen": -0.1847972869873047, | |
| "logits/rejected": -0.24828991293907166, | |
| "logps/chosen": -1015.7157592773438, | |
| "logps/rejected": -1512.730712890625, | |
| "loss": 0.0349, | |
| "rewards/accuracies": 0.9546875357627869, | |
| "rewards/chosen": 7.247753620147705, | |
| "rewards/margins": 27.996492385864258, | |
| "rewards/rejected": -20.748737335205078, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.9900332225913622, | |
| "grad_norm": 0.06243917718529701, | |
| "learning_rate": 4.849231551964771e-06, | |
| "logits/chosen": -0.18840095400810242, | |
| "logits/rejected": -0.2502696216106415, | |
| "logps/chosen": -1035.1356201171875, | |
| "logps/rejected": -1553.83154296875, | |
| "loss": 0.0338, | |
| "rewards/accuracies": 0.9562500715255737, | |
| "rewards/chosen": 6.608232021331787, | |
| "rewards/margins": 30.3477783203125, | |
| "rewards/rejected": -23.739547729492188, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.053156146179402, | |
| "grad_norm": 1.4525890350341797, | |
| "learning_rate": 4.828699347315357e-06, | |
| "logits/chosen": -0.16825956106185913, | |
| "logits/rejected": -0.2412978559732437, | |
| "logps/chosen": -993.029541015625, | |
| "logps/rejected": -1512.5572509765625, | |
| "loss": 0.0283, | |
| "rewards/accuracies": 0.9621711373329163, | |
| "rewards/chosen": 7.502988815307617, | |
| "rewards/margins": 29.106731414794922, | |
| "rewards/rejected": -21.603742599487305, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.1196013289036544, | |
| "grad_norm": 0.341370165348053, | |
| "learning_rate": 4.806906110888606e-06, | |
| "logits/chosen": -0.19253456592559814, | |
| "logits/rejected": -0.24857579171657562, | |
| "logps/chosen": -1003.42822265625, | |
| "logps/rejected": -1490.8272705078125, | |
| "loss": 0.0341, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 6.586777210235596, | |
| "rewards/margins": 30.160865783691406, | |
| "rewards/rejected": -23.574087142944336, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.186046511627907, | |
| "grad_norm": 0.32293370366096497, | |
| "learning_rate": 4.783863644106502e-06, | |
| "logits/chosen": -0.18394093215465546, | |
| "logits/rejected": -0.2500993013381958, | |
| "logps/chosen": -998.8344116210938, | |
| "logps/rejected": -1539.57958984375, | |
| "loss": 0.0293, | |
| "rewards/accuracies": 0.9593749642372131, | |
| "rewards/chosen": 6.540307521820068, | |
| "rewards/margins": 32.438438415527344, | |
| "rewards/rejected": -25.898130416870117, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.2524916943521593, | |
| "grad_norm": 0.4895381033420563, | |
| "learning_rate": 4.759584424871302e-06, | |
| "logits/chosen": -0.19156453013420105, | |
| "logits/rejected": -0.2491527646780014, | |
| "logps/chosen": -1053.5201416015625, | |
| "logps/rejected": -1579.8900146484375, | |
| "loss": 0.0266, | |
| "rewards/accuracies": 0.964062511920929, | |
| "rewards/chosen": 8.41015338897705, | |
| "rewards/margins": 31.691499710083008, | |
| "rewards/rejected": -23.281347274780273, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.318936877076412, | |
| "grad_norm": 0.6678661108016968, | |
| "learning_rate": 4.734081600808531e-06, | |
| "logits/chosen": -0.2005804479122162, | |
| "logits/rejected": -0.2595979571342468, | |
| "logps/chosen": -1060.26025390625, | |
| "logps/rejected": -1603.9803466796875, | |
| "loss": 0.0389, | |
| "rewards/accuracies": 0.9468750357627869, | |
| "rewards/chosen": 7.6117658615112305, | |
| "rewards/margins": 33.64573287963867, | |
| "rewards/rejected": -26.03396987915039, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.3853820598006643, | |
| "grad_norm": 0.05852515622973442, | |
| "learning_rate": 4.707368982147318e-06, | |
| "logits/chosen": -0.18053898215293884, | |
| "logits/rejected": -0.25306934118270874, | |
| "logps/chosen": -965.810791015625, | |
| "logps/rejected": -1501.8775634765625, | |
| "loss": 0.0405, | |
| "rewards/accuracies": 0.9578125476837158, | |
| "rewards/chosen": 7.657576560974121, | |
| "rewards/margins": 30.364038467407227, | |
| "rewards/rejected": -22.706459045410156, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.451827242524917, | |
| "grad_norm": 0.2221815437078476, | |
| "learning_rate": 4.679461034241906e-06, | |
| "logits/chosen": -0.1780516505241394, | |
| "logits/rejected": -0.25180700421333313, | |
| "logps/chosen": -1041.829345703125, | |
| "logps/rejected": -1607.8099365234375, | |
| "loss": 0.0274, | |
| "rewards/accuracies": 0.9625000953674316, | |
| "rewards/chosen": 7.245983123779297, | |
| "rewards/margins": 34.91341018676758, | |
| "rewards/rejected": -27.667430877685547, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.5182724252491693, | |
| "grad_norm": 0.47833314538002014, | |
| "learning_rate": 4.650372869738415e-06, | |
| "logits/chosen": -0.17948493361473083, | |
| "logits/rejected": -0.2594072222709656, | |
| "logps/chosen": -964.8648071289062, | |
| "logps/rejected": -1580.651123046875, | |
| "loss": 0.0309, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 7.238793849945068, | |
| "rewards/margins": 33.64303207397461, | |
| "rewards/rejected": -26.404239654541016, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.584717607973422, | |
| "grad_norm": 0.4996240437030792, | |
| "learning_rate": 4.620120240391065e-06, | |
| "logits/chosen": -0.19269897043704987, | |
| "logits/rejected": -0.25276264548301697, | |
| "logps/chosen": -1004.8121948242188, | |
| "logps/rejected": -1599.7081298828125, | |
| "loss": 0.0263, | |
| "rewards/accuracies": 0.9640625715255737, | |
| "rewards/chosen": 7.900957107543945, | |
| "rewards/margins": 33.70750427246094, | |
| "rewards/rejected": -25.806547164916992, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.6511627906976747, | |
| "grad_norm": 0.15727798640727997, | |
| "learning_rate": 4.588719528532342e-06, | |
| "logits/chosen": -0.1740064173936844, | |
| "logits/rejected": -0.25867804884910583, | |
| "logps/chosen": -946.6286010742188, | |
| "logps/rejected": -1478.2218017578125, | |
| "loss": 0.0325, | |
| "rewards/accuracies": 0.9578125476837158, | |
| "rewards/chosen": 8.154074668884277, | |
| "rewards/margins": 30.49738311767578, | |
| "rewards/rejected": -22.343307495117188, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.717607973421927, | |
| "grad_norm": 0.02712377905845642, | |
| "learning_rate": 4.556187738201656e-06, | |
| "logits/chosen": -0.19258640706539154, | |
| "logits/rejected": -0.2623384892940521, | |
| "logps/chosen": -974.1800537109375, | |
| "logps/rejected": -1545.2799072265625, | |
| "loss": 0.0369, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 5.771888732910156, | |
| "rewards/margins": 35.3669548034668, | |
| "rewards/rejected": -29.595062255859375, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.7840531561461797, | |
| "grad_norm": 0.4458315968513489, | |
| "learning_rate": 4.522542485937369e-06, | |
| "logits/chosen": -0.1969211995601654, | |
| "logits/rejected": -0.2592199444770813, | |
| "logps/chosen": -996.4677124023438, | |
| "logps/rejected": -1540.347900390625, | |
| "loss": 0.0307, | |
| "rewards/accuracies": 0.9578126072883606, | |
| "rewards/chosen": 8.071084022521973, | |
| "rewards/margins": 33.89667892456055, | |
| "rewards/rejected": -25.82559585571289, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.850498338870432, | |
| "grad_norm": 0.0796787217259407, | |
| "learning_rate": 4.48780199123712e-06, | |
| "logits/chosen": -0.19366417825222015, | |
| "logits/rejected": -0.25863632559776306, | |
| "logps/chosen": -1018.47607421875, | |
| "logps/rejected": -1551.6494140625, | |
| "loss": 0.0326, | |
| "rewards/accuracies": 0.9546875357627869, | |
| "rewards/chosen": 8.464451789855957, | |
| "rewards/margins": 33.95619201660156, | |
| "rewards/rejected": -25.491741180419922, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.9169435215946846, | |
| "grad_norm": 0.07867090404033661, | |
| "learning_rate": 4.451985066691649e-06, | |
| "logits/chosen": -0.19828085601329803, | |
| "logits/rejected": -0.263224720954895, | |
| "logps/chosen": -987.7084350585938, | |
| "logps/rejected": -1536.230224609375, | |
| "loss": 0.0312, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 7.850325584411621, | |
| "rewards/margins": 34.385406494140625, | |
| "rewards/rejected": -26.535079956054688, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.983388704318937, | |
| "grad_norm": 0.07914525270462036, | |
| "learning_rate": 4.415111107797445e-06, | |
| "logits/chosen": -0.18529188632965088, | |
| "logits/rejected": -0.25437110662460327, | |
| "logps/chosen": -1014.2196044921875, | |
| "logps/rejected": -1524.4237060546875, | |
| "loss": 0.0269, | |
| "rewards/accuracies": 0.9656250476837158, | |
| "rewards/chosen": 7.967530727386475, | |
| "rewards/margins": 34.628875732421875, | |
| "rewards/rejected": -26.661344528198242, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.046511627906977, | |
| "grad_norm": 0.026503894478082657, | |
| "learning_rate": 4.377200082453748e-06, | |
| "logits/chosen": -0.18043489754199982, | |
| "logits/rejected": -0.2650010883808136, | |
| "logps/chosen": -929.5118408203125, | |
| "logps/rejected": -1481.138427734375, | |
| "loss": 0.0311, | |
| "rewards/accuracies": 0.9539474248886108, | |
| "rewards/chosen": 7.918570041656494, | |
| "rewards/margins": 33.1231575012207, | |
| "rewards/rejected": -25.204586029052734, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.1129568106312293, | |
| "grad_norm": 0.055876098573207855, | |
| "learning_rate": 4.338272520149572e-06, | |
| "logits/chosen": -0.20002666115760803, | |
| "logits/rejected": -0.27018633484840393, | |
| "logps/chosen": -1009.8369750976562, | |
| "logps/rejected": -1604.3740234375, | |
| "loss": 0.033, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 7.928904056549072, | |
| "rewards/margins": 36.35823440551758, | |
| "rewards/rejected": -28.4293270111084, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.179401993355482, | |
| "grad_norm": 0.007625865284353495, | |
| "learning_rate": 4.2983495008466285e-06, | |
| "logits/chosen": -0.1756007969379425, | |
| "logits/rejected": -0.2624135911464691, | |
| "logps/chosen": -979.9953002929688, | |
| "logps/rejected": -1648.5302734375, | |
| "loss": 0.027, | |
| "rewards/accuracies": 0.9625000357627869, | |
| "rewards/chosen": 6.49752950668335, | |
| "rewards/margins": 39.52600860595703, | |
| "rewards/rejected": -33.028480529785156, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.2458471760797343, | |
| "grad_norm": 1.3655072450637817, | |
| "learning_rate": 4.257452643564155e-06, | |
| "logits/chosen": -0.19411802291870117, | |
| "logits/rejected": -0.26652225852012634, | |
| "logps/chosen": -955.16552734375, | |
| "logps/rejected": -1548.7850341796875, | |
| "loss": 0.0344, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 7.868605613708496, | |
| "rewards/margins": 34.91292190551758, | |
| "rewards/rejected": -27.04431915283203, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.3122923588039868, | |
| "grad_norm": 0.03723715990781784, | |
| "learning_rate": 4.215604094671835e-06, | |
| "logits/chosen": -0.20745989680290222, | |
| "logits/rejected": -0.26310446858406067, | |
| "logps/chosen": -1038.3160400390625, | |
| "logps/rejected": -1586.4140625, | |
| "loss": 0.0265, | |
| "rewards/accuracies": 0.9625000357627869, | |
| "rewards/chosen": 8.868291854858398, | |
| "rewards/margins": 36.5751953125, | |
| "rewards/rejected": -27.70690155029297, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.3787375415282392, | |
| "grad_norm": 0.04950110614299774, | |
| "learning_rate": 4.172826515897146e-06, | |
| "logits/chosen": -0.19769521057605743, | |
| "logits/rejected": -0.2655687928199768, | |
| "logps/chosen": -1002.1111450195312, | |
| "logps/rejected": -1597.6588134765625, | |
| "loss": 0.0232, | |
| "rewards/accuracies": 0.9671874642372131, | |
| "rewards/chosen": 7.961508274078369, | |
| "rewards/margins": 36.75872039794922, | |
| "rewards/rejected": -28.79721450805664, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.4451827242524917, | |
| "grad_norm": 0.27452749013900757, | |
| "learning_rate": 4.129143072053639e-06, | |
| "logits/chosen": -0.18965086340904236, | |
| "logits/rejected": -0.2614585757255554, | |
| "logps/chosen": -1033.2958984375, | |
| "logps/rejected": -1618.4097900390625, | |
| "loss": 0.0222, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 8.251802444458008, | |
| "rewards/margins": 37.35887145996094, | |
| "rewards/rejected": -29.107072830200195, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.511627906976744, | |
| "grad_norm": 0.0644262358546257, | |
| "learning_rate": 4.084577418496775e-06, | |
| "logits/chosen": -0.20613916218280792, | |
| "logits/rejected": -0.27108556032180786, | |
| "logps/chosen": -1004.0643310546875, | |
| "logps/rejected": -1601.927978515625, | |
| "loss": 0.0287, | |
| "rewards/accuracies": 0.9593750238418579, | |
| "rewards/chosen": 8.70193099975586, | |
| "rewards/margins": 35.765296936035156, | |
| "rewards/rejected": -27.063365936279297, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.5780730897009967, | |
| "grad_norm": 0.021257763728499413, | |
| "learning_rate": 4.039153688314146e-06, | |
| "logits/chosen": -0.20685432851314545, | |
| "logits/rejected": -0.267654150724411, | |
| "logps/chosen": -997.7388305664062, | |
| "logps/rejected": -1588.1624755859375, | |
| "loss": 0.0328, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 8.601181983947754, | |
| "rewards/margins": 37.05242156982422, | |
| "rewards/rejected": -28.45123863220215, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.644518272425249, | |
| "grad_norm": 0.029319366440176964, | |
| "learning_rate": 3.992896479256966e-06, | |
| "logits/chosen": -0.20922623574733734, | |
| "logits/rejected": -0.2706463634967804, | |
| "logps/chosen": -986.989501953125, | |
| "logps/rejected": -1553.56787109375, | |
| "loss": 0.0361, | |
| "rewards/accuracies": 0.9484374523162842, | |
| "rewards/chosen": 7.84389591217041, | |
| "rewards/margins": 38.18416213989258, | |
| "rewards/rejected": -30.34027099609375, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.7109634551495017, | |
| "grad_norm": 0.029560599476099014, | |
| "learning_rate": 3.945830840419966e-06, | |
| "logits/chosen": -0.18561449646949768, | |
| "logits/rejected": -0.2662014961242676, | |
| "logps/chosen": -1032.16552734375, | |
| "logps/rejected": -1648.7718505859375, | |
| "loss": 0.0263, | |
| "rewards/accuracies": 0.9625000357627869, | |
| "rewards/chosen": 8.428018569946289, | |
| "rewards/margins": 39.801116943359375, | |
| "rewards/rejected": -31.373104095458984, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.777408637873754, | |
| "grad_norm": 0.010398728772997856, | |
| "learning_rate": 3.897982258676867e-06, | |
| "logits/chosen": -0.1940029114484787, | |
| "logits/rejected": -0.27436965703964233, | |
| "logps/chosen": -961.5516357421875, | |
| "logps/rejected": -1579.232421875, | |
| "loss": 0.0383, | |
| "rewards/accuracies": 0.9453125, | |
| "rewards/chosen": 8.185991287231445, | |
| "rewards/margins": 38.32309341430664, | |
| "rewards/rejected": -30.13710594177246, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.8438538205980066, | |
| "grad_norm": 0.05186082422733307, | |
| "learning_rate": 3.849376644878783e-06, | |
| "logits/chosen": -0.20401492714881897, | |
| "logits/rejected": -0.266576886177063, | |
| "logps/chosen": -1019.6111450195312, | |
| "logps/rejected": -1620.943115234375, | |
| "loss": 0.0279, | |
| "rewards/accuracies": 0.9609374403953552, | |
| "rewards/chosen": 7.710155487060547, | |
| "rewards/margins": 40.542640686035156, | |
| "rewards/rejected": -32.83248519897461, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.910299003322259, | |
| "grad_norm": 0.11145040392875671, | |
| "learning_rate": 3.8000403198230385e-06, | |
| "logits/chosen": -0.19729003310203552, | |
| "logits/rejected": -0.26546692848205566, | |
| "logps/chosen": -987.747802734375, | |
| "logps/rejected": -1537.516357421875, | |
| "loss": 0.0223, | |
| "rewards/accuracies": 0.9703125357627869, | |
| "rewards/chosen": 8.000187873840332, | |
| "rewards/margins": 37.64095687866211, | |
| "rewards/rejected": -29.640769958496094, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.9767441860465116, | |
| "grad_norm": 0.2694355249404907, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "logits/chosen": -0.21104276180267334, | |
| "logits/rejected": -0.2754373848438263, | |
| "logps/chosen": -982.5044555664062, | |
| "logps/rejected": -1553.8619384765625, | |
| "loss": 0.0359, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 8.334994316101074, | |
| "rewards/margins": 38.43461227416992, | |
| "rewards/rejected": -30.099620819091797, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 4.039867109634551, | |
| "grad_norm": 0.0788518562912941, | |
| "learning_rate": 3.699282783125616e-06, | |
| "logits/chosen": -0.20658275485038757, | |
| "logits/rejected": -0.2766464054584503, | |
| "logps/chosen": -965.9616088867188, | |
| "logps/rejected": -1610.8004150390625, | |
| "loss": 0.0344, | |
| "rewards/accuracies": 0.9490132331848145, | |
| "rewards/chosen": 5.881260395050049, | |
| "rewards/margins": 41.5827522277832, | |
| "rewards/rejected": -35.70149230957031, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 4.106312292358804, | |
| "grad_norm": 0.005860505159944296, | |
| "learning_rate": 3.6479161334675294e-06, | |
| "logits/chosen": -0.18060919642448425, | |
| "logits/rejected": -0.2699647545814514, | |
| "logps/chosen": -948.4402465820312, | |
| "logps/rejected": -1546.55078125, | |
| "loss": 0.02, | |
| "rewards/accuracies": 0.971875011920929, | |
| "rewards/chosen": 9.497621536254883, | |
| "rewards/margins": 34.928138732910156, | |
| "rewards/rejected": -25.430519104003906, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 4.172757475083056, | |
| "grad_norm": 0.006049708928912878, | |
| "learning_rate": 3.595927866972694e-06, | |
| "logits/chosen": -0.20163992047309875, | |
| "logits/rejected": -0.2756229043006897, | |
| "logps/chosen": -980.5719604492188, | |
| "logps/rejected": -1553.8719482421875, | |
| "loss": 0.0253, | |
| "rewards/accuracies": 0.964062511920929, | |
| "rewards/chosen": 9.457831382751465, | |
| "rewards/margins": 36.089698791503906, | |
| "rewards/rejected": -26.631864547729492, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.239202657807309, | |
| "grad_norm": 0.035230789333581924, | |
| "learning_rate": 3.543346136204545e-06, | |
| "logits/chosen": -0.20396091043949127, | |
| "logits/rejected": -0.2794302701950073, | |
| "logps/chosen": -1021.5272827148438, | |
| "logps/rejected": -1694.7557373046875, | |
| "loss": 0.0339, | |
| "rewards/accuracies": 0.9515625238418579, | |
| "rewards/chosen": 8.177035331726074, | |
| "rewards/margins": 42.25918197631836, | |
| "rewards/rejected": -34.08214569091797, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 4.305647840531561, | |
| "grad_norm": 0.10100215673446655, | |
| "learning_rate": 3.4901994150978926e-06, | |
| "logits/chosen": -0.22341015934944153, | |
| "logits/rejected": -0.27249380946159363, | |
| "logps/chosen": -1055.6053466796875, | |
| "logps/rejected": -1636.560302734375, | |
| "loss": 0.0264, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 7.556117057800293, | |
| "rewards/margins": 41.31330490112305, | |
| "rewards/rejected": -33.75718688964844, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.372093023255814, | |
| "grad_norm": 0.023471888154745102, | |
| "learning_rate": 3.436516483539781e-06, | |
| "logits/chosen": -0.21736814081668854, | |
| "logits/rejected": -0.2824287712574005, | |
| "logps/chosen": -982.9537353515625, | |
| "logps/rejected": -1562.0203857421875, | |
| "loss": 0.0416, | |
| "rewards/accuracies": 0.940625011920929, | |
| "rewards/chosen": 6.172419548034668, | |
| "rewards/margins": 38.94424819946289, | |
| "rewards/rejected": -32.771827697753906, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.438538205980066, | |
| "grad_norm": 0.6410449743270874, | |
| "learning_rate": 3.3823264117846722e-06, | |
| "logits/chosen": -0.20028334856033325, | |
| "logits/rejected": -0.27208542823791504, | |
| "logps/chosen": -1042.419189453125, | |
| "logps/rejected": -1699.8818359375, | |
| "loss": 0.0243, | |
| "rewards/accuracies": 0.965624988079071, | |
| "rewards/chosen": 6.371281623840332, | |
| "rewards/margins": 44.25002670288086, | |
| "rewards/rejected": -37.878746032714844, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.504983388704319, | |
| "grad_norm": 0.052079688757658005, | |
| "learning_rate": 3.3276585447123957e-06, | |
| "logits/chosen": -0.19798581302165985, | |
| "logits/rejected": -0.27162185311317444, | |
| "logps/chosen": -993.3763427734375, | |
| "logps/rejected": -1604.8309326171875, | |
| "loss": 0.0232, | |
| "rewards/accuracies": 0.9671875238418579, | |
| "rewards/chosen": 8.744821548461914, | |
| "rewards/margins": 39.64046859741211, | |
| "rewards/rejected": -30.895647048950195, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 0.010166758671402931, | |
| "learning_rate": 3.272542485937369e-06, | |
| "logits/chosen": -0.2120884358882904, | |
| "logits/rejected": -0.2828371226787567, | |
| "logps/chosen": -979.7803344726562, | |
| "logps/rejected": -1523.5648193359375, | |
| "loss": 0.0295, | |
| "rewards/accuracies": 0.9578125476837158, | |
| "rewards/chosen": 9.28367805480957, | |
| "rewards/margins": 36.00359344482422, | |
| "rewards/rejected": -26.71991729736328, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.637873754152824, | |
| "grad_norm": 0.07122723013162613, | |
| "learning_rate": 3.217008081777726e-06, | |
| "logits/chosen": -0.2192113846540451, | |
| "logits/rejected": -0.2890257239341736, | |
| "logps/chosen": -929.1891479492188, | |
| "logps/rejected": -1475.6949462890625, | |
| "loss": 0.0346, | |
| "rewards/accuracies": 0.9515625238418579, | |
| "rewards/chosen": 9.672334671020508, | |
| "rewards/margins": 35.26881408691406, | |
| "rewards/rejected": -25.596477508544922, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.704318936877076, | |
| "grad_norm": 0.005518330726772547, | |
| "learning_rate": 3.1610854050930063e-06, | |
| "logits/chosen": -0.19891753792762756, | |
| "logits/rejected": -0.27730634808540344, | |
| "logps/chosen": -985.259033203125, | |
| "logps/rejected": -1633.97314453125, | |
| "loss": 0.0294, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 9.867067337036133, | |
| "rewards/margins": 38.44176483154297, | |
| "rewards/rejected": -28.574697494506836, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.770764119601329, | |
| "grad_norm": 0.008760541677474976, | |
| "learning_rate": 3.1048047389991693e-06, | |
| "logits/chosen": -0.21026046574115753, | |
| "logits/rejected": -0.2759222686290741, | |
| "logps/chosen": -1001.1782836914062, | |
| "logps/rejected": -1598.6923828125, | |
| "loss": 0.0328, | |
| "rewards/accuracies": 0.9546875357627869, | |
| "rewards/chosen": 9.485105514526367, | |
| "rewards/margins": 38.465606689453125, | |
| "rewards/rejected": -28.980497360229492, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.837209302325581, | |
| "grad_norm": 0.016581548377871513, | |
| "learning_rate": 3.0481965604697582e-06, | |
| "logits/chosen": -0.1935136914253235, | |
| "logits/rejected": -0.27545979619026184, | |
| "logps/chosen": -973.7349243164062, | |
| "logps/rejected": -1601.532958984375, | |
| "loss": 0.0242, | |
| "rewards/accuracies": 0.9656250476837158, | |
| "rewards/chosen": 7.491199016571045, | |
| "rewards/margins": 41.66270065307617, | |
| "rewards/rejected": -34.1714973449707, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.903654485049834, | |
| "grad_norm": 0.00849311426281929, | |
| "learning_rate": 2.9912915238320755e-06, | |
| "logits/chosen": -0.19976982474327087, | |
| "logits/rejected": -0.2815185487270355, | |
| "logps/chosen": -965.1561889648438, | |
| "logps/rejected": -1615.7647705078125, | |
| "loss": 0.0306, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 9.341843605041504, | |
| "rewards/margins": 40.90797424316406, | |
| "rewards/rejected": -31.566131591796875, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.970099667774086, | |
| "grad_norm": 0.018370211124420166, | |
| "learning_rate": 2.9341204441673267e-06, | |
| "logits/chosen": -0.21805454790592194, | |
| "logits/rejected": -0.28051772713661194, | |
| "logps/chosen": -1029.127197265625, | |
| "logps/rejected": -1687.1690673828125, | |
| "loss": 0.0305, | |
| "rewards/accuracies": 0.9562500715255737, | |
| "rewards/chosen": 9.458334922790527, | |
| "rewards/margins": 42.93328857421875, | |
| "rewards/rejected": -33.47495651245117, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 5.033222591362127, | |
| "grad_norm": 0.014480801299214363, | |
| "learning_rate": 2.876714280623708e-06, | |
| "logits/chosen": -0.19559527933597565, | |
| "logits/rejected": -0.2727430462837219, | |
| "logps/chosen": -966.6165771484375, | |
| "logps/rejected": -1623.1119384765625, | |
| "loss": 0.023, | |
| "rewards/accuracies": 0.9654605388641357, | |
| "rewards/chosen": 8.72402572631836, | |
| "rewards/margins": 41.55656051635742, | |
| "rewards/rejected": -32.83253479003906, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 5.099667774086379, | |
| "grad_norm": 0.004520288668572903, | |
| "learning_rate": 2.8191041196514874e-06, | |
| "logits/chosen": -0.19683074951171875, | |
| "logits/rejected": -0.28631192445755005, | |
| "logps/chosen": -1009.6803588867188, | |
| "logps/rejected": -1692.927734375, | |
| "loss": 0.0359, | |
| "rewards/accuracies": 0.9484374523162842, | |
| "rewards/chosen": 8.311098098754883, | |
| "rewards/margins": 43.74188232421875, | |
| "rewards/rejected": -35.430789947509766, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 5.166112956810632, | |
| "grad_norm": 0.03138303384184837, | |
| "learning_rate": 2.761321158169134e-06, | |
| "logits/chosen": -0.21386271715164185, | |
| "logits/rejected": -0.27936917543411255, | |
| "logps/chosen": -1063.0811767578125, | |
| "logps/rejected": -1719.7852783203125, | |
| "loss": 0.0338, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 8.667400360107422, | |
| "rewards/margins": 45.44544219970703, | |
| "rewards/rejected": -36.77804183959961, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 5.232558139534884, | |
| "grad_norm": 0.05668414384126663, | |
| "learning_rate": 2.703396686669646e-06, | |
| "logits/chosen": -0.2076747715473175, | |
| "logits/rejected": -0.2838803827762604, | |
| "logps/chosen": -986.5599365234375, | |
| "logps/rejected": -1629.951416015625, | |
| "loss": 0.0339, | |
| "rewards/accuracies": 0.9515625238418579, | |
| "rewards/chosen": 8.366827011108398, | |
| "rewards/margins": 41.922176361083984, | |
| "rewards/rejected": -33.55535125732422, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 5.299003322259137, | |
| "grad_norm": 0.02641725167632103, | |
| "learning_rate": 2.6453620722761897e-06, | |
| "logits/chosen": -0.21938087046146393, | |
| "logits/rejected": -0.27865853905677795, | |
| "logps/chosen": -1019.5682373046875, | |
| "logps/rejected": -1662.2581787109375, | |
| "loss": 0.0305, | |
| "rewards/accuracies": 0.9562499523162842, | |
| "rewards/chosen": 8.933462142944336, | |
| "rewards/margins": 44.147613525390625, | |
| "rewards/rejected": -35.21416091918945, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 5.365448504983389, | |
| "grad_norm": 0.02291504666209221, | |
| "learning_rate": 2.587248741756253e-06, | |
| "logits/chosen": -0.19530998170375824, | |
| "logits/rejected": -0.2803831100463867, | |
| "logps/chosen": -1002.9342651367188, | |
| "logps/rejected": -1661.538330078125, | |
| "loss": 0.0202, | |
| "rewards/accuracies": 0.9718750715255737, | |
| "rewards/chosen": 7.737525939941406, | |
| "rewards/margins": 43.79603576660156, | |
| "rewards/rejected": -36.058509826660156, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 5.431893687707642, | |
| "grad_norm": 0.008839444257318974, | |
| "learning_rate": 2.5290881645034932e-06, | |
| "logits/chosen": -0.20212310552597046, | |
| "logits/rejected": -0.26888853311538696, | |
| "logps/chosen": -986.2118530273438, | |
| "logps/rejected": -1612.09619140625, | |
| "loss": 0.0196, | |
| "rewards/accuracies": 0.9734375476837158, | |
| "rewards/chosen": 7.7221360206604, | |
| "rewards/margins": 42.720664978027344, | |
| "rewards/rejected": -34.998531341552734, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 5.498338870431894, | |
| "grad_norm": 0.01382855698466301, | |
| "learning_rate": 2.470911835496508e-06, | |
| "logits/chosen": -0.19646425545215607, | |
| "logits/rejected": -0.2737523019313812, | |
| "logps/chosen": -1009.5712890625, | |
| "logps/rejected": -1698.4268798828125, | |
| "loss": 0.024, | |
| "rewards/accuracies": 0.9656250476837158, | |
| "rewards/chosen": 7.582469940185547, | |
| "rewards/margins": 45.48985290527344, | |
| "rewards/rejected": -37.90738296508789, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 5.5647840531561465, | |
| "grad_norm": 0.22888414561748505, | |
| "learning_rate": 2.4127512582437486e-06, | |
| "logits/chosen": -0.21698302030563354, | |
| "logits/rejected": -0.28401094675064087, | |
| "logps/chosen": -955.4769897460938, | |
| "logps/rejected": -1580.6790771484375, | |
| "loss": 0.0351, | |
| "rewards/accuracies": 0.9500000476837158, | |
| "rewards/chosen": 8.103876113891602, | |
| "rewards/margins": 41.412132263183594, | |
| "rewards/rejected": -33.308258056640625, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 5.631229235880399, | |
| "grad_norm": 0.024031994864344597, | |
| "learning_rate": 2.3546379277238107e-06, | |
| "logits/chosen": -0.2063540369272232, | |
| "logits/rejected": -0.2823134660720825, | |
| "logps/chosen": -986.14453125, | |
| "logps/rejected": -1621.3382568359375, | |
| "loss": 0.023, | |
| "rewards/accuracies": 0.9671875238418579, | |
| "rewards/chosen": 9.615578651428223, | |
| "rewards/margins": 43.22168731689453, | |
| "rewards/rejected": -33.60611343383789, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 5.6976744186046515, | |
| "grad_norm": 0.010551623068749905, | |
| "learning_rate": 2.296603313330355e-06, | |
| "logits/chosen": -0.22434450685977936, | |
| "logits/rejected": -0.28748488426208496, | |
| "logps/chosen": -952.8927612304688, | |
| "logps/rejected": -1560.9144287109375, | |
| "loss": 0.0393, | |
| "rewards/accuracies": 0.9437499642372131, | |
| "rewards/chosen": 9.026927947998047, | |
| "rewards/margins": 40.4544563293457, | |
| "rewards/rejected": -31.42752456665039, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 5.764119601328904, | |
| "grad_norm": 0.0801558867096901, | |
| "learning_rate": 2.238678841830867e-06, | |
| "logits/chosen": -0.21255464851856232, | |
| "logits/rejected": -0.28065019845962524, | |
| "logps/chosen": -972.204345703125, | |
| "logps/rejected": -1601.0872802734375, | |
| "loss": 0.0326, | |
| "rewards/accuracies": 0.9531249403953552, | |
| "rewards/chosen": 8.012575149536133, | |
| "rewards/margins": 41.84084701538086, | |
| "rewards/rejected": -33.828269958496094, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 5.8305647840531565, | |
| "grad_norm": 0.08310822397470474, | |
| "learning_rate": 2.1808958803485134e-06, | |
| "logits/chosen": -0.20110653340816498, | |
| "logits/rejected": -0.28692275285720825, | |
| "logps/chosen": -959.0699462890625, | |
| "logps/rejected": -1659.7845458984375, | |
| "loss": 0.0294, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 8.209933280944824, | |
| "rewards/margins": 44.37035369873047, | |
| "rewards/rejected": -36.160423278808594, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 5.897009966777409, | |
| "grad_norm": 0.012136918492615223, | |
| "learning_rate": 2.1232857193762923e-06, | |
| "logits/chosen": -0.20550034940242767, | |
| "logits/rejected": -0.2773197889328003, | |
| "logps/chosen": -976.6769409179688, | |
| "logps/rejected": -1591.7425537109375, | |
| "loss": 0.0284, | |
| "rewards/accuracies": 0.9593750238418579, | |
| "rewards/chosen": 8.299397468566895, | |
| "rewards/margins": 42.518890380859375, | |
| "rewards/rejected": -34.21949005126953, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 5.9634551495016614, | |
| "grad_norm": 0.10679075866937637, | |
| "learning_rate": 2.0658795558326745e-06, | |
| "logits/chosen": -0.21557196974754333, | |
| "logits/rejected": -0.2806122899055481, | |
| "logps/chosen": -991.53466796875, | |
| "logps/rejected": -1629.1478271484375, | |
| "loss": 0.0316, | |
| "rewards/accuracies": 0.9546875357627869, | |
| "rewards/chosen": 8.746665000915527, | |
| "rewards/margins": 42.621334075927734, | |
| "rewards/rejected": -33.87466812133789, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 6.026578073089701, | |
| "grad_norm": 0.016590170562267303, | |
| "learning_rate": 2.0087084761679245e-06, | |
| "logits/chosen": -0.20582108199596405, | |
| "logits/rejected": -0.27399370074272156, | |
| "logps/chosen": -1017.154541015625, | |
| "logps/rejected": -1689.7642822265625, | |
| "loss": 0.0176, | |
| "rewards/accuracies": 0.9736842513084412, | |
| "rewards/chosen": 8.807604789733887, | |
| "rewards/margins": 44.51702880859375, | |
| "rewards/rejected": -35.70942306518555, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 6.093023255813954, | |
| "grad_norm": 0.02025614120066166, | |
| "learning_rate": 1.9518034395302413e-06, | |
| "logits/chosen": -0.19750240445137024, | |
| "logits/rejected": -0.2810463011264801, | |
| "logps/chosen": -990.0672607421875, | |
| "logps/rejected": -1701.806640625, | |
| "loss": 0.0273, | |
| "rewards/accuracies": 0.9609375596046448, | |
| "rewards/chosen": 9.167813301086426, | |
| "rewards/margins": 45.0367546081543, | |
| "rewards/rejected": -35.86894226074219, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 6.159468438538206, | |
| "grad_norm": 0.08283823728561401, | |
| "learning_rate": 1.895195261000831e-06, | |
| "logits/chosen": -0.20525017380714417, | |
| "logits/rejected": -0.2784285545349121, | |
| "logps/chosen": -1004.66357421875, | |
| "logps/rejected": -1686.80908203125, | |
| "loss": 0.0261, | |
| "rewards/accuracies": 0.9625000357627869, | |
| "rewards/chosen": 8.760817527770996, | |
| "rewards/margins": 44.76042556762695, | |
| "rewards/rejected": -35.99960708618164, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 6.225913621262459, | |
| "grad_norm": 0.03424767032265663, | |
| "learning_rate": 1.8389145949069953e-06, | |
| "logits/chosen": -0.20580098032951355, | |
| "logits/rejected": -0.2847464382648468, | |
| "logps/chosen": -1016.2109985351562, | |
| "logps/rejected": -1675.8634033203125, | |
| "loss": 0.0298, | |
| "rewards/accuracies": 0.9578125476837158, | |
| "rewards/chosen": 8.987253189086914, | |
| "rewards/margins": 45.16158676147461, | |
| "rewards/rejected": -36.174339294433594, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 6.292358803986711, | |
| "grad_norm": 0.08232512325048447, | |
| "learning_rate": 1.7829919182222752e-06, | |
| "logits/chosen": -0.21305060386657715, | |
| "logits/rejected": -0.278297483921051, | |
| "logps/chosen": -994.125, | |
| "logps/rejected": -1603.1475830078125, | |
| "loss": 0.0284, | |
| "rewards/accuracies": 0.9593750238418579, | |
| "rewards/chosen": 8.597787857055664, | |
| "rewards/margins": 43.382965087890625, | |
| "rewards/rejected": -34.785179138183594, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 6.358803986710964, | |
| "grad_norm": 0.11056870222091675, | |
| "learning_rate": 1.7274575140626318e-06, | |
| "logits/chosen": -0.20215469598770142, | |
| "logits/rejected": -0.2870931327342987, | |
| "logps/chosen": -979.541748046875, | |
| "logps/rejected": -1649.0330810546875, | |
| "loss": 0.0327, | |
| "rewards/accuracies": 0.9546874761581421, | |
| "rewards/chosen": 8.763008117675781, | |
| "rewards/margins": 43.981746673583984, | |
| "rewards/rejected": -35.21874237060547, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 6.425249169435216, | |
| "grad_norm": 0.17400754988193512, | |
| "learning_rate": 1.6723414552876052e-06, | |
| "logits/chosen": -0.2209278792142868, | |
| "logits/rejected": -0.29013967514038086, | |
| "logps/chosen": -1003.218505859375, | |
| "logps/rejected": -1650.18798828125, | |
| "loss": 0.0316, | |
| "rewards/accuracies": 0.9546874761581421, | |
| "rewards/chosen": 9.620824813842773, | |
| "rewards/margins": 42.64148712158203, | |
| "rewards/rejected": -33.020660400390625, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 6.4916943521594686, | |
| "grad_norm": 0.007690763566643, | |
| "learning_rate": 1.6176735882153284e-06, | |
| "logits/chosen": -0.2107953131198883, | |
| "logits/rejected": -0.28245460987091064, | |
| "logps/chosen": -986.3475341796875, | |
| "logps/rejected": -1621.1788330078125, | |
| "loss": 0.0274, | |
| "rewards/accuracies": 0.9609375, | |
| "rewards/chosen": 9.415980339050293, | |
| "rewards/margins": 43.30890655517578, | |
| "rewards/rejected": -33.89292526245117, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 6.558139534883721, | |
| "grad_norm": 0.007917268201708794, | |
| "learning_rate": 1.56348351646022e-06, | |
| "logits/chosen": -0.21639874577522278, | |
| "logits/rejected": -0.28619349002838135, | |
| "logps/chosen": -996.1339111328125, | |
| "logps/rejected": -1701.57275390625, | |
| "loss": 0.0326, | |
| "rewards/accuracies": 0.953125, | |
| "rewards/chosen": 8.72823715209961, | |
| "rewards/margins": 45.77238082885742, | |
| "rewards/rejected": -37.04414367675781, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 6.6245847176079735, | |
| "grad_norm": 0.009819849394261837, | |
| "learning_rate": 1.509800584902108e-06, | |
| "logits/chosen": -0.20866619050502777, | |
| "logits/rejected": -0.2797132730484009, | |
| "logps/chosen": -966.6695556640625, | |
| "logps/rejected": -1632.124267578125, | |
| "loss": 0.025, | |
| "rewards/accuracies": 0.9640624523162842, | |
| "rewards/chosen": 8.59074878692627, | |
| "rewards/margins": 44.0283317565918, | |
| "rewards/rejected": -35.437583923339844, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.691029900332226, | |
| "grad_norm": 0.004776698537170887, | |
| "learning_rate": 1.4566538637954556e-06, | |
| "logits/chosen": -0.20434224605560303, | |
| "logits/rejected": -0.2861843407154083, | |
| "logps/chosen": -919.8916625976562, | |
| "logps/rejected": -1539.8953857421875, | |
| "loss": 0.0229, | |
| "rewards/accuracies": 0.9671874642372131, | |
| "rewards/chosen": 8.691563606262207, | |
| "rewards/margins": 40.7747917175293, | |
| "rewards/rejected": -32.083229064941406, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 6.7574750830564785, | |
| "grad_norm": 0.00468341913074255, | |
| "learning_rate": 1.4040721330273063e-06, | |
| "logits/chosen": -0.22420468926429749, | |
| "logits/rejected": -0.28962579369544983, | |
| "logps/chosen": -963.7784423828125, | |
| "logps/rejected": -1591.2889404296875, | |
| "loss": 0.0434, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 8.361918449401855, | |
| "rewards/margins": 43.0437126159668, | |
| "rewards/rejected": -34.68179702758789, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 6.823920265780731, | |
| "grad_norm": 0.04172283038496971, | |
| "learning_rate": 1.3520838665324704e-06, | |
| "logits/chosen": -0.20492787659168243, | |
| "logits/rejected": -0.2808915078639984, | |
| "logps/chosen": -975.69921875, | |
| "logps/rejected": -1621.455322265625, | |
| "loss": 0.0295, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 8.498809814453125, | |
| "rewards/margins": 43.53432846069336, | |
| "rewards/rejected": -35.035518646240234, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 6.8903654485049834, | |
| "grad_norm": 0.014652963727712631, | |
| "learning_rate": 1.3007172168743854e-06, | |
| "logits/chosen": -0.2149275690317154, | |
| "logits/rejected": -0.28474923968315125, | |
| "logps/chosen": -996.0318603515625, | |
| "logps/rejected": -1641.5811767578125, | |
| "loss": 0.0273, | |
| "rewards/accuracies": 0.9609375, | |
| "rewards/chosen": 9.519927024841309, | |
| "rewards/margins": 42.85387420654297, | |
| "rewards/rejected": -33.33395004272461, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 6.956810631229236, | |
| "grad_norm": 0.058657530695199966, | |
| "learning_rate": 1.2500000000000007e-06, | |
| "logits/chosen": -0.19909979403018951, | |
| "logits/rejected": -0.2798536419868469, | |
| "logps/chosen": -1014.5499877929688, | |
| "logps/rejected": -1707.7406005859375, | |
| "loss": 0.024, | |
| "rewards/accuracies": 0.965624988079071, | |
| "rewards/chosen": 9.100263595581055, | |
| "rewards/margins": 45.508270263671875, | |
| "rewards/rejected": -36.40800857543945, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 7.019933554817276, | |
| "grad_norm": 0.08728719502687454, | |
| "learning_rate": 1.1999596801769617e-06, | |
| "logits/chosen": -0.23542040586471558, | |
| "logits/rejected": -0.28797173500061035, | |
| "logps/chosen": -1009.5910034179688, | |
| "logps/rejected": -1648.842529296875, | |
| "loss": 0.0337, | |
| "rewards/accuracies": 0.9490132331848145, | |
| "rewards/chosen": 9.306107521057129, | |
| "rewards/margins": 43.96856689453125, | |
| "rewards/rejected": -34.66246032714844, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 7.086378737541528, | |
| "grad_norm": 0.02003307454288006, | |
| "learning_rate": 1.1506233551212186e-06, | |
| "logits/chosen": -0.20915324985980988, | |
| "logits/rejected": -0.2867971360683441, | |
| "logps/chosen": -1014.2936401367188, | |
| "logps/rejected": -1682.6341552734375, | |
| "loss": 0.024, | |
| "rewards/accuracies": 0.965624988079071, | |
| "rewards/chosen": 9.585081100463867, | |
| "rewards/margins": 44.993507385253906, | |
| "rewards/rejected": -35.408424377441406, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 7.152823920265781, | |
| "grad_norm": 0.006420216057449579, | |
| "learning_rate": 1.1020177413231334e-06, | |
| "logits/chosen": -0.20691779255867004, | |
| "logits/rejected": -0.2857610583305359, | |
| "logps/chosen": -972.8970947265625, | |
| "logps/rejected": -1629.0289306640625, | |
| "loss": 0.0304, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 8.977264404296875, | |
| "rewards/margins": 43.42811584472656, | |
| "rewards/rejected": -34.45084762573242, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 7.219269102990033, | |
| "grad_norm": 0.02167355827987194, | |
| "learning_rate": 1.0541691595800338e-06, | |
| "logits/chosen": -0.20276153087615967, | |
| "logits/rejected": -0.2794567942619324, | |
| "logps/chosen": -1003.0272216796875, | |
| "logps/rejected": -1687.5604248046875, | |
| "loss": 0.0348, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 8.437175750732422, | |
| "rewards/margins": 46.04411315917969, | |
| "rewards/rejected": -37.60693359375, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 7.285714285714286, | |
| "grad_norm": 0.05616578087210655, | |
| "learning_rate": 1.0071035207430352e-06, | |
| "logits/chosen": -0.19716766476631165, | |
| "logits/rejected": -0.28074419498443604, | |
| "logps/chosen": -981.2333374023438, | |
| "logps/rejected": -1657.1065673828125, | |
| "loss": 0.0262, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 8.663593292236328, | |
| "rewards/margins": 45.19432067871094, | |
| "rewards/rejected": -36.530723571777344, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 7.352159468438538, | |
| "grad_norm": 0.01851947233080864, | |
| "learning_rate": 9.608463116858544e-07, | |
| "logits/chosen": -0.2091461569070816, | |
| "logits/rejected": -0.2789131700992584, | |
| "logps/chosen": -1033.9593505859375, | |
| "logps/rejected": -1729.9864501953125, | |
| "loss": 0.0217, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 9.394617080688477, | |
| "rewards/margins": 47.00334930419922, | |
| "rewards/rejected": -37.608734130859375, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 7.4186046511627906, | |
| "grad_norm": 0.005292957648634911, | |
| "learning_rate": 9.154225815032242e-07, | |
| "logits/chosen": -0.2048484981060028, | |
| "logits/rejected": -0.28349366784095764, | |
| "logps/chosen": -944.3374633789062, | |
| "logps/rejected": -1572.23681640625, | |
| "loss": 0.0284, | |
| "rewards/accuracies": 0.9593749642372131, | |
| "rewards/chosen": 9.14864444732666, | |
| "rewards/margins": 42.550880432128906, | |
| "rewards/rejected": -33.40223693847656, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 7.485049833887043, | |
| "grad_norm": 0.026964331045746803, | |
| "learning_rate": 8.708569279463622e-07, | |
| "logits/chosen": -0.20627908408641815, | |
| "logits/rejected": -0.2784706652164459, | |
| "logps/chosen": -964.9845581054688, | |
| "logps/rejected": -1633.6651611328125, | |
| "loss": 0.0273, | |
| "rewards/accuracies": 0.9609375, | |
| "rewards/chosen": 8.720355987548828, | |
| "rewards/margins": 44.15267562866211, | |
| "rewards/rejected": -35.43231964111328, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 7.5514950166112955, | |
| "grad_norm": 0.0011573478113859892, | |
| "learning_rate": 8.271734841028553e-07, | |
| "logits/chosen": -0.21377238631248474, | |
| "logits/rejected": -0.2910877466201782, | |
| "logps/chosen": -972.2564086914062, | |
| "logps/rejected": -1676.005126953125, | |
| "loss": 0.0305, | |
| "rewards/accuracies": 0.9578125476837158, | |
| "rewards/chosen": 9.1951322555542, | |
| "rewards/margins": 45.21895980834961, | |
| "rewards/rejected": -36.02383041381836, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 7.617940199335548, | |
| "grad_norm": 0.028651030734181404, | |
| "learning_rate": 7.843959053281663e-07, | |
| "logits/chosen": -0.217359721660614, | |
| "logits/rejected": -0.2867337465286255, | |
| "logps/chosen": -986.8521118164062, | |
| "logps/rejected": -1608.333740234375, | |
| "loss": 0.024, | |
| "rewards/accuracies": 0.965624988079071, | |
| "rewards/chosen": 8.933571815490723, | |
| "rewards/margins": 43.58317947387695, | |
| "rewards/rejected": -34.64960861206055, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 7.6843853820598005, | |
| "grad_norm": 0.021432682871818542, | |
| "learning_rate": 7.425473564358457e-07, | |
| "logits/chosen": -0.21459349989891052, | |
| "logits/rejected": -0.2918223738670349, | |
| "logps/chosen": -950.2572021484375, | |
| "logps/rejected": -1665.85546875, | |
| "loss": 0.0348, | |
| "rewards/accuracies": 0.9499999284744263, | |
| "rewards/chosen": 8.907698631286621, | |
| "rewards/margins": 44.95459747314453, | |
| "rewards/rejected": -36.04690170288086, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 7.750830564784053, | |
| "grad_norm": 0.0003203708620276302, | |
| "learning_rate": 7.016504991533727e-07, | |
| "logits/chosen": -0.2105288803577423, | |
| "logits/rejected": -0.2897786796092987, | |
| "logps/chosen": -1022.5786743164062, | |
| "logps/rejected": -1684.5330810546875, | |
| "loss": 0.0273, | |
| "rewards/accuracies": 0.9609375, | |
| "rewards/chosen": 8.936477661132812, | |
| "rewards/margins": 44.89566421508789, | |
| "rewards/rejected": -35.95918655395508, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 7.8172757475083055, | |
| "grad_norm": 0.04623947665095329, | |
| "learning_rate": 6.617274798504286e-07, | |
| "logits/chosen": -0.22387103736400604, | |
| "logits/rejected": -0.28619539737701416, | |
| "logps/chosen": -971.675048828125, | |
| "logps/rejected": -1651.590087890625, | |
| "loss": 0.038, | |
| "rewards/accuracies": 0.9468749761581421, | |
| "rewards/chosen": 8.927178382873535, | |
| "rewards/margins": 44.58891677856445, | |
| "rewards/rejected": -35.661739349365234, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 7.883720930232558, | |
| "grad_norm": 0.1079607829451561, | |
| "learning_rate": 6.227999175462521e-07, | |
| "logits/chosen": -0.2090545892715454, | |
| "logits/rejected": -0.2881450355052948, | |
| "logps/chosen": -952.6052856445312, | |
| "logps/rejected": -1569.903076171875, | |
| "loss": 0.03, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 9.027518272399902, | |
| "rewards/margins": 41.79720687866211, | |
| "rewards/rejected": -32.76968765258789, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 7.95016611295681, | |
| "grad_norm": 0.0075605567544698715, | |
| "learning_rate": 5.848888922025553e-07, | |
| "logits/chosen": -0.21150845289230347, | |
| "logits/rejected": -0.28404462337493896, | |
| "logps/chosen": -996.9921875, | |
| "logps/rejected": -1636.0484619140625, | |
| "loss": 0.0282, | |
| "rewards/accuracies": 0.9593750238418579, | |
| "rewards/chosen": 9.349788665771484, | |
| "rewards/margins": 43.83129119873047, | |
| "rewards/rejected": -34.481502532958984, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 8.013289036544851, | |
| "grad_norm": 0.10174239426851273, | |
| "learning_rate": 5.48014933308352e-07, | |
| "logits/chosen": -0.23533087968826294, | |
| "logits/rejected": -0.28401005268096924, | |
| "logps/chosen": -979.27587890625, | |
| "logps/rejected": -1561.8616943359375, | |
| "loss": 0.0337, | |
| "rewards/accuracies": 0.9490132331848145, | |
| "rewards/chosen": 9.154387474060059, | |
| "rewards/margins": 42.322410583496094, | |
| "rewards/rejected": -33.16802215576172, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 8.079734219269103, | |
| "grad_norm": 0.037876877933740616, | |
| "learning_rate": 5.121980087628802e-07, | |
| "logits/chosen": -0.21666212379932404, | |
| "logits/rejected": -0.28196656703948975, | |
| "logps/chosen": -1002.8568725585938, | |
| "logps/rejected": -1634.5693359375, | |
| "loss": 0.0304, | |
| "rewards/accuracies": 0.9562499523162842, | |
| "rewards/chosen": 9.121562957763672, | |
| "rewards/margins": 44.36361312866211, | |
| "rewards/rejected": -35.24205017089844, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 8.146179401993356, | |
| "grad_norm": 0.10557155311107635, | |
| "learning_rate": 4.774575140626317e-07, | |
| "logits/chosen": -0.19356323778629303, | |
| "logits/rejected": -0.27849042415618896, | |
| "logps/chosen": -967.4945068359375, | |
| "logps/rejected": -1631.644775390625, | |
| "loss": 0.0229, | |
| "rewards/accuracies": 0.9671875238418579, | |
| "rewards/chosen": 8.805334091186523, | |
| "rewards/margins": 43.998016357421875, | |
| "rewards/rejected": -35.19268035888672, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 8.212624584717608, | |
| "grad_norm": 0.022014953196048737, | |
| "learning_rate": 4.438122617983442e-07, | |
| "logits/chosen": -0.21807442605495453, | |
| "logits/rejected": -0.28545916080474854, | |
| "logps/chosen": -999.7384033203125, | |
| "logps/rejected": -1630.1734619140625, | |
| "loss": 0.0283, | |
| "rewards/accuracies": 0.9593749642372131, | |
| "rewards/chosen": 9.582186698913574, | |
| "rewards/margins": 44.091854095458984, | |
| "rewards/rejected": -34.509674072265625, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 8.279069767441861, | |
| "grad_norm": 0.007186461240053177, | |
| "learning_rate": 4.1128047146765936e-07, | |
| "logits/chosen": -0.2181546837091446, | |
| "logits/rejected": -0.28165513277053833, | |
| "logps/chosen": -1004.7896118164062, | |
| "logps/rejected": -1676.617431640625, | |
| "loss": 0.0263, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 9.609552383422852, | |
| "rewards/margins": 45.283634185791016, | |
| "rewards/rejected": -35.67407989501953, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 8.345514950166113, | |
| "grad_norm": 0.05021677538752556, | |
| "learning_rate": 3.798797596089351e-07, | |
| "logits/chosen": -0.2084813416004181, | |
| "logits/rejected": -0.2937774360179901, | |
| "logps/chosen": -988.1944580078125, | |
| "logps/rejected": -1683.8162841796875, | |
| "loss": 0.0294, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 8.989810943603516, | |
| "rewards/margins": 45.19856262207031, | |
| "rewards/rejected": -36.20874786376953, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 8.411960132890366, | |
| "grad_norm": 0.025954890996217728, | |
| "learning_rate": 3.4962713026158697e-07, | |
| "logits/chosen": -0.20687171816825867, | |
| "logits/rejected": -0.28650417923927307, | |
| "logps/chosen": -1004.6787109375, | |
| "logps/rejected": -1670.76513671875, | |
| "loss": 0.0305, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 9.011610984802246, | |
| "rewards/margins": 44.895050048828125, | |
| "rewards/rejected": -35.88343811035156, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 8.478405315614618, | |
| "grad_norm": 0.0554281584918499, | |
| "learning_rate": 3.2053896575809426e-07, | |
| "logits/chosen": -0.2132522314786911, | |
| "logits/rejected": -0.28568530082702637, | |
| "logps/chosen": -999.4458618164062, | |
| "logps/rejected": -1643.4739990234375, | |
| "loss": 0.0336, | |
| "rewards/accuracies": 0.9515625238418579, | |
| "rewards/chosen": 8.973052978515625, | |
| "rewards/margins": 44.0505485534668, | |
| "rewards/rejected": -35.07749938964844, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 8.544850498338871, | |
| "grad_norm": 0.022469861432909966, | |
| "learning_rate": 2.9263101785268253e-07, | |
| "logits/chosen": -0.20100554823875427, | |
| "logits/rejected": -0.2842293679714203, | |
| "logps/chosen": -990.7542724609375, | |
| "logps/rejected": -1718.5325927734375, | |
| "loss": 0.0317, | |
| "rewards/accuracies": 0.9546875357627869, | |
| "rewards/chosen": 9.08223819732666, | |
| "rewards/margins": 47.4109001159668, | |
| "rewards/rejected": -38.32866668701172, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 8.611295681063122, | |
| "grad_norm": 0.026393350213766098, | |
| "learning_rate": 2.6591839919146963e-07, | |
| "logits/chosen": -0.226187601685524, | |
| "logits/rejected": -0.29070550203323364, | |
| "logps/chosen": -976.7939453125, | |
| "logps/rejected": -1631.8603515625, | |
| "loss": 0.0337, | |
| "rewards/accuracies": 0.9515624642372131, | |
| "rewards/chosen": 9.085509300231934, | |
| "rewards/margins": 43.998069763183594, | |
| "rewards/rejected": -34.91256332397461, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 8.677740863787376, | |
| "grad_norm": 0.0665309801697731, | |
| "learning_rate": 2.404155751286988e-07, | |
| "logits/chosen": -0.19709895551204681, | |
| "logits/rejected": -0.2848767340183258, | |
| "logps/chosen": -960.6867065429688, | |
| "logps/rejected": -1658.4659423828125, | |
| "loss": 0.0217, | |
| "rewards/accuracies": 0.9687501192092896, | |
| "rewards/chosen": 8.968391418457031, | |
| "rewards/margins": 44.709556579589844, | |
| "rewards/rejected": -35.74116134643555, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 8.744186046511627, | |
| "grad_norm": 0.03218339383602142, | |
| "learning_rate": 2.1613635589349756e-07, | |
| "logits/chosen": -0.213968887925148, | |
| "logits/rejected": -0.2814236581325531, | |
| "logps/chosen": -1001.9435424804688, | |
| "logps/rejected": -1669.0272216796875, | |
| "loss": 0.0255, | |
| "rewards/accuracies": 0.964062511920929, | |
| "rewards/chosen": 8.87741756439209, | |
| "rewards/margins": 45.792850494384766, | |
| "rewards/rejected": -36.915435791015625, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 8.81063122923588, | |
| "grad_norm": 0.027269069105386734, | |
| "learning_rate": 1.9309388911139427e-07, | |
| "logits/chosen": -0.21916161477565765, | |
| "logits/rejected": -0.2914758026599884, | |
| "logps/chosen": -955.6594848632812, | |
| "logps/rejected": -1604.225341796875, | |
| "loss": 0.038, | |
| "rewards/accuracies": 0.9453125596046448, | |
| "rewards/chosen": 9.038484573364258, | |
| "rewards/margins": 44.25259017944336, | |
| "rewards/rejected": -35.21410369873047, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 8.877076411960132, | |
| "grad_norm": 0.041460830718278885, | |
| "learning_rate": 1.713006526846439e-07, | |
| "logits/chosen": -0.20554713904857635, | |
| "logits/rejected": -0.2785477638244629, | |
| "logps/chosen": -990.9168090820312, | |
| "logps/rejected": -1690.2587890625, | |
| "loss": 0.0197, | |
| "rewards/accuracies": 0.9718750715255737, | |
| "rewards/chosen": 9.279547691345215, | |
| "rewards/margins": 46.73269271850586, | |
| "rewards/rejected": -37.453147888183594, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 8.943521594684386, | |
| "grad_norm": 0.013856984674930573, | |
| "learning_rate": 1.507684480352292e-07, | |
| "logits/chosen": -0.21384954452514648, | |
| "logits/rejected": -0.29137367010116577, | |
| "logps/chosen": -947.7948608398438, | |
| "logps/rejected": -1607.2012939453125, | |
| "loss": 0.037, | |
| "rewards/accuracies": 0.948437511920929, | |
| "rewards/chosen": 8.737092971801758, | |
| "rewards/margins": 43.15813446044922, | |
| "rewards/rejected": -34.42103958129883, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 9.006644518272426, | |
| "grad_norm": 0.01655706949532032, | |
| "learning_rate": 1.31508393714177e-07, | |
| "logits/chosen": -0.22794920206069946, | |
| "logits/rejected": -0.28400349617004395, | |
| "logps/chosen": -989.8770141601562, | |
| "logps/rejected": -1598.807373046875, | |
| "loss": 0.0293, | |
| "rewards/accuracies": 0.955592155456543, | |
| "rewards/chosen": 9.594341278076172, | |
| "rewards/margins": 43.53114318847656, | |
| "rewards/rejected": -33.936805725097656, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 9.073089700996677, | |
| "grad_norm": 0.017982784658670425, | |
| "learning_rate": 1.1353091938067024e-07, | |
| "logits/chosen": -0.22623415291309357, | |
| "logits/rejected": -0.29436349868774414, | |
| "logps/chosen": -972.5148315429688, | |
| "logps/rejected": -1602.6168212890625, | |
| "loss": 0.0402, | |
| "rewards/accuracies": 0.942187488079071, | |
| "rewards/chosen": 9.074629783630371, | |
| "rewards/margins": 43.110740661621094, | |
| "rewards/rejected": -34.036109924316406, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 9.13953488372093, | |
| "grad_norm": 0.004334774799644947, | |
| "learning_rate": 9.684576015420277e-08, | |
| "logits/chosen": -0.22325854003429413, | |
| "logits/rejected": -0.28321751952171326, | |
| "logps/chosen": -982.11572265625, | |
| "logps/rejected": -1638.993896484375, | |
| "loss": 0.0272, | |
| "rewards/accuracies": 0.9609375596046448, | |
| "rewards/chosen": 9.52658462524414, | |
| "rewards/margins": 44.85720443725586, | |
| "rewards/rejected": -35.33061599731445, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 9.205980066445182, | |
| "grad_norm": 0.018007798120379448, | |
| "learning_rate": 8.146195134284052e-08, | |
| "logits/chosen": -0.20939311385154724, | |
| "logits/rejected": -0.28719571232795715, | |
| "logps/chosen": -977.8973999023438, | |
| "logps/rejected": -1680.6883544921875, | |
| "loss": 0.0261, | |
| "rewards/accuracies": 0.9625000953674316, | |
| "rewards/chosen": 9.223061561584473, | |
| "rewards/margins": 44.93825912475586, | |
| "rewards/rejected": -35.71520233154297, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 9.272425249169435, | |
| "grad_norm": 0.11176791042089462, | |
| "learning_rate": 6.738782355044048e-08, | |
| "logits/chosen": -0.2103874534368515, | |
| "logits/rejected": -0.28649529814720154, | |
| "logps/chosen": -950.8549194335938, | |
| "logps/rejected": -1609.9903564453125, | |
| "loss": 0.0316, | |
| "rewards/accuracies": 0.9546874761581421, | |
| "rewards/chosen": 8.95008659362793, | |
| "rewards/margins": 43.54734802246094, | |
| "rewards/rejected": -34.597259521484375, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 9.338870431893687, | |
| "grad_norm": 0.027401011437177658, | |
| "learning_rate": 5.463099816548578e-08, | |
| "logits/chosen": -0.21140608191490173, | |
| "logits/rejected": -0.27913111448287964, | |
| "logps/chosen": -1046.7650146484375, | |
| "logps/rejected": -1688.9732666015625, | |
| "loss": 0.0186, | |
| "rewards/accuracies": 0.973437488079071, | |
| "rewards/chosen": 9.671619415283203, | |
| "rewards/margins": 46.846065521240234, | |
| "rewards/rejected": -37.1744499206543, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 9.40531561461794, | |
| "grad_norm": 0.0032386966049671173, | |
| "learning_rate": 4.319838323396691e-08, | |
| "logits/chosen": -0.20964354276657104, | |
| "logits/rejected": -0.28780320286750793, | |
| "logps/chosen": -936.599853515625, | |
| "logps/rejected": -1573.99267578125, | |
| "loss": 0.0295, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 8.730453491210938, | |
| "rewards/margins": 43.16553497314453, | |
| "rewards/rejected": -34.43508529663086, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 9.471760797342192, | |
| "grad_norm": 0.004229187499731779, | |
| "learning_rate": 3.309616971855195e-08, | |
| "logits/chosen": -0.22262541949748993, | |
| "logits/rejected": -0.2844969928264618, | |
| "logps/chosen": -1004.188232421875, | |
| "logps/rejected": -1635.885009765625, | |
| "loss": 0.0293, | |
| "rewards/accuracies": 0.9593750238418579, | |
| "rewards/chosen": 9.532754898071289, | |
| "rewards/margins": 44.6163215637207, | |
| "rewards/rejected": -35.08356475830078, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 9.538205980066445, | |
| "grad_norm": 0.004731557797640562, | |
| "learning_rate": 2.4329828146074096e-08, | |
| "logits/chosen": -0.2104283571243286, | |
| "logits/rejected": -0.29052725434303284, | |
| "logps/chosen": -969.8198852539062, | |
| "logps/rejected": -1657.4737548828125, | |
| "loss": 0.0349, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 8.997271537780762, | |
| "rewards/margins": 44.33870315551758, | |
| "rewards/rejected": -35.3414306640625, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 9.604651162790697, | |
| "grad_norm": 0.04136363044381142, | |
| "learning_rate": 1.6904105645142443e-08, | |
| "logits/chosen": -0.2100651115179062, | |
| "logits/rejected": -0.2873626947402954, | |
| "logps/chosen": -951.37109375, | |
| "logps/rejected": -1638.732666015625, | |
| "loss": 0.0298, | |
| "rewards/accuracies": 0.957812488079071, | |
| "rewards/chosen": 8.915160179138184, | |
| "rewards/margins": 44.380455017089844, | |
| "rewards/rejected": -35.46529769897461, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 9.67109634551495, | |
| "grad_norm": 0.02028900943696499, | |
| "learning_rate": 1.0823023375489128e-08, | |
| "logits/chosen": -0.20784109830856323, | |
| "logits/rejected": -0.2790369689464569, | |
| "logps/chosen": -1006.0265502929688, | |
| "logps/rejected": -1686.586181640625, | |
| "loss": 0.0207, | |
| "rewards/accuracies": 0.9703124761581421, | |
| "rewards/chosen": 9.536187171936035, | |
| "rewards/margins": 46.156455993652344, | |
| "rewards/rejected": -36.62027359008789, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 9.737541528239202, | |
| "grad_norm": 0.0592665858566761, | |
| "learning_rate": 6.089874350439507e-09, | |
| "logits/chosen": -0.20697462558746338, | |
| "logits/rejected": -0.2866511642932892, | |
| "logps/chosen": -991.3517456054688, | |
| "logps/rejected": -1683.9049072265625, | |
| "loss": 0.0338, | |
| "rewards/accuracies": 0.9515625238418579, | |
| "rewards/chosen": 8.75904369354248, | |
| "rewards/margins": 46.561065673828125, | |
| "rewards/rejected": -37.80202865600586, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 9.803986710963455, | |
| "grad_norm": 0.10093259811401367, | |
| "learning_rate": 2.7072216536885855e-09, | |
| "logits/chosen": -0.21343420445919037, | |
| "logits/rejected": -0.29003995656967163, | |
| "logps/chosen": -978.60205078125, | |
| "logps/rejected": -1612.8367919921875, | |
| "loss": 0.0359, | |
| "rewards/accuracies": 0.9484375715255737, | |
| "rewards/chosen": 8.994418144226074, | |
| "rewards/margins": 43.18799591064453, | |
| "rewards/rejected": -34.193572998046875, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 9.870431893687707, | |
| "grad_norm": 0.0005512971547432244, | |
| "learning_rate": 6.768970513457151e-10, | |
| "logits/chosen": -0.20138706266880035, | |
| "logits/rejected": -0.2804562747478485, | |
| "logps/chosen": -1005.9734497070312, | |
| "logps/rejected": -1714.1412353515625, | |
| "loss": 0.0197, | |
| "rewards/accuracies": 0.971875011920929, | |
| "rewards/chosen": 8.944713592529297, | |
| "rewards/margins": 46.28833770751953, | |
| "rewards/rejected": -37.343624114990234, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 9.93687707641196, | |
| "grad_norm": 0.09091484546661377, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.21035358309745789, | |
| "logits/rejected": -0.2859784662723541, | |
| "logps/chosen": -1002.4822387695312, | |
| "logps/rejected": -1666.3056640625, | |
| "loss": 0.0272, | |
| "rewards/accuracies": 0.9609375, | |
| "rewards/chosen": 9.086991310119629, | |
| "rewards/margins": 45.62021255493164, | |
| "rewards/rejected": -36.53321838378906, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.93687707641196, | |
| "step": 1500, | |
| "total_flos": 2.9219422466201354e+19, | |
| "train_loss": 0.07423472365736962, | |
| "train_runtime": 24807.8996, | |
| "train_samples_per_second": 3.881, | |
| "train_steps_per_second": 0.06 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.9219422466201354e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |