| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9933065595716197, |
| "eval_steps": 500, |
| "global_step": 1119, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013386880856760375, |
| "grad_norm": 48.32637023925781, |
| "learning_rate": 1.3879733999999997e-06, |
| "logits/chosen": -2.7358782291412354, |
| "logits/rejected": -2.8863089084625244, |
| "logps/chosen": -154.43099975585938, |
| "logps/rejected": -76.9035873413086, |
| "loss": 0.6782, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": 0.03575515002012253, |
| "rewards/margins": 0.0314163900911808, |
| "rewards/rejected": 0.004338760394603014, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02677376171352075, |
| "grad_norm": 18.94371223449707, |
| "learning_rate": 3.122940149999999e-06, |
| "logits/chosen": -2.7054200172424316, |
| "logits/rejected": -2.8710813522338867, |
| "logps/chosen": -155.5043182373047, |
| "logps/rejected": -79.5620346069336, |
| "loss": 0.4295, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.7799822688102722, |
| "rewards/margins": 0.7580081224441528, |
| "rewards/rejected": 0.021974176168441772, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.040160642570281124, |
| "grad_norm": 5.264588356018066, |
| "learning_rate": 4.8579069e-06, |
| "logits/chosen": -2.6499576568603516, |
| "logits/rejected": -2.841108798980713, |
| "logps/chosen": -137.66973876953125, |
| "logps/rejected": -80.86649322509766, |
| "loss": 0.0953, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.818009376525879, |
| "rewards/margins": 3.042386770248413, |
| "rewards/rejected": -0.22437739372253418, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0535475234270415, |
| "grad_norm": 1.9624146223068237, |
| "learning_rate": 6.592873649999998e-06, |
| "logits/chosen": -2.5176243782043457, |
| "logits/rejected": -2.8056235313415527, |
| "logps/chosen": -112.12801361083984, |
| "logps/rejected": -94.06233215332031, |
| "loss": 0.0214, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.590962886810303, |
| "rewards/margins": 6.781089782714844, |
| "rewards/rejected": -1.1901264190673828, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06693440428380187, |
| "grad_norm": 0.09991835057735443, |
| "learning_rate": 8.3278404e-06, |
| "logits/chosen": -2.3332083225250244, |
| "logits/rejected": -2.6696295738220215, |
| "logps/chosen": -97.1298828125, |
| "logps/rejected": -108.11856842041016, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.456403732299805, |
| "rewards/margins": 11.336581230163574, |
| "rewards/rejected": -2.880176544189453, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08032128514056225, |
| "grad_norm": 0.006451677531003952, |
| "learning_rate": 1.006280715e-05, |
| "logits/chosen": -2.206895589828491, |
| "logits/rejected": -2.462498426437378, |
| "logps/chosen": -80.75566101074219, |
| "logps/rejected": -129.00411987304688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.789403915405273, |
| "rewards/margins": 13.747503280639648, |
| "rewards/rejected": -4.958100318908691, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09370816599732262, |
| "grad_norm": 0.0004014262813143432, |
| "learning_rate": 1.1797773899999998e-05, |
| "logits/chosen": -2.10058331489563, |
| "logits/rejected": -2.342927932739258, |
| "logps/chosen": -79.42524719238281, |
| "logps/rejected": -143.72073364257812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.209558486938477, |
| "rewards/margins": 15.470553398132324, |
| "rewards/rejected": -6.260995864868164, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.107095046854083, |
| "grad_norm": 0.00023443216923624277, |
| "learning_rate": 1.2144461232143962e-05, |
| "logits/chosen": -2.0052661895751953, |
| "logits/rejected": -2.1909799575805664, |
| "logps/chosen": -68.39270782470703, |
| "logps/rejected": -153.67198181152344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.552885055541992, |
| "rewards/margins": 17.000492095947266, |
| "rewards/rejected": -7.447608947753906, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12048192771084337, |
| "grad_norm": 0.009947865270078182, |
| "learning_rate": 1.214321810508581e-05, |
| "logits/chosen": -1.9368336200714111, |
| "logits/rejected": -2.0944368839263916, |
| "logps/chosen": -77.8448486328125, |
| "logps/rejected": -161.29324340820312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.328283309936523, |
| "rewards/margins": 18.502532958984375, |
| "rewards/rejected": -8.174247741699219, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.13386880856760375, |
| "grad_norm": 0.0006397409015335143, |
| "learning_rate": 1.2141019003537938e-05, |
| "logits/chosen": -1.9432464838027954, |
| "logits/rejected": -1.997998833656311, |
| "logps/chosen": -67.01065063476562, |
| "logps/rejected": -168.06625366210938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.874837875366211, |
| "rewards/margins": 18.738964080810547, |
| "rewards/rejected": -8.864126205444336, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14725568942436412, |
| "grad_norm": 3.562494021025486e-05, |
| "learning_rate": 1.2137864389263077e-05, |
| "logits/chosen": -1.8939683437347412, |
| "logits/rejected": -1.9639301300048828, |
| "logps/chosen": -69.45832061767578, |
| "logps/rejected": -175.5773162841797, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.973077774047852, |
| "rewards/margins": 19.39337921142578, |
| "rewards/rejected": -9.420300483703613, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "grad_norm": 0.00018317776266485453, |
| "learning_rate": 1.213375492466051e-05, |
| "logits/chosen": -1.8978850841522217, |
| "logits/rejected": -1.9658511877059937, |
| "logps/chosen": -61.8738899230957, |
| "logps/rejected": -180.20217895507812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.461588859558105, |
| "rewards/margins": 19.28191375732422, |
| "rewards/rejected": -9.82032585144043, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17402945113788487, |
| "grad_norm": 0.0010487588588148355, |
| "learning_rate": 1.2128691472626986e-05, |
| "logits/chosen": -1.9120140075683594, |
| "logits/rejected": -1.9368822574615479, |
| "logps/chosen": -61.631805419921875, |
| "logps/rejected": -174.88168334960938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.515580177307129, |
| "rewards/margins": 19.191221237182617, |
| "rewards/rejected": -9.675638198852539, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.18741633199464525, |
| "grad_norm": 0.0005829242873005569, |
| "learning_rate": 1.2122675096375539e-05, |
| "logits/chosen": -1.8962472677230835, |
| "logits/rejected": -1.8821055889129639, |
| "logps/chosen": -67.1492691040039, |
| "logps/rejected": -182.51010131835938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.898228645324707, |
| "rewards/margins": 20.0985107421875, |
| "rewards/rejected": -10.200281143188477, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20080321285140562, |
| "grad_norm": 0.0001830089750001207, |
| "learning_rate": 1.2115707059212225e-05, |
| "logits/chosen": -1.9109745025634766, |
| "logits/rejected": -1.8918424844741821, |
| "logps/chosen": -60.247169494628906, |
| "logps/rejected": -170.59629821777344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.591668128967285, |
| "rewards/margins": 19.038829803466797, |
| "rewards/rejected": -9.447163581848145, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.214190093708166, |
| "grad_norm": 3.0526671253028326e-06, |
| "learning_rate": 1.2107788824270861e-05, |
| "logits/chosen": -1.9272515773773193, |
| "logits/rejected": -1.94720458984375, |
| "logps/chosen": -58.57343673706055, |
| "logps/rejected": -181.34176635742188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.232949256896973, |
| "rewards/margins": 19.28238296508789, |
| "rewards/rejected": -10.049432754516602, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22757697456492637, |
| "grad_norm": 0.0006005926989018917, |
| "learning_rate": 1.2098922054205801e-05, |
| "logits/chosen": -1.9209476709365845, |
| "logits/rejected": -1.8958288431167603, |
| "logps/chosen": -58.07838821411133, |
| "logps/rejected": -187.4013671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.958385467529297, |
| "rewards/margins": 19.543930053710938, |
| "rewards/rejected": -10.585546493530273, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "grad_norm": 1.353817879135022e-05, |
| "learning_rate": 1.208910861084281e-05, |
| "logits/chosen": -1.8882849216461182, |
| "logits/rejected": -1.8732059001922607, |
| "logps/chosen": -70.58480834960938, |
| "logps/rejected": -178.2596435546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.439764022827148, |
| "rewards/margins": 20.539878845214844, |
| "rewards/rejected": -10.100113868713379, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2543507362784471, |
| "grad_norm": 0.00023403888917528093, |
| "learning_rate": 1.207835055478813e-05, |
| "logits/chosen": -1.8713138103485107, |
| "logits/rejected": -1.8445053100585938, |
| "logps/chosen": -72.9098892211914, |
| "logps/rejected": -176.0099334716797, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.652149200439453, |
| "rewards/margins": 20.62753677368164, |
| "rewards/rejected": -9.975388526916504, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2677376171352075, |
| "grad_norm": 0.0002160475414711982, |
| "learning_rate": 1.2066650144995788e-05, |
| "logits/chosen": -1.8813066482543945, |
| "logits/rejected": -1.842013955116272, |
| "logps/chosen": -63.20849609375, |
| "logps/rejected": -184.35964965820312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.75583553314209, |
| "rewards/margins": 20.272876739501953, |
| "rewards/rejected": -10.517043113708496, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.28112449799196787, |
| "grad_norm": 1.5363968486781232e-05, |
| "learning_rate": 1.2054009838293278e-05, |
| "logits/chosen": -1.9106378555297852, |
| "logits/rejected": -1.8844468593597412, |
| "logps/chosen": -57.907188415527344, |
| "logps/rejected": -182.01736450195312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.319896697998047, |
| "rewards/margins": 19.627681732177734, |
| "rewards/rejected": -10.307784080505371, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.29451137884872824, |
| "grad_norm": 0.00032003922387957573, |
| "learning_rate": 1.2040432288865665e-05, |
| "logits/chosen": -1.8974215984344482, |
| "logits/rejected": -1.914280891418457, |
| "logps/chosen": -61.646949768066406, |
| "logps/rejected": -186.77334594726562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.848616600036621, |
| "rewards/margins": 20.335248947143555, |
| "rewards/rejected": -10.486631393432617, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3078982597054886, |
| "grad_norm": 2.4666236640769057e-05, |
| "learning_rate": 1.2025920347698281e-05, |
| "logits/chosen": -1.8672893047332764, |
| "logits/rejected": -1.8574968576431274, |
| "logps/chosen": -62.46479034423828, |
| "logps/rejected": -183.5287628173828, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.873312950134277, |
| "rewards/margins": 20.34807777404785, |
| "rewards/rejected": -10.474763870239258, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "grad_norm": 7.831333641661331e-05, |
| "learning_rate": 1.2010477061978072e-05, |
| "logits/chosen": -1.8667519092559814, |
| "logits/rejected": -1.8458188772201538, |
| "logps/chosen": -67.73392486572266, |
| "logps/rejected": -190.04678344726562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.298925399780273, |
| "rewards/margins": 21.361431121826172, |
| "rewards/rejected": -11.062504768371582, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.33467202141900937, |
| "grad_norm": 0.0011632780078798532, |
| "learning_rate": 1.1994105674453762e-05, |
| "logits/chosen": -1.906557321548462, |
| "logits/rejected": -1.8378311395645142, |
| "logps/chosen": -59.9715690612793, |
| "logps/rejected": -187.6858673095703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.155950546264648, |
| "rewards/margins": 20.995595932006836, |
| "rewards/rejected": -10.839643478393555, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.34805890227576974, |
| "grad_norm": 2.287779534526635e-05, |
| "learning_rate": 1.1976809622754933e-05, |
| "logits/chosen": -1.8965215682983398, |
| "logits/rejected": -1.9305957555770874, |
| "logps/chosen": -60.8524284362793, |
| "logps/rejected": -182.12356567382812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.748926162719727, |
| "rewards/margins": 20.00579261779785, |
| "rewards/rejected": -10.256868362426758, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3614457831325301, |
| "grad_norm": 0.00024706361000426114, |
| "learning_rate": 1.1958592538670224e-05, |
| "logits/chosen": -1.8761208057403564, |
| "logits/rejected": -1.821118950843811, |
| "logps/chosen": -58.57818603515625, |
| "logps/rejected": -191.0096435546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.970569610595703, |
| "rewards/margins": 20.901823043823242, |
| "rewards/rejected": -10.931253433227539, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3748326639892905, |
| "grad_norm": 2.4137092623277567e-05, |
| "learning_rate": 1.1939458247384714e-05, |
| "logits/chosen": -1.878674864768982, |
| "logits/rejected": -1.8472044467926025, |
| "logps/chosen": -62.983184814453125, |
| "logps/rejected": -192.6395263671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.132028579711914, |
| "rewards/margins": 21.37057113647461, |
| "rewards/rejected": -11.238546371459961, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.38821954484605087, |
| "grad_norm": 0.00031987050897441804, |
| "learning_rate": 1.191941076667672e-05, |
| "logits/chosen": -1.8975257873535156, |
| "logits/rejected": -1.8439594507217407, |
| "logps/chosen": -62.376380920410156, |
| "logps/rejected": -178.85665893554688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.485511779785156, |
| "rewards/margins": 19.73735809326172, |
| "rewards/rejected": -10.251847267150879, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.40160642570281124, |
| "grad_norm": 3.340610419400036e-05, |
| "learning_rate": 1.1898454306074163e-05, |
| "logits/chosen": -1.8920434713363647, |
| "logits/rejected": -1.849535346031189, |
| "logps/chosen": -63.710914611816406, |
| "logps/rejected": -193.69337463378906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.413044929504395, |
| "rewards/margins": 20.71396255493164, |
| "rewards/rejected": -11.300919532775879, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4149933065595716, |
| "grad_norm": 0.00011380790965631604, |
| "learning_rate": 1.187659326597066e-05, |
| "logits/chosen": -1.8865476846694946, |
| "logits/rejected": -1.8176262378692627, |
| "logps/chosen": -68.44486999511719, |
| "logps/rejected": -200.52450561523438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.273351669311523, |
| "rewards/margins": 22.084421157836914, |
| "rewards/rejected": -11.811070442199707, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.428380187416332, |
| "grad_norm": 4.0704333514440805e-05, |
| "learning_rate": 1.185383223670152e-05, |
| "logits/chosen": -1.8735504150390625, |
| "logits/rejected": -1.8823959827423096, |
| "logps/chosen": -57.306846618652344, |
| "logps/rejected": -183.9485626220703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.857657432556152, |
| "rewards/margins": 20.344463348388672, |
| "rewards/rejected": -10.486806869506836, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.44176706827309237, |
| "grad_norm": 3.696778730954975e-05, |
| "learning_rate": 1.1830175997579895e-05, |
| "logits/chosen": -1.8666346073150635, |
| "logits/rejected": -1.8352988958358765, |
| "logps/chosen": -65.62974548339844, |
| "logps/rejected": -186.6559295654297, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.239971160888672, |
| "rewards/margins": 21.187232971191406, |
| "rewards/rejected": -10.947261810302734, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.45515394912985274, |
| "grad_norm": 2.024579771386925e-05, |
| "learning_rate": 1.1805629515893225e-05, |
| "logits/chosen": -1.8567460775375366, |
| "logits/rejected": -1.7982889413833618, |
| "logps/chosen": -63.90752029418945, |
| "logps/rejected": -192.54873657226562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.393075942993164, |
| "rewards/margins": 21.65923500061035, |
| "rewards/rejected": -11.266157150268555, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4685408299866131, |
| "grad_norm": 0.0002714527945499867, |
| "learning_rate": 1.1780197945860211e-05, |
| "logits/chosen": -1.8908030986785889, |
| "logits/rejected": -1.8585189580917358, |
| "logps/chosen": -58.50342559814453, |
| "logps/rejected": -189.9221649169922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.500165939331055, |
| "rewards/margins": 20.399356842041016, |
| "rewards/rejected": -10.899189949035645, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.0004148809239268303, |
| "learning_rate": 1.1753886627548548e-05, |
| "logits/chosen": -1.8859355449676514, |
| "logits/rejected": -1.8143894672393799, |
| "logps/chosen": -66.53254699707031, |
| "logps/rejected": -196.88986206054688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.707757949829102, |
| "rewards/margins": 21.089689254760742, |
| "rewards/rejected": -11.381932258605957, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.49531459170013387, |
| "grad_norm": 4.077264020452276e-05, |
| "learning_rate": 1.172670108575363e-05, |
| "logits/chosen": -1.88116455078125, |
| "logits/rejected": -1.8810043334960938, |
| "logps/chosen": -75.669189453125, |
| "logps/rejected": -184.03500366210938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.555752754211426, |
| "rewards/margins": 21.035213470458984, |
| "rewards/rejected": -10.479463577270508, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5087014725568942, |
| "grad_norm": 0.00022642931435257196, |
| "learning_rate": 1.1698647028838462e-05, |
| "logits/chosen": -1.8791462182998657, |
| "logits/rejected": -1.8246482610702515, |
| "logps/chosen": -58.291839599609375, |
| "logps/rejected": -193.2161102294922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.682637214660645, |
| "rewards/margins": 20.842124938964844, |
| "rewards/rejected": -11.159486770629883, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5220883534136547, |
| "grad_norm": 0.00010173048212891445, |
| "learning_rate": 1.166973034753503e-05, |
| "logits/chosen": -1.9097864627838135, |
| "logits/rejected": -1.8443893194198608, |
| "logps/chosen": -54.96189498901367, |
| "logps/rejected": -183.68154907226562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.263029098510742, |
| "rewards/margins": 19.78293228149414, |
| "rewards/rejected": -10.519901275634766, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.535475234270415, |
| "grad_norm": 3.119400207651779e-05, |
| "learning_rate": 1.1639957113707378e-05, |
| "logits/chosen": -1.8835645914077759, |
| "logits/rejected": -1.8237943649291992, |
| "logps/chosen": -66.83441162109375, |
| "logps/rejected": -185.5270538330078, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.838064193725586, |
| "rewards/margins": 20.68081283569336, |
| "rewards/rejected": -10.842748641967773, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5488621151271754, |
| "grad_norm": 2.8754557206411846e-05, |
| "learning_rate": 1.1609333579076652e-05, |
| "logits/chosen": -1.8577368259429932, |
| "logits/rejected": -1.8328752517700195, |
| "logps/chosen": -66.59144592285156, |
| "logps/rejected": -180.5541534423828, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.479964256286621, |
| "rewards/margins": 21.016925811767578, |
| "rewards/rejected": -10.536964416503906, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5622489959839357, |
| "grad_norm": 0.00011976935638813302, |
| "learning_rate": 1.157786617390838e-05, |
| "logits/chosen": -1.9020277261734009, |
| "logits/rejected": -1.870615005493164, |
| "logps/chosen": -55.10723114013672, |
| "logps/rejected": -188.2975311279297, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.497312545776367, |
| "rewards/margins": 20.4838924407959, |
| "rewards/rejected": -10.986580848693848, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5756358768406962, |
| "grad_norm": 0.0002118870906997472, |
| "learning_rate": 1.1545561505662249e-05, |
| "logits/chosen": -1.876802682876587, |
| "logits/rejected": -1.8307838439941406, |
| "logps/chosen": -69.99948120117188, |
| "logps/rejected": -189.57418823242188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.201910018920898, |
| "rewards/margins": 21.337665557861328, |
| "rewards/rejected": -11.13575553894043, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5890227576974565, |
| "grad_norm": 8.499900286551565e-05, |
| "learning_rate": 1.1512426357604687e-05, |
| "logits/chosen": -1.8773601055145264, |
| "logits/rejected": -1.8962678909301758, |
| "logps/chosen": -68.12947082519531, |
| "logps/rejected": -185.85067749023438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.250208854675293, |
| "rewards/margins": 20.985477447509766, |
| "rewards/rejected": -10.735268592834473, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6024096385542169, |
| "grad_norm": 3.3592546969885007e-05, |
| "learning_rate": 1.147846768738454e-05, |
| "logits/chosen": -1.9119741916656494, |
| "logits/rejected": -1.8050686120986938, |
| "logps/chosen": -55.8441276550293, |
| "logps/rejected": -185.8286590576172, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.300341606140137, |
| "rewards/margins": 20.14506721496582, |
| "rewards/rejected": -10.844724655151367, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6157965194109772, |
| "grad_norm": 1.3038397810305469e-05, |
| "learning_rate": 1.1443692625572097e-05, |
| "logits/chosen": -1.8610095977783203, |
| "logits/rejected": -1.8084399700164795, |
| "logps/chosen": -66.45205688476562, |
| "logps/rejected": -190.3325958251953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.85809326171875, |
| "rewards/margins": 21.86368179321289, |
| "rewards/rejected": -11.00558853149414, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6291834002677377, |
| "grad_norm": 0.0004023597575724125, |
| "learning_rate": 1.140810847416185e-05, |
| "logits/chosen": -1.891358733177185, |
| "logits/rejected": -1.8402206897735596, |
| "logps/chosen": -67.42313385009766, |
| "logps/rejected": -189.2025604248047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.033833503723145, |
| "rewards/margins": 21.153135299682617, |
| "rewards/rejected": -11.119302749633789, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.642570281124498, |
| "grad_norm": 1.1516180165926926e-05, |
| "learning_rate": 1.1371722705039222e-05, |
| "logits/chosen": -1.8551809787750244, |
| "logits/rejected": -1.8508669137954712, |
| "logps/chosen": -64.57566833496094, |
| "logps/rejected": -199.9986114501953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.904739379882812, |
| "rewards/margins": 22.700956344604492, |
| "rewards/rejected": -11.796217918395996, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6559571619812584, |
| "grad_norm": 2.9551241823355667e-05, |
| "learning_rate": 1.1334542958411638e-05, |
| "logits/chosen": -1.8598964214324951, |
| "logits/rejected": -1.7960243225097656, |
| "logps/chosen": -64.33881378173828, |
| "logps/rejected": -190.1427001953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.994267463684082, |
| "rewards/margins": 21.328704833984375, |
| "rewards/rejected": -11.334436416625977, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6693440428380187, |
| "grad_norm": 2.7200452677789144e-05, |
| "learning_rate": 1.129657704120426e-05, |
| "logits/chosen": -1.8504676818847656, |
| "logits/rejected": -1.7715709209442139, |
| "logps/chosen": -69.25659942626953, |
| "logps/rejected": -188.40867614746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.336310386657715, |
| "rewards/margins": 21.38101577758789, |
| "rewards/rejected": -11.04470157623291, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6827309236947792, |
| "grad_norm": 1.9446013538981788e-05, |
| "learning_rate": 1.125783292542069e-05, |
| "logits/chosen": -1.8880693912506104, |
| "logits/rejected": -1.8214142322540283, |
| "logps/chosen": -61.559906005859375, |
| "logps/rejected": -190.22848510742188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.045111656188965, |
| "rewards/margins": 21.14029312133789, |
| "rewards/rejected": -11.095178604125977, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6961178045515395, |
| "grad_norm": 1.5772628103150055e-05, |
| "learning_rate": 1.1218318746469043e-05, |
| "logits/chosen": -1.8853015899658203, |
| "logits/rejected": -1.7798893451690674, |
| "logps/chosen": -59.47282791137695, |
| "logps/rejected": -192.4330291748047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.76961612701416, |
| "rewards/margins": 21.079383850097656, |
| "rewards/rejected": -11.309769630432129, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7095046854082999, |
| "grad_norm": 4.3517022277228534e-05, |
| "learning_rate": 1.1178042801453673e-05, |
| "logits/chosen": -1.8884027004241943, |
| "logits/rejected": -1.7771536111831665, |
| "logps/chosen": -68.0465087890625, |
| "logps/rejected": -191.3766632080078, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.391663551330566, |
| "rewards/margins": 21.76491355895996, |
| "rewards/rejected": -11.373248100280762, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7228915662650602, |
| "grad_norm": 1.183638596558012e-05, |
| "learning_rate": 1.1137013547432978e-05, |
| "logits/chosen": -1.877772331237793, |
| "logits/rejected": -1.8006465435028076, |
| "logps/chosen": -60.1486701965332, |
| "logps/rejected": -190.3517608642578, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.311609268188477, |
| "rewards/margins": 21.450910568237305, |
| "rewards/rejected": -11.139305114746094, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7362784471218207, |
| "grad_norm": 0.00011462459951872006, |
| "learning_rate": 1.1095239599643599e-05, |
| "logits/chosen": -1.8803141117095947, |
| "logits/rejected": -1.7724977731704712, |
| "logps/chosen": -69.33953094482422, |
| "logps/rejected": -206.20681762695312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.447629928588867, |
| "rewards/margins": 22.70147132873535, |
| "rewards/rejected": -12.253841400146484, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.749665327978581, |
| "grad_norm": 3.4330005291849375e-05, |
| "learning_rate": 1.1052729729691409e-05, |
| "logits/chosen": -1.9076026678085327, |
| "logits/rejected": -1.8033192157745361, |
| "logps/chosen": -64.76860046386719, |
| "logps/rejected": -193.28390502929688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.796707153320312, |
| "rewards/margins": 21.169038772583008, |
| "rewards/rejected": -11.372334480285645, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7630522088353414, |
| "grad_norm": 0.0004265084571670741, |
| "learning_rate": 1.1009492863709674e-05, |
| "logits/chosen": -1.858690857887268, |
| "logits/rejected": -1.8127180337905884, |
| "logps/chosen": -63.8545036315918, |
| "logps/rejected": -192.01919555664062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.419605255126953, |
| "rewards/margins": 21.708141326904297, |
| "rewards/rejected": -11.28853702545166, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7764390896921017, |
| "grad_norm": 2.2610509404330514e-05, |
| "learning_rate": 1.0965538080484765e-05, |
| "logits/chosen": -1.8989194631576538, |
| "logits/rejected": -1.8063570261001587, |
| "logps/chosen": -59.67851638793945, |
| "logps/rejected": -196.120361328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.765997886657715, |
| "rewards/margins": 21.32438087463379, |
| "rewards/rejected": -11.558382034301758, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7898259705488622, |
| "grad_norm": 1.3567336281994358e-05, |
| "learning_rate": 1.0920874609549798e-05, |
| "logits/chosen": -1.870410680770874, |
| "logits/rejected": -1.8066644668579102, |
| "logps/chosen": -69.43475341796875, |
| "logps/rejected": -203.3020782470703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.31662368774414, |
| "rewards/margins": 22.408395767211914, |
| "rewards/rejected": -12.091771125793457, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8032128514056225, |
| "grad_norm": 6.72144815325737e-05, |
| "learning_rate": 1.0875511829246656e-05, |
| "logits/chosen": -1.8767350912094116, |
| "logits/rejected": -1.769721269607544, |
| "logps/chosen": -59.4417610168457, |
| "logps/rejected": -193.3367156982422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.873218536376953, |
| "rewards/margins": 21.551494598388672, |
| "rewards/rejected": -11.678277015686035, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8165997322623829, |
| "grad_norm": 6.611274147871882e-05, |
| "learning_rate": 1.0829459264756734e-05, |
| "logits/chosen": -1.871285080909729, |
| "logits/rejected": -1.7826554775238037, |
| "logps/chosen": -61.01091766357422, |
| "logps/rejected": -198.24710083007812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.198274612426758, |
| "rewards/margins": 21.897686004638672, |
| "rewards/rejected": -11.699411392211914, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8299866131191432, |
| "grad_norm": 1.358661029371433e-05, |
| "learning_rate": 1.0782726586100857e-05, |
| "logits/chosen": -1.8602094650268555, |
| "logits/rejected": -1.7963926792144775, |
| "logps/chosen": -63.376007080078125, |
| "logps/rejected": -202.0255889892578, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.44829273223877, |
| "rewards/margins": 22.673847198486328, |
| "rewards/rejected": -12.225557327270508, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8433734939759037, |
| "grad_norm": 0.00011065916623920202, |
| "learning_rate": 1.0735323606108803e-05, |
| "logits/chosen": -1.87014639377594, |
| "logits/rejected": -1.7621206045150757, |
| "logps/chosen": -69.85023498535156, |
| "logps/rejected": -202.82557678222656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.451375007629395, |
| "rewards/margins": 22.6992130279541, |
| "rewards/rejected": -12.247835159301758, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.856760374832664, |
| "grad_norm": 1.2892563063360285e-05, |
| "learning_rate": 1.0687260278358814e-05, |
| "logits/chosen": -1.8692089319229126, |
| "logits/rejected": -1.8146623373031616, |
| "logps/chosen": -62.0356330871582, |
| "logps/rejected": -198.94203186035156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.914308547973633, |
| "rewards/margins": 21.57122230529785, |
| "rewards/rejected": -11.656911849975586, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8701472556894244, |
| "grad_norm": 6.038762876414694e-06, |
| "learning_rate": 1.0638546695087565e-05, |
| "logits/chosen": -1.885371446609497, |
| "logits/rejected": -1.8294477462768555, |
| "logps/chosen": -61.702667236328125, |
| "logps/rejected": -192.01895141601562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.938339233398438, |
| "rewards/margins": 21.29090690612793, |
| "rewards/rejected": -11.35256576538086, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8835341365461847, |
| "grad_norm": 1.671519385126885e-05, |
| "learning_rate": 1.0589193085071023e-05, |
| "logits/chosen": -1.8809674978256226, |
| "logits/rejected": -1.794091820716858, |
| "logps/chosen": -73.87046813964844, |
| "logps/rejected": -199.53927612304688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.256256103515625, |
| "rewards/margins": 22.13440704345703, |
| "rewards/rejected": -11.878148078918457, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8969210174029452, |
| "grad_norm": 7.38472408556845e-06, |
| "learning_rate": 1.0539209811476632e-05, |
| "logits/chosen": -1.87711501121521, |
| "logits/rejected": -1.7717126607894897, |
| "logps/chosen": -61.84357452392578, |
| "logps/rejected": -199.06480407714844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.918207168579102, |
| "rewards/margins": 21.842174530029297, |
| "rewards/rejected": -11.923968315124512, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9103078982597055, |
| "grad_norm": 3.710574674187228e-06, |
| "learning_rate": 1.0488607369687263e-05, |
| "logits/chosen": -1.8811956644058228, |
| "logits/rejected": -1.7410866022109985, |
| "logps/chosen": -62.85358810424805, |
| "logps/rejected": -208.62289428710938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.954957008361816, |
| "rewards/margins": 22.574024200439453, |
| "rewards/rejected": -12.619064331054688, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9236947791164659, |
| "grad_norm": 3.9282083889702335e-05, |
| "learning_rate": 1.0437396385097436e-05, |
| "logits/chosen": -1.8749635219573975, |
| "logits/rejected": -1.8668371438980103, |
| "logps/chosen": -67.99688720703125, |
| "logps/rejected": -194.4200439453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.10667610168457, |
| "rewards/margins": 21.479095458984375, |
| "rewards/rejected": -11.372419357299805, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9370816599732262, |
| "grad_norm": 6.739242053299677e-06, |
| "learning_rate": 1.0385587610882203e-05, |
| "logits/chosen": -1.8696308135986328, |
| "logits/rejected": -1.7740843296051025, |
| "logps/chosen": -61.758819580078125, |
| "logps/rejected": -188.14012145996094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.408076286315918, |
| "rewards/margins": 21.486896514892578, |
| "rewards/rejected": -11.07882022857666, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9504685408299867, |
| "grad_norm": 2.1943731553619727e-05, |
| "learning_rate": 1.0333191925739228e-05, |
| "logits/chosen": -1.8707389831542969, |
| "logits/rejected": -1.745100975036621, |
| "logps/chosen": -54.22405242919922, |
| "logps/rejected": -189.01776123046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.374455451965332, |
| "rewards/margins": 20.611839294433594, |
| "rewards/rejected": -11.237382888793945, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 1.4479804121947382e-05, |
| "learning_rate": 1.0280220331604505e-05, |
| "logits/chosen": -1.886850118637085, |
| "logits/rejected": -1.7762616872787476, |
| "logps/chosen": -65.0347900390625, |
| "logps/rejected": -192.35006713867188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.607036590576172, |
| "rewards/margins": 21.074512481689453, |
| "rewards/rejected": -11.467473983764648, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9772423025435074, |
| "grad_norm": 6.089697853894904e-05, |
| "learning_rate": 1.0226683951342178e-05, |
| "logits/chosen": -1.86488938331604, |
| "logits/rejected": -1.7880140542984009, |
| "logps/chosen": -70.48530578613281, |
| "logps/rejected": -203.04815673828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.034753799438477, |
| "rewards/margins": 22.2109432220459, |
| "rewards/rejected": -12.176187515258789, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9906291834002677, |
| "grad_norm": 1.1036102478101384e-05, |
| "learning_rate": 1.017259402640901e-05, |
| "logits/chosen": -1.89974045753479, |
| "logits/rejected": -1.7988322973251343, |
| "logps/chosen": -62.814735412597656, |
| "logps/rejected": -194.54603576660156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.982305526733398, |
| "rewards/margins": 21.521968841552734, |
| "rewards/rejected": -11.53965950012207, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.998661311914324, |
| "eval_logits/chosen": -1.9349273443222046, |
| "eval_logits/rejected": -1.7649630308151245, |
| "eval_logps/chosen": -63.54435729980469, |
| "eval_logps/rejected": -194.9509735107422, |
| "eval_loss": 1.3701900059004402e-07, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 10.00214958190918, |
| "eval_rewards/margins": 21.566242218017578, |
| "eval_rewards/rejected": -11.564092636108398, |
| "eval_runtime": 29.1535, |
| "eval_samples_per_second": 6.86, |
| "eval_steps_per_second": 6.86, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.002677376171352, |
| "grad_norm": 1.2704935215879232e-05, |
| "learning_rate": 1.0117961914493904e-05, |
| "logits/chosen": -1.8601760864257812, |
| "logits/rejected": -1.7008464336395264, |
| "logps/chosen": -74.2369155883789, |
| "logps/rejected": -199.70252990722656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.066021919250488, |
| "rewards/margins": 22.14150619506836, |
| "rewards/rejected": -12.075483322143555, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0160642570281124, |
| "grad_norm": 5.538755431189202e-05, |
| "learning_rate": 1.0062799087133048e-05, |
| "logits/chosen": -1.8701107501983643, |
| "logits/rejected": -1.7948967218399048, |
| "logps/chosen": -63.428306579589844, |
| "logps/rejected": -198.6887969970703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.991368293762207, |
| "rewards/margins": 21.902233123779297, |
| "rewards/rejected": -11.910863876342773, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0294511378848727, |
| "grad_norm": 4.9225644033867866e-05, |
| "learning_rate": 1.0007117127301148e-05, |
| "logits/chosen": -1.8625848293304443, |
| "logits/rejected": -1.7941219806671143, |
| "logps/chosen": -63.298004150390625, |
| "logps/rejected": -200.94699096679688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.300239562988281, |
| "rewards/margins": 22.36918067932129, |
| "rewards/rejected": -12.068942070007324, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0428380187416333, |
| "grad_norm": 1.8702992747421376e-05, |
| "learning_rate": 9.950927726979255e-06, |
| "logits/chosen": -1.8798978328704834, |
| "logits/rejected": -1.7565393447875977, |
| "logps/chosen": -56.972923278808594, |
| "logps/rejected": -190.3721160888672, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.65361213684082, |
| "rewards/margins": 21.039199829101562, |
| "rewards/rejected": -11.385587692260742, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0562248995983936, |
| "grad_norm": 2.7559770387597382e-05, |
| "learning_rate": 9.89424268469971e-06, |
| "logits/chosen": -1.8960098028182983, |
| "logits/rejected": -1.7793407440185547, |
| "logps/chosen": -59.296142578125, |
| "logps/rejected": -201.22280883789062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.653037071228027, |
| "rewards/margins": 21.753782272338867, |
| "rewards/rejected": -12.10074520111084, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.069611780455154, |
| "grad_norm": 0.0003086234792135656, |
| "learning_rate": 9.83707390306871e-06, |
| "logits/chosen": -1.8640127182006836, |
| "logits/rejected": -1.7932268381118774, |
| "logps/chosen": -67.73115539550781, |
| "logps/rejected": -197.77268981933594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.911338806152344, |
| "rewards/margins": 21.78359031677246, |
| "rewards/rejected": -11.872251510620117, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0829986613119142, |
| "grad_norm": 6.989373559918022e-06, |
| "learning_rate": 9.779433386267028e-06, |
| "logits/chosen": -1.8561309576034546, |
| "logits/rejected": -1.7354758977890015, |
| "logps/chosen": -58.682838439941406, |
| "logps/rejected": -199.67955017089844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.759561538696289, |
| "rewards/margins": 21.87822151184082, |
| "rewards/rejected": -12.118657112121582, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0963855421686748, |
| "grad_norm": 4.132572212256491e-05, |
| "learning_rate": 9.721333237529395e-06, |
| "logits/chosen": -1.8625351190567017, |
| "logits/rejected": -1.7816200256347656, |
| "logps/chosen": -60.27020263671875, |
| "logps/rejected": -198.3787384033203, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.702848434448242, |
| "rewards/margins": 21.661922454833984, |
| "rewards/rejected": -11.959076881408691, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.109772423025435, |
| "grad_norm": 9.149351171799935e-06, |
| "learning_rate": 9.662785656603096e-06, |
| "logits/chosen": -1.8707053661346436, |
| "logits/rejected": -1.7554585933685303, |
| "logps/chosen": -63.7075309753418, |
| "logps/rejected": -206.32568359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.28803825378418, |
| "rewards/margins": 22.78645896911621, |
| "rewards/rejected": -12.498420715332031, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1231593038821954, |
| "grad_norm": 1.0895137165789492e-05, |
| "learning_rate": 9.603802937186282e-06, |
| "logits/chosen": -1.8571460247039795, |
| "logits/rejected": -1.7835206985473633, |
| "logps/chosen": -71.52728271484375, |
| "logps/rejected": -204.67822265625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.877116203308105, |
| "rewards/margins": 23.043004989624023, |
| "rewards/rejected": -12.165888786315918, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1365461847389557, |
| "grad_norm": 6.231999577721581e-05, |
| "learning_rate": 9.544397464346573e-06, |
| "logits/chosen": -1.844512939453125, |
| "logits/rejected": -1.759734869003296, |
| "logps/chosen": -70.35665130615234, |
| "logps/rejected": -192.6426544189453, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.559091567993164, |
| "rewards/margins": 22.039852142333984, |
| "rewards/rejected": -11.480762481689453, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.1499330655957163, |
| "grad_norm": 0.00020394229795783758, |
| "learning_rate": 9.48458171192047e-06, |
| "logits/chosen": -1.8643211126327515, |
| "logits/rejected": -1.808932900428772, |
| "logps/chosen": -63.81361770629883, |
| "logps/rejected": -203.98910522460938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.149599075317383, |
| "rewards/margins": 22.48383140563965, |
| "rewards/rejected": -12.334233283996582, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1633199464524766, |
| "grad_norm": 0.00010723127343226224, |
| "learning_rate": 9.424368239894115e-06, |
| "logits/chosen": -1.8679778575897217, |
| "logits/rejected": -1.7837800979614258, |
| "logps/chosen": -63.05647659301758, |
| "logps/rejected": -189.9877166748047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.14889907836914, |
| "rewards/margins": 21.50381088256836, |
| "rewards/rejected": -11.354910850524902, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.176706827309237, |
| "grad_norm": 5.072273052064702e-05, |
| "learning_rate": 9.363769691765979e-06, |
| "logits/chosen": -1.892148733139038, |
| "logits/rejected": -1.7716989517211914, |
| "logps/chosen": -56.24811935424805, |
| "logps/rejected": -198.82974243164062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.459526062011719, |
| "rewards/margins": 21.396852493286133, |
| "rewards/rejected": -11.93732738494873, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1900937081659972, |
| "grad_norm": 3.936883786082035e-06, |
| "learning_rate": 9.302798791892003e-06, |
| "logits/chosen": -1.886169672012329, |
| "logits/rejected": -1.799318552017212, |
| "logps/chosen": -61.040687561035156, |
| "logps/rejected": -195.46054077148438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.538091659545898, |
| "rewards/margins": 21.095800399780273, |
| "rewards/rejected": -11.557706832885742, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.2034805890227578, |
| "grad_norm": 7.63648931751959e-05, |
| "learning_rate": 9.241468342813765e-06, |
| "logits/chosen": -1.8738839626312256, |
| "logits/rejected": -1.7493702173233032, |
| "logps/chosen": -64.00971984863281, |
| "logps/rejected": -193.2359161376953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.317130088806152, |
| "rewards/margins": 21.8746337890625, |
| "rewards/rejected": -11.557502746582031, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.216867469879518, |
| "grad_norm": 2.4901968572521582e-05, |
| "learning_rate": 9.179791222570236e-06, |
| "logits/chosen": -1.870624303817749, |
| "logits/rejected": -1.7692861557006836, |
| "logps/chosen": -56.192413330078125, |
| "logps/rejected": -197.38587951660156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.873640060424805, |
| "rewards/margins": 21.597814559936523, |
| "rewards/rejected": -11.724173545837402, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2302543507362784, |
| "grad_norm": 0.00011131736391689628, |
| "learning_rate": 9.117780381993665e-06, |
| "logits/chosen": -1.8614346981048584, |
| "logits/rejected": -1.7440112829208374, |
| "logps/chosen": -63.95283889770508, |
| "logps/rejected": -197.30955505371094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.20496940612793, |
| "rewards/margins": 21.90732192993164, |
| "rewards/rejected": -11.702352523803711, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2436412315930387, |
| "grad_norm": 0.00010593160550342873, |
| "learning_rate": 9.055448841990199e-06, |
| "logits/chosen": -1.8694393634796143, |
| "logits/rejected": -1.799663782119751, |
| "logps/chosen": -66.95099639892578, |
| "logps/rejected": -200.08949279785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.330759048461914, |
| "rewards/margins": 22.335012435913086, |
| "rewards/rejected": -12.004252433776855, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.2570281124497993, |
| "grad_norm": 2.5527588149998337e-05, |
| "learning_rate": 8.992809690805775e-06, |
| "logits/chosen": -1.8609740734100342, |
| "logits/rejected": -1.74262273311615, |
| "logps/chosen": -63.916542053222656, |
| "logps/rejected": -192.27394104003906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.684073448181152, |
| "rewards/margins": 21.17348861694336, |
| "rewards/rejected": -11.489413261413574, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2704149933065596, |
| "grad_norm": 1.9687415260705166e-05, |
| "learning_rate": 8.929876081277882e-06, |
| "logits/chosen": -1.861687421798706, |
| "logits/rejected": -1.725548505783081, |
| "logps/chosen": -60.264564514160156, |
| "logps/rejected": -213.0454864501953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.275764465332031, |
| "rewards/margins": 23.038787841796875, |
| "rewards/rejected": -12.763025283813477, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.28380187416332, |
| "grad_norm": 1.3463418326864485e-05, |
| "learning_rate": 8.866661228073754e-06, |
| "logits/chosen": -1.844506859779358, |
| "logits/rejected": -1.6935580968856812, |
| "logps/chosen": -66.41419982910156, |
| "logps/rejected": -208.23080444335938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.857114791870117, |
| "rewards/margins": 23.624019622802734, |
| "rewards/rejected": -12.766902923583984, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2971887550200802, |
| "grad_norm": 3.575617665774189e-05, |
| "learning_rate": 8.803178404915581e-06, |
| "logits/chosen": -1.8610200881958008, |
| "logits/rejected": -1.7305552959442139, |
| "logps/chosen": -62.477806091308594, |
| "logps/rejected": -208.88522338867188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.284475326538086, |
| "rewards/margins": 23.20441436767578, |
| "rewards/rejected": -12.919939994812012, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.3105756358768406, |
| "grad_norm": 1.7733655113261193e-05, |
| "learning_rate": 8.739440941793324e-06, |
| "logits/chosen": -1.8665441274642944, |
| "logits/rejected": -1.7732328176498413, |
| "logps/chosen": -65.22151184082031, |
| "logps/rejected": -202.66506958007812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.311973571777344, |
| "rewards/margins": 22.648090362548828, |
| "rewards/rejected": -12.336113929748535, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.323962516733601, |
| "grad_norm": 5.565172978094779e-05, |
| "learning_rate": 8.675462222165706e-06, |
| "logits/chosen": -1.8920332193374634, |
| "logits/rejected": -1.8146085739135742, |
| "logps/chosen": -58.67218780517578, |
| "logps/rejected": -198.10592651367188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.465251922607422, |
| "rewards/margins": 21.327327728271484, |
| "rewards/rejected": -11.862076759338379, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.3373493975903614, |
| "grad_norm": 4.80995959151187e-06, |
| "learning_rate": 8.611255680149984e-06, |
| "logits/chosen": -1.8779428005218506, |
| "logits/rejected": -1.7206926345825195, |
| "logps/chosen": -56.64423751831055, |
| "logps/rejected": -197.15884399414062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.939409255981445, |
| "rewards/margins": 21.892318725585938, |
| "rewards/rejected": -11.952908515930176, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3373493975903614, |
| "eval_logits/chosen": -1.931687355041504, |
| "eval_logits/rejected": -1.7430627346038818, |
| "eval_logps/chosen": -63.374298095703125, |
| "eval_logps/rejected": -197.72195434570312, |
| "eval_loss": 1.0383198656427339e-07, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 10.019155502319336, |
| "eval_rewards/margins": 21.86034393310547, |
| "eval_rewards/rejected": -11.841187477111816, |
| "eval_runtime": 29.641, |
| "eval_samples_per_second": 6.747, |
| "eval_steps_per_second": 6.747, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3507362784471217, |
| "grad_norm": 1.1448615623521619e-05, |
| "learning_rate": 8.546834797701083e-06, |
| "logits/chosen": -1.8999868631362915, |
| "logits/rejected": -1.7782312631607056, |
| "logps/chosen": -56.27119064331055, |
| "logps/rejected": -200.72647094726562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.567099571228027, |
| "rewards/margins": 21.628511428833008, |
| "rewards/rejected": -12.061409950256348, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.3641231593038823, |
| "grad_norm": 2.9590013582492247e-05, |
| "learning_rate": 8.482213101780686e-06, |
| "logits/chosen": -1.8799558877944946, |
| "logits/rejected": -1.7939083576202393, |
| "logps/chosen": -69.73988342285156, |
| "logps/rejected": -209.3248748779297, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.24708366394043, |
| "rewards/margins": 22.704837799072266, |
| "rewards/rejected": -12.45775318145752, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3775100401606426, |
| "grad_norm": 3.4167998819611967e-05, |
| "learning_rate": 8.41740416151686e-06, |
| "logits/chosen": -1.8656768798828125, |
| "logits/rejected": -1.730297327041626, |
| "logps/chosen": -64.72178649902344, |
| "logps/rejected": -198.3512725830078, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.287378311157227, |
| "rewards/margins": 22.36208152770996, |
| "rewards/rejected": -12.074703216552734, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.390896921017403, |
| "grad_norm": 3.03801989502972e-05, |
| "learning_rate": 8.352421585354853e-06, |
| "logits/chosen": -1.8596899509429932, |
| "logits/rejected": -1.6925132274627686, |
| "logps/chosen": -66.36322021484375, |
| "logps/rejected": -199.36143493652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.167764663696289, |
| "rewards/margins": 22.33287811279297, |
| "rewards/rejected": -12.16511344909668, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4042838018741632, |
| "grad_norm": 1.139958476414904e-05, |
| "learning_rate": 8.287279018199613e-06, |
| "logits/chosen": -1.8639628887176514, |
| "logits/rejected": -1.7620197534561157, |
| "logps/chosen": -64.00345611572266, |
| "logps/rejected": -206.9742431640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.084488868713379, |
| "rewards/margins": 22.643537521362305, |
| "rewards/rejected": -12.559050559997559, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.4176706827309236, |
| "grad_norm": 4.2092684452654794e-05, |
| "learning_rate": 8.221990138550654e-06, |
| "logits/chosen": -1.886985182762146, |
| "logits/rejected": -1.7139594554901123, |
| "logps/chosen": -61.876548767089844, |
| "logps/rejected": -207.38473510742188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.575916290283203, |
| "rewards/margins": 22.21279525756836, |
| "rewards/rejected": -12.636876106262207, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.431057563587684, |
| "grad_norm": 0.00011076881492044777, |
| "learning_rate": 8.156568655629891e-06, |
| "logits/chosen": -1.868139624595642, |
| "logits/rejected": -1.7361793518066406, |
| "logps/chosen": -65.99727630615234, |
| "logps/rejected": -193.82699584960938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.533285140991211, |
| "rewards/margins": 22.23984718322754, |
| "rewards/rejected": -11.706562042236328, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 1.3130883417034056e-05, |
| "learning_rate": 8.091028306502991e-06, |
| "logits/chosen": -1.8768796920776367, |
| "logits/rejected": -1.7341238260269165, |
| "logps/chosen": -56.3282585144043, |
| "logps/rejected": -198.73318481445312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.183573722839355, |
| "rewards/margins": 22.112152099609375, |
| "rewards/rejected": -11.92857551574707, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4578313253012047, |
| "grad_norm": 1.579059016876272e-06, |
| "learning_rate": 8.025382853194894e-06, |
| "logits/chosen": -1.8691343069076538, |
| "logits/rejected": -1.7641900777816772, |
| "logps/chosen": -69.58892059326172, |
| "logps/rejected": -199.70413208007812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.471430778503418, |
| "rewards/margins": 22.377849578857422, |
| "rewards/rejected": -11.90641975402832, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.4712182061579653, |
| "grad_norm": 3.329586616018787e-05, |
| "learning_rate": 7.9596460798001e-06, |
| "logits/chosen": -1.8352091312408447, |
| "logits/rejected": -1.7521950006484985, |
| "logps/chosen": -66.96287536621094, |
| "logps/rejected": -195.3020782470703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.123357772827148, |
| "rewards/margins": 22.767568588256836, |
| "rewards/rejected": -11.644209861755371, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4846050870147256, |
| "grad_norm": 2.0335013687144965e-05, |
| "learning_rate": 7.893831789588308e-06, |
| "logits/chosen": -1.887690782546997, |
| "logits/rejected": -1.756474256515503, |
| "logps/chosen": -60.508277893066406, |
| "logps/rejected": -201.3896942138672, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.79334545135498, |
| "rewards/margins": 22.125324249267578, |
| "rewards/rejected": -12.331976890563965, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.497991967871486, |
| "grad_norm": 8.999327292258386e-06, |
| "learning_rate": 7.827953802106033e-06, |
| "logits/chosen": -1.864855408668518, |
| "logits/rejected": -1.7428302764892578, |
| "logps/chosen": -69.32931518554688, |
| "logps/rejected": -202.78677368164062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.690114974975586, |
| "rewards/margins": 23.005146026611328, |
| "rewards/rejected": -12.315031051635742, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.5113788487282465, |
| "grad_norm": 2.8161759473732673e-05, |
| "learning_rate": 7.762025950274813e-06, |
| "logits/chosen": -1.8802257776260376, |
| "logits/rejected": -1.7101455926895142, |
| "logps/chosen": -61.14844512939453, |
| "logps/rejected": -190.69073486328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.25882625579834, |
| "rewards/margins": 21.803659439086914, |
| "rewards/rejected": -11.544832229614258, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.5247657295850066, |
| "grad_norm": 3.1670299449615413e-06, |
| "learning_rate": 7.696062077486596e-06, |
| "logits/chosen": -1.8524658679962158, |
| "logits/rejected": -1.7226076126098633, |
| "logps/chosen": -71.62201690673828, |
| "logps/rejected": -203.36416625976562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.312665939331055, |
| "rewards/margins": 22.732587814331055, |
| "rewards/rejected": -12.419922828674316, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.538152610441767, |
| "grad_norm": 3.04284712910885e-05, |
| "learning_rate": 7.630076034696934e-06, |
| "logits/chosen": -1.8999922275543213, |
| "logits/rejected": -1.7562729120254517, |
| "logps/chosen": -55.377227783203125, |
| "logps/rejected": -203.68716430664062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.358214378356934, |
| "rewards/margins": 21.71164894104004, |
| "rewards/rejected": -12.353431701660156, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.5515394912985274, |
| "grad_norm": 2.8392036256263964e-05, |
| "learning_rate": 7.564081677516588e-06, |
| "logits/chosen": -1.8879365921020508, |
| "logits/rejected": -1.7530921697616577, |
| "logps/chosen": -60.916473388671875, |
| "logps/rejected": -210.0749969482422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.68341064453125, |
| "rewards/margins": 22.60190773010254, |
| "rewards/rejected": -12.918497085571289, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5649263721552877, |
| "grad_norm": 6.038487845216878e-05, |
| "learning_rate": 7.4980928633021615e-06, |
| "logits/chosen": -1.8659874200820923, |
| "logits/rejected": -1.734829306602478, |
| "logps/chosen": -58.3009033203125, |
| "logps/rejected": -198.56259155273438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.786453247070312, |
| "rewards/margins": 21.83785629272461, |
| "rewards/rejected": -12.05140209197998, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.5783132530120483, |
| "grad_norm": 0.0002005839196499437, |
| "learning_rate": 7.432123448246354e-06, |
| "logits/chosen": -1.8782905340194702, |
| "logits/rejected": -1.714125633239746, |
| "logps/chosen": -62.971839904785156, |
| "logps/rejected": -199.8243408203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.586263656616211, |
| "rewards/margins": 21.808002471923828, |
| "rewards/rejected": -12.221738815307617, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5917001338688086, |
| "grad_norm": 2.639082367750234e-06, |
| "learning_rate": 7.366187284468474e-06, |
| "logits/chosen": -1.8676035404205322, |
| "logits/rejected": -1.7441984415054321, |
| "logps/chosen": -63.17724609375, |
| "logps/rejected": -207.64389038085938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.685806274414062, |
| "rewards/margins": 23.19856834411621, |
| "rewards/rejected": -12.512762069702148, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.605087014725569, |
| "grad_norm": 1.1028166227333713e-05, |
| "learning_rate": 7.300298217105793e-06, |
| "logits/chosen": -1.873063087463379, |
| "logits/rejected": -1.742378830909729, |
| "logps/chosen": -61.4983024597168, |
| "logps/rejected": -200.46188354492188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.306286811828613, |
| "rewards/margins": 22.533138275146484, |
| "rewards/rejected": -12.226852416992188, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.6184738955823295, |
| "grad_norm": 5.2287599828559905e-05, |
| "learning_rate": 7.234470081406376e-06, |
| "logits/chosen": -1.8500477075576782, |
| "logits/rejected": -1.7297271490097046, |
| "logps/chosen": -71.09878540039062, |
| "logps/rejected": -198.9838409423828, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.490249633789062, |
| "rewards/margins": 22.542438507080078, |
| "rewards/rejected": -12.052189826965332, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.6318607764390896, |
| "grad_norm": 3.5077468055533245e-05, |
| "learning_rate": 7.168716699823987e-06, |
| "logits/chosen": -1.8880395889282227, |
| "logits/rejected": -1.7822411060333252, |
| "logps/chosen": -60.97273635864258, |
| "logps/rejected": -207.91616821289062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.011301040649414, |
| "rewards/margins": 22.688114166259766, |
| "rewards/rejected": -12.676815032958984, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.64524765729585, |
| "grad_norm": 7.598105185024906e-06, |
| "learning_rate": 7.103051879115679e-06, |
| "logits/chosen": -1.8631807565689087, |
| "logits/rejected": -1.698168158531189, |
| "logps/chosen": -62.397377014160156, |
| "logps/rejected": -208.0476531982422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.969112396240234, |
| "rewards/margins": 22.851131439208984, |
| "rewards/rejected": -12.88201904296875, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.6586345381526104, |
| "grad_norm": 4.015982267446816e-05, |
| "learning_rate": 7.037489407442674e-06, |
| "logits/chosen": -1.85487961769104, |
| "logits/rejected": -1.75741708278656, |
| "logps/chosen": -66.20887756347656, |
| "logps/rejected": -200.14871215820312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.314077377319336, |
| "rewards/margins": 22.312610626220703, |
| "rewards/rejected": -11.998533248901367, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.6720214190093707, |
| "grad_norm": 1.63569475262193e-05, |
| "learning_rate": 6.9720430514751625e-06, |
| "logits/chosen": -1.8645589351654053, |
| "logits/rejected": -1.7157907485961914, |
| "logps/chosen": -59.857513427734375, |
| "logps/rejected": -197.91986083984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.427450180053711, |
| "rewards/margins": 21.496850967407227, |
| "rewards/rejected": -12.069400787353516, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6854082998661313, |
| "grad_norm": 1.3075319657218643e-05, |
| "learning_rate": 6.9067265535016e-06, |
| "logits/chosen": -1.8522984981536865, |
| "logits/rejected": -1.8144248723983765, |
| "logps/chosen": -63.267921447753906, |
| "logps/rejected": -200.5415802001953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.205777168273926, |
| "rewards/margins": 22.44827651977539, |
| "rewards/rejected": -12.242500305175781, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6987951807228916, |
| "grad_norm": 8.631217497168109e-06, |
| "learning_rate": 6.841553628543135e-06, |
| "logits/chosen": -1.8666969537734985, |
| "logits/rejected": -1.7287845611572266, |
| "logps/chosen": -63.22052764892578, |
| "logps/rejected": -199.9890594482422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.489965438842773, |
| "rewards/margins": 22.612924575805664, |
| "rewards/rejected": -12.122960090637207, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.712182061579652, |
| "grad_norm": 1.6783178580226377e-05, |
| "learning_rate": 6.776537961473755e-06, |
| "logits/chosen": -1.8770383596420288, |
| "logits/rejected": -1.7175519466400146, |
| "logps/chosen": -59.59788131713867, |
| "logps/rejected": -203.4702911376953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.750931739807129, |
| "rewards/margins": 22.120555877685547, |
| "rewards/rejected": -12.36962604522705, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.7255689424364125, |
| "grad_norm": 6.6644392973103095e-06, |
| "learning_rate": 6.711693204146765e-06, |
| "logits/chosen": -1.8620790243148804, |
| "logits/rejected": -1.7563501596450806, |
| "logps/chosen": -72.01322937011719, |
| "logps/rejected": -202.68197631835938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.699959754943848, |
| "rewards/margins": 22.8387451171875, |
| "rewards/rejected": -12.138784408569336, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.7389558232931726, |
| "grad_norm": 8.885542047210038e-05, |
| "learning_rate": 6.6470329725282045e-06, |
| "logits/chosen": -1.8752985000610352, |
| "logits/rejected": -1.7436233758926392, |
| "logps/chosen": -57.58866500854492, |
| "logps/rejected": -203.06430053710938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.94059944152832, |
| "rewards/margins": 22.154766082763672, |
| "rewards/rejected": -12.214167594909668, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.752342704149933, |
| "grad_norm": 2.4713101083762012e-05, |
| "learning_rate": 6.5825708438377856e-06, |
| "logits/chosen": -1.8654638528823853, |
| "logits/rejected": -1.740407943725586, |
| "logps/chosen": -63.27727127075195, |
| "logps/rejected": -203.1544189453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.673643112182617, |
| "rewards/margins": 22.118864059448242, |
| "rewards/rejected": -12.445220947265625, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.7657295850066934, |
| "grad_norm": 6.906106136739254e-05, |
| "learning_rate": 6.518320353697992e-06, |
| "logits/chosen": -1.8629789352416992, |
| "logits/rejected": -1.7562310695648193, |
| "logps/chosen": -61.98480987548828, |
| "logps/rejected": -205.32180786132812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.444381713867188, |
| "rewards/margins": 22.911869049072266, |
| "rewards/rejected": -12.467488288879395, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.7791164658634537, |
| "grad_norm": 1.5507324860664085e-05, |
| "learning_rate": 6.454294993291879e-06, |
| "logits/chosen": -1.8623278141021729, |
| "logits/rejected": -1.68741774559021, |
| "logps/chosen": -66.75733184814453, |
| "logps/rejected": -205.45748901367188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.855257034301758, |
| "rewards/margins": 22.61016082763672, |
| "rewards/rejected": -12.754903793334961, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.7925033467202143, |
| "grad_norm": 6.3960551415220834e-06, |
| "learning_rate": 6.390508206530243e-06, |
| "logits/chosen": -1.87300705909729, |
| "logits/rejected": -1.731795310974121, |
| "logps/chosen": -64.09671020507812, |
| "logps/rejected": -207.6633758544922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.862910270690918, |
| "rewards/margins": 22.58968162536621, |
| "rewards/rejected": -12.726774215698242, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.8058902275769746, |
| "grad_norm": 5.351726599656104e-07, |
| "learning_rate": 6.326973387228678e-06, |
| "logits/chosen": -1.859619140625, |
| "logits/rejected": -1.7685467004776, |
| "logps/chosen": -69.70652770996094, |
| "logps/rejected": -205.14407348632812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.268574714660645, |
| "rewards/margins": 22.812740325927734, |
| "rewards/rejected": -12.544163703918457, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.819277108433735, |
| "grad_norm": 2.628388028824702e-05, |
| "learning_rate": 6.263703876295187e-06, |
| "logits/chosen": -1.87285578250885, |
| "logits/rejected": -1.7142107486724854, |
| "logps/chosen": -65.52404022216797, |
| "logps/rejected": -213.24234008789062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.968465805053711, |
| "rewards/margins": 23.130929946899414, |
| "rewards/rejected": -13.162463188171387, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.8326639892904955, |
| "grad_norm": 1.8713224562816322e-05, |
| "learning_rate": 6.200712958928871e-06, |
| "logits/chosen": -1.8909542560577393, |
| "logits/rejected": -1.7693777084350586, |
| "logps/chosen": -55.27473831176758, |
| "logps/rejected": -202.45265197753906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.046746253967285, |
| "rewards/margins": 22.324237823486328, |
| "rewards/rejected": -12.277493476867676, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.8460508701472556, |
| "grad_norm": 7.098522928572493e-06, |
| "learning_rate": 6.138013861830348e-06, |
| "logits/chosen": -1.8942615985870361, |
| "logits/rejected": -1.731011986732483, |
| "logps/chosen": -53.98344802856445, |
| "logps/rejected": -205.3796844482422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.68769645690918, |
| "rewards/margins": 22.195293426513672, |
| "rewards/rejected": -12.507596969604492, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.859437751004016, |
| "grad_norm": 3.2382187782786787e-05, |
| "learning_rate": 6.075619750424422e-06, |
| "logits/chosen": -1.873392105102539, |
| "logits/rejected": -1.6868184804916382, |
| "logps/chosen": -59.67496871948242, |
| "logps/rejected": -209.4269256591797, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.969255447387695, |
| "rewards/margins": 22.771167755126953, |
| "rewards/rejected": -12.801912307739258, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.8728246318607764, |
| "grad_norm": 6.702355221932521e-06, |
| "learning_rate": 6.013543726095646e-06, |
| "logits/chosen": -1.8677663803100586, |
| "logits/rejected": -1.71700119972229, |
| "logps/chosen": -70.77827453613281, |
| "logps/rejected": -204.8978729248047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.274455070495605, |
| "rewards/margins": 22.849491119384766, |
| "rewards/rejected": -12.575034141540527, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.8862115127175367, |
| "grad_norm": 5.6430312724842224e-06, |
| "learning_rate": 5.9517988234373095e-06, |
| "logits/chosen": -1.8611056804656982, |
| "logits/rejected": -1.7488939762115479, |
| "logps/chosen": -70.99211883544922, |
| "logps/rejected": -209.8909912109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.573973655700684, |
| "rewards/margins": 23.441730499267578, |
| "rewards/rejected": -12.867757797241211, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.8995983935742973, |
| "grad_norm": 9.81956509349402e-06, |
| "learning_rate": 5.890398007514474e-06, |
| "logits/chosen": -1.8544105291366577, |
| "logits/rejected": -1.7340114116668701, |
| "logps/chosen": -56.71294403076172, |
| "logps/rejected": -206.75668334960938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.037813186645508, |
| "rewards/margins": 22.693235397338867, |
| "rewards/rejected": -12.655420303344727, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.9129852744310576, |
| "grad_norm": 8.173860805982258e-06, |
| "learning_rate": 5.8293541711415895e-06, |
| "logits/chosen": -1.8714882135391235, |
| "logits/rejected": -1.7200887203216553, |
| "logps/chosen": -72.78733825683594, |
| "logps/rejected": -192.37254333496094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.271878242492676, |
| "rewards/margins": 21.913005828857422, |
| "rewards/rejected": -11.641127586364746, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.926372155287818, |
| "grad_norm": 0.00012220365169923753, |
| "learning_rate": 5.768680132175289e-06, |
| "logits/chosen": -1.8853578567504883, |
| "logits/rejected": -1.6917082071304321, |
| "logps/chosen": -60.888282775878906, |
| "logps/rejected": -199.157470703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.8045072555542, |
| "rewards/margins": 22.041522979736328, |
| "rewards/rejected": -12.237015724182129, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.9397590361445785, |
| "grad_norm": 0.0001451301359338686, |
| "learning_rate": 5.708388630822922e-06, |
| "logits/chosen": -1.8622829914093018, |
| "logits/rejected": -1.7366501092910767, |
| "logps/chosen": -68.04113006591797, |
| "logps/rejected": -201.74124145507812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.160343170166016, |
| "rewards/margins": 22.52884864807129, |
| "rewards/rejected": -12.36850357055664, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.9531459170013385, |
| "grad_norm": 6.776998816349078e-06, |
| "learning_rate": 5.648492326967392e-06, |
| "logits/chosen": -1.855958342552185, |
| "logits/rejected": -1.7051986455917358, |
| "logps/chosen": -65.05091857910156, |
| "logps/rejected": -209.4147491455078, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.261792182922363, |
| "rewards/margins": 23.097671508789062, |
| "rewards/rejected": -12.835878372192383, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.966532797858099, |
| "grad_norm": 8.551254722988233e-05, |
| "learning_rate": 5.589003797508865e-06, |
| "logits/chosen": -1.8737328052520752, |
| "logits/rejected": -1.7128698825836182, |
| "logps/chosen": -61.9605598449707, |
| "logps/rejected": -210.7420196533203, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.895185470581055, |
| "rewards/margins": 23.087865829467773, |
| "rewards/rejected": -13.192680358886719, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.9799196787148594, |
| "grad_norm": 1.1372939297871199e-05, |
| "learning_rate": 5.52993553372389e-06, |
| "logits/chosen": -1.8983303308486938, |
| "logits/rejected": -1.7375752925872803, |
| "logps/chosen": -64.04942321777344, |
| "logps/rejected": -205.53140258789062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.743837356567383, |
| "rewards/margins": 22.313587188720703, |
| "rewards/rejected": -12.569747924804688, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9933065595716197, |
| "grad_norm": 2.0934508938807994e-05, |
| "learning_rate": 5.471299938642517e-06, |
| "logits/chosen": -1.8644678592681885, |
| "logits/rejected": -1.6225488185882568, |
| "logps/chosen": -75.8808822631836, |
| "logps/rejected": -205.18276977539062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.244497299194336, |
| "rewards/margins": 22.91061019897461, |
| "rewards/rejected": -12.66611385345459, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.9959839357429718, |
| "eval_logits/chosen": -1.9272390604019165, |
| "eval_logits/rejected": -1.7022486925125122, |
| "eval_logps/chosen": -63.15131378173828, |
| "eval_logps/rejected": -202.59934997558594, |
| "eval_loss": 6.345212000269385e-08, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 10.041454315185547, |
| "eval_rewards/margins": 22.370386123657227, |
| "eval_rewards/rejected": -12.328930854797363, |
| "eval_runtime": 33.4211, |
| "eval_samples_per_second": 5.984, |
| "eval_steps_per_second": 5.984, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.005354752342704, |
| "grad_norm": 1.0485188795428257e-05, |
| "learning_rate": 5.413109324443927e-06, |
| "logits/chosen": -1.85820472240448, |
| "logits/rejected": -1.7645277976989746, |
| "logps/chosen": -62.93926239013672, |
| "logps/rejected": -206.52940368652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.950172424316406, |
| "rewards/margins": 22.60118293762207, |
| "rewards/rejected": -12.651012420654297, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.0187416331994643, |
| "grad_norm": 8.381151928915642e-06, |
| "learning_rate": 5.355375909871147e-06, |
| "logits/chosen": -1.865766167640686, |
| "logits/rejected": -1.7003635168075562, |
| "logps/chosen": -65.76628112792969, |
| "logps/rejected": -203.88465881347656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.34652328491211, |
| "rewards/margins": 22.93129539489746, |
| "rewards/rejected": -12.5847749710083, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.032128514056225, |
| "grad_norm": 5.861081262992229e-06, |
| "learning_rate": 5.298111817665392e-06, |
| "logits/chosen": -1.8790661096572876, |
| "logits/rejected": -1.6590349674224854, |
| "logps/chosen": -61.801902770996094, |
| "logps/rejected": -207.1491241455078, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.80274486541748, |
| "rewards/margins": 22.702667236328125, |
| "rewards/rejected": -12.899922370910645, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.0455153949129854, |
| "grad_norm": 1.3709258382732514e-05, |
| "learning_rate": 5.2413290720205445e-06, |
| "logits/chosen": -1.8769207000732422, |
| "logits/rejected": -1.7207590341567993, |
| "logps/chosen": -65.25004577636719, |
| "logps/rejected": -205.05783081054688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.010208129882812, |
| "rewards/margins": 22.46693992614746, |
| "rewards/rejected": -12.456731796264648, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.0589022757697455, |
| "grad_norm": 6.286778443609364e-06, |
| "learning_rate": 5.185039596058357e-06, |
| "logits/chosen": -1.8302192687988281, |
| "logits/rejected": -1.6823524236679077, |
| "logps/chosen": -57.266639709472656, |
| "logps/rejected": -213.8351287841797, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.011209487915039, |
| "rewards/margins": 23.254796981811523, |
| "rewards/rejected": -13.243589401245117, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.072289156626506, |
| "grad_norm": 1.2825248631997965e-05, |
| "learning_rate": 5.129255209324836e-06, |
| "logits/chosen": -1.8800920248031616, |
| "logits/rejected": -1.6835558414459229, |
| "logps/chosen": -66.84913635253906, |
| "logps/rejected": -201.17745971679688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.850937843322754, |
| "rewards/margins": 22.242956161499023, |
| "rewards/rejected": -12.39201831817627, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.0856760374832666, |
| "grad_norm": 2.984725870192051e-05, |
| "learning_rate": 5.073987625308423e-06, |
| "logits/chosen": -1.8780286312103271, |
| "logits/rejected": -1.7410333156585693, |
| "logps/chosen": -67.51756286621094, |
| "logps/rejected": -206.67996215820312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.865274429321289, |
| "rewards/margins": 22.475534439086914, |
| "rewards/rejected": -12.610260963439941, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.0990629183400267, |
| "grad_norm": 0.00011517904931679368, |
| "learning_rate": 5.019248448980402e-06, |
| "logits/chosen": -1.8741604089736938, |
| "logits/rejected": -1.7101694345474243, |
| "logps/chosen": -60.773651123046875, |
| "logps/rejected": -216.3609619140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.092966079711914, |
| "rewards/margins": 23.445796966552734, |
| "rewards/rejected": -13.35283088684082, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.112449799196787, |
| "grad_norm": 0.00015422521391883492, |
| "learning_rate": 4.965049174358126e-06, |
| "logits/chosen": -1.8605587482452393, |
| "logits/rejected": -1.728424072265625, |
| "logps/chosen": -57.07219314575195, |
| "logps/rejected": -209.2133026123047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.606898307800293, |
| "rewards/margins": 23.471294403076172, |
| "rewards/rejected": -12.864397048950195, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.1258366800535473, |
| "grad_norm": 8.874901141098235e-06, |
| "learning_rate": 4.911401182091517e-06, |
| "logits/chosen": -1.871817946434021, |
| "logits/rejected": -1.662431001663208, |
| "logps/chosen": -58.060302734375, |
| "logps/rejected": -210.3882293701172, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.023431777954102, |
| "rewards/margins": 22.93259620666504, |
| "rewards/rejected": -12.90916633605957, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.139223560910308, |
| "grad_norm": 6.589568329218309e-06, |
| "learning_rate": 4.858315737073384e-06, |
| "logits/chosen": -1.8591467142105103, |
| "logits/rejected": -1.7722461223602295, |
| "logps/chosen": -57.941993713378906, |
| "logps/rejected": -201.37484741210938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.01035213470459, |
| "rewards/margins": 22.34813117980957, |
| "rewards/rejected": -12.337777137756348, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.1526104417670684, |
| "grad_norm": 5.866213086846983e-06, |
| "learning_rate": 4.8058039860740515e-06, |
| "logits/chosen": -1.8637701272964478, |
| "logits/rejected": -1.7024104595184326, |
| "logps/chosen": -62.71826171875, |
| "logps/rejected": -204.09197998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.38242244720459, |
| "rewards/margins": 22.9773006439209, |
| "rewards/rejected": -12.594879150390625, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.1659973226238285, |
| "grad_norm": 5.492693617270561e-06, |
| "learning_rate": 4.753876955400771e-06, |
| "logits/chosen": -1.8641217947006226, |
| "logits/rejected": -1.7333282232284546, |
| "logps/chosen": -61.98204803466797, |
| "logps/rejected": -210.81973266601562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.179308891296387, |
| "rewards/margins": 23.154348373413086, |
| "rewards/rejected": -12.9750394821167, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.179384203480589, |
| "grad_norm": 3.0393581255339086e-05, |
| "learning_rate": 4.702545548582452e-06, |
| "logits/chosen": -1.871565818786621, |
| "logits/rejected": -1.7063429355621338, |
| "logps/chosen": -63.68731689453125, |
| "logps/rejected": -205.74569702148438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.423490524291992, |
| "rewards/margins": 23.04408073425293, |
| "rewards/rejected": -12.620591163635254, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.1927710843373496, |
| "grad_norm": 0.00012603566574398428, |
| "learning_rate": 4.651820544080155e-06, |
| "logits/chosen": -1.8732541799545288, |
| "logits/rejected": -1.743831992149353, |
| "logps/chosen": -59.885963439941406, |
| "logps/rejected": -203.7340850830078, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.865376472473145, |
| "rewards/margins": 22.34671974182129, |
| "rewards/rejected": -12.481344223022461, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.2061579651941097, |
| "grad_norm": 0.00017827175906859338, |
| "learning_rate": 4.601712593023857e-06, |
| "logits/chosen": -1.8484163284301758, |
| "logits/rejected": -1.6758276224136353, |
| "logps/chosen": -66.60858154296875, |
| "logps/rejected": -206.3388671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.185609817504883, |
| "rewards/margins": 22.969905853271484, |
| "rewards/rejected": -12.784296035766602, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.21954484605087, |
| "grad_norm": 7.549571819254197e-06, |
| "learning_rate": 4.552232216975945e-06, |
| "logits/chosen": -1.8551725149154663, |
| "logits/rejected": -1.7120177745819092, |
| "logps/chosen": -57.83213424682617, |
| "logps/rejected": -209.56076049804688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.335837364196777, |
| "rewards/margins": 23.33651351928711, |
| "rewards/rejected": -13.000676155090332, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.2329317269076308, |
| "grad_norm": 3.6077890399610624e-05, |
| "learning_rate": 4.503389805721925e-06, |
| "logits/chosen": -1.8688617944717407, |
| "logits/rejected": -1.7254632711410522, |
| "logps/chosen": -68.70154571533203, |
| "logps/rejected": -204.951904296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.894525527954102, |
| "rewards/margins": 22.42927360534668, |
| "rewards/rejected": -12.534747123718262, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.246318607764391, |
| "grad_norm": 0.00010842949996003881, |
| "learning_rate": 4.455195615088791e-06, |
| "logits/chosen": -1.8668763637542725, |
| "logits/rejected": -1.7311407327651978, |
| "logps/chosen": -56.98907470703125, |
| "logps/rejected": -207.2830047607422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.711297988891602, |
| "rewards/margins": 22.293800354003906, |
| "rewards/rejected": -12.582502365112305, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.2597054886211514, |
| "grad_norm": 1.2728739420708735e-05, |
| "learning_rate": 4.407659764791537e-06, |
| "logits/chosen": -1.8525810241699219, |
| "logits/rejected": -1.724234938621521, |
| "logps/chosen": -64.33460998535156, |
| "logps/rejected": -203.0344696044922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.49009895324707, |
| "rewards/margins": 22.857555389404297, |
| "rewards/rejected": -12.36745548248291, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.2730923694779115, |
| "grad_norm": 6.543518975377083e-05, |
| "learning_rate": 4.3607922363082345e-06, |
| "logits/chosen": -1.864620566368103, |
| "logits/rejected": -1.7289314270019531, |
| "logps/chosen": -57.764991760253906, |
| "logps/rejected": -202.0431365966797, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.821728706359863, |
| "rewards/margins": 22.27083396911621, |
| "rewards/rejected": -12.449103355407715, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.286479250334672, |
| "grad_norm": 1.0032449608843308e-05, |
| "learning_rate": 4.314602870784138e-06, |
| "logits/chosen": -1.8435178995132446, |
| "logits/rejected": -1.6707134246826172, |
| "logps/chosen": -63.69310760498047, |
| "logps/rejected": -214.45443725585938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.206135749816895, |
| "rewards/margins": 23.512680053710938, |
| "rewards/rejected": -13.306546211242676, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.2998661311914326, |
| "grad_norm": 2.2827032353234245e-06, |
| "learning_rate": 4.2691013669652716e-06, |
| "logits/chosen": -1.8863375186920166, |
| "logits/rejected": -1.7014707326889038, |
| "logps/chosen": -65.29761505126953, |
| "logps/rejected": -213.1626739501953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.766716957092285, |
| "rewards/margins": 22.87672996520996, |
| "rewards/rejected": -13.110013008117676, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.3132530120481927, |
| "grad_norm": 8.756860552239232e-06, |
| "learning_rate": 4.224297279161901e-06, |
| "logits/chosen": -1.8587490320205688, |
| "logits/rejected": -1.69536554813385, |
| "logps/chosen": -66.74024963378906, |
| "logps/rejected": -202.9591827392578, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.559983253479004, |
| "rewards/margins": 23.037792205810547, |
| "rewards/rejected": -12.47780704498291, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.326639892904953, |
| "grad_norm": 1.0455483788973652e-05, |
| "learning_rate": 4.180200015242344e-06, |
| "logits/chosen": -1.884346604347229, |
| "logits/rejected": -1.686924695968628, |
| "logps/chosen": -55.4239501953125, |
| "logps/rejected": -197.32254028320312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.581134796142578, |
| "rewards/margins": 21.723163604736328, |
| "rewards/rejected": -12.142029762268066, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.3400267737617133, |
| "grad_norm": 1.2215328752063215e-05, |
| "learning_rate": 4.1368188346575155e-06, |
| "logits/chosen": -1.8889604806900024, |
| "logits/rejected": -1.6923484802246094, |
| "logps/chosen": -62.9194450378418, |
| "logps/rejected": -200.98641967773438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.61778450012207, |
| "rewards/margins": 22.01634979248047, |
| "rewards/rejected": -12.398564338684082, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.353413654618474, |
| "grad_norm": 1.9663550119730644e-05, |
| "learning_rate": 4.0941628464966635e-06, |
| "logits/chosen": -1.8721472024917603, |
| "logits/rejected": -1.7548141479492188, |
| "logps/chosen": -60.20267868041992, |
| "logps/rejected": -194.3836669921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.855585098266602, |
| "rewards/margins": 21.740840911865234, |
| "rewards/rejected": -11.88525676727295, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.3668005354752344, |
| "grad_norm": 5.436282663140446e-06, |
| "learning_rate": 4.052241007574645e-06, |
| "logits/chosen": -1.8602116107940674, |
| "logits/rejected": -1.6991478204727173, |
| "logps/chosen": -65.68549346923828, |
| "logps/rejected": -204.56455993652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.281954765319824, |
| "rewards/margins": 22.885639190673828, |
| "rewards/rejected": -12.603684425354004, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.3801874163319945, |
| "grad_norm": 5.809453796246089e-05, |
| "learning_rate": 4.011062120551208e-06, |
| "logits/chosen": -1.8664907217025757, |
| "logits/rejected": -1.6747157573699951, |
| "logps/chosen": -58.36309814453125, |
| "logps/rejected": -204.2161102294922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.09082317352295, |
| "rewards/margins": 22.670461654663086, |
| "rewards/rejected": -12.57963752746582, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.393574297188755, |
| "grad_norm": 1.9118770069326274e-05, |
| "learning_rate": 3.9706348320826135e-06, |
| "logits/chosen": -1.8767648935317993, |
| "logits/rejected": -1.6911203861236572, |
| "logps/chosen": -64.69419860839844, |
| "logps/rejected": -204.36233520507812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.22978401184082, |
| "rewards/margins": 22.811058044433594, |
| "rewards/rejected": -12.581275939941406, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.4069611780455156, |
| "grad_norm": 1.5444951714016497e-05, |
| "learning_rate": 3.930967631006043e-06, |
| "logits/chosen": -1.85677170753479, |
| "logits/rejected": -1.6846284866333008, |
| "logps/chosen": -56.4135627746582, |
| "logps/rejected": -208.22659301757812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.155652046203613, |
| "rewards/margins": 23.030597686767578, |
| "rewards/rejected": -12.874944686889648, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.4203480589022757, |
| "grad_norm": 1.261055422219215e-05, |
| "learning_rate": 3.892068846557114e-06, |
| "logits/chosen": -1.8779144287109375, |
| "logits/rejected": -1.7127296924591064, |
| "logps/chosen": -59.67717742919922, |
| "logps/rejected": -204.9344482421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.156450271606445, |
| "rewards/margins": 22.72434425354004, |
| "rewards/rejected": -12.567895889282227, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.433734939759036, |
| "grad_norm": 2.424490412522573e-05, |
| "learning_rate": 3.8539466466209426e-06, |
| "logits/chosen": -1.8967710733413696, |
| "logits/rejected": -1.7396190166473389, |
| "logps/chosen": -59.95705032348633, |
| "logps/rejected": -201.16258239746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.140314102172852, |
| "rewards/margins": 22.46833038330078, |
| "rewards/rejected": -12.32801342010498, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.4471218206157968, |
| "grad_norm": 1.6727104821256944e-06, |
| "learning_rate": 3.816609036017052e-06, |
| "logits/chosen": -1.861707329750061, |
| "logits/rejected": -1.6875286102294922, |
| "logps/chosen": -67.87788391113281, |
| "logps/rejected": -216.080078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.8674955368042, |
| "rewards/margins": 23.15651512145996, |
| "rewards/rejected": -13.289019584655762, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.460508701472557, |
| "grad_norm": 0.0006341092521324754, |
| "learning_rate": 3.780063854818545e-06, |
| "logits/chosen": -1.8393361568450928, |
| "logits/rejected": -1.7207624912261963, |
| "logps/chosen": -67.45433044433594, |
| "logps/rejected": -207.20132446289062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.836288452148438, |
| "rewards/margins": 23.471036911010742, |
| "rewards/rejected": -12.634748458862305, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.4738955823293174, |
| "grad_norm": 2.6886251362157054e-05, |
| "learning_rate": 3.744318776705866e-06, |
| "logits/chosen": -1.910638451576233, |
| "logits/rejected": -1.7034502029418945, |
| "logps/chosen": -58.13981246948242, |
| "logps/rejected": -209.4466552734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.597261428833008, |
| "rewards/margins": 22.449031829833984, |
| "rewards/rejected": -12.851768493652344, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.4872824631860775, |
| "grad_norm": 1.630528458917979e-05, |
| "learning_rate": 3.709381307355487e-06, |
| "logits/chosen": -1.8732010126113892, |
| "logits/rejected": -1.6983455419540405, |
| "logps/chosen": -70.4014663696289, |
| "logps/rejected": -209.67861938476562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.463674545288086, |
| "rewards/margins": 23.396472930908203, |
| "rewards/rejected": -12.9327974319458, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.500669344042838, |
| "grad_norm": 8.642481589049567e-06, |
| "learning_rate": 3.675258782863893e-06, |
| "logits/chosen": -1.8884601593017578, |
| "logits/rejected": -1.6976335048675537, |
| "logps/chosen": -62.5555419921875, |
| "logps/rejected": -209.5203094482422, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.015859603881836, |
| "rewards/margins": 22.983142852783203, |
| "rewards/rejected": -12.96728515625, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.5140562248995986, |
| "grad_norm": 3.012664819834754e-05, |
| "learning_rate": 3.641958368207152e-06, |
| "logits/chosen": -1.8587169647216797, |
| "logits/rejected": -1.7203285694122314, |
| "logps/chosen": -66.09326171875, |
| "logps/rejected": -216.6996307373047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.267293930053711, |
| "rewards/margins": 23.60677146911621, |
| "rewards/rejected": -13.339475631713867, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.5274431057563587, |
| "grad_norm": 3.527342414599843e-05, |
| "learning_rate": 3.609487055736439e-06, |
| "logits/chosen": -1.8892666101455688, |
| "logits/rejected": -1.6997896432876587, |
| "logps/chosen": -63.81671142578125, |
| "logps/rejected": -214.39913940429688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.233362197875977, |
| "rewards/margins": 22.56268882751465, |
| "rewards/rejected": -13.329327583312988, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.540829986613119, |
| "grad_norm": 6.991498594288714e-06, |
| "learning_rate": 3.5778516637097892e-06, |
| "logits/chosen": -1.8411668539047241, |
| "logits/rejected": -1.6937415599822998, |
| "logps/chosen": -70.88134765625, |
| "logps/rejected": -214.8602294921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.95386791229248, |
| "rewards/margins": 24.302793502807617, |
| "rewards/rejected": -13.348925590515137, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.5542168674698793, |
| "grad_norm": 8.049645657592919e-06, |
| "learning_rate": 3.547058834860421e-06, |
| "logits/chosen": -1.8594615459442139, |
| "logits/rejected": -1.6968889236450195, |
| "logps/chosen": -70.17243957519531, |
| "logps/rejected": -201.14242553710938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.171913146972656, |
| "rewards/margins": 22.592042922973633, |
| "rewards/rejected": -12.420127868652344, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.56760374832664, |
| "grad_norm": 4.853548034589039e-06, |
| "learning_rate": 3.517115035001902e-06, |
| "logits/chosen": -1.848902940750122, |
| "logits/rejected": -1.678993821144104, |
| "logps/chosen": -67.01107788085938, |
| "logps/rejected": -207.88156127929688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.421578407287598, |
| "rewards/margins": 23.362747192382812, |
| "rewards/rejected": -12.941169738769531, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.5809906291834004, |
| "grad_norm": 1.5446234101545997e-05, |
| "learning_rate": 3.4880265516704755e-06, |
| "logits/chosen": -1.886223554611206, |
| "logits/rejected": -1.6841551065444946, |
| "logps/chosen": -53.77461624145508, |
| "logps/rejected": -209.840576171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.018083572387695, |
| "rewards/margins": 22.03033447265625, |
| "rewards/rejected": -13.012250900268555, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.5943775100401605, |
| "grad_norm": 4.117569574191293e-07, |
| "learning_rate": 3.4597994928048157e-06, |
| "logits/chosen": -1.8674980401992798, |
| "logits/rejected": -1.7873084545135498, |
| "logps/chosen": -64.16064453125, |
| "logps/rejected": -204.54177856445312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.784576416015625, |
| "rewards/margins": 23.250102996826172, |
| "rewards/rejected": -12.465524673461914, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.607764390896921, |
| "grad_norm": 0.00015623288345523179, |
| "learning_rate": 3.432439785463496e-06, |
| "logits/chosen": -1.8516432046890259, |
| "logits/rejected": -1.6646175384521484, |
| "logps/chosen": -60.28071975708008, |
| "logps/rejected": -211.36422729492188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.473731994628906, |
| "rewards/margins": 22.622058868408203, |
| "rewards/rejected": -13.14832878112793, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.621151271753681, |
| "grad_norm": 2.727318678807933e-05, |
| "learning_rate": 3.405953174580438e-06, |
| "logits/chosen": -1.868819236755371, |
| "logits/rejected": -1.6735336780548096, |
| "logps/chosen": -68.70064544677734, |
| "logps/rejected": -216.250244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.41883373260498, |
| "rewards/margins": 23.808761596679688, |
| "rewards/rejected": -13.389928817749023, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.6345381526104417, |
| "grad_norm": 2.9853383239242248e-05, |
| "learning_rate": 3.380345221758599e-06, |
| "logits/chosen": -1.8542439937591553, |
| "logits/rejected": -1.734086036682129, |
| "logps/chosen": -55.98112869262695, |
| "logps/rejected": -208.2278289794922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.873754501342773, |
| "rewards/margins": 22.735363006591797, |
| "rewards/rejected": -12.861605644226074, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.647925033467202, |
| "grad_norm": 2.1380317775765434e-05, |
| "learning_rate": 3.3556213041021635e-06, |
| "logits/chosen": -1.9123871326446533, |
| "logits/rejected": -1.659395456314087, |
| "logps/chosen": -59.75432205200195, |
| "logps/rejected": -213.9033203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.29840087890625, |
| "rewards/margins": 22.681549072265625, |
| "rewards/rejected": -13.383146286010742, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.6613119143239627, |
| "grad_norm": 1.689990131126251e-05, |
| "learning_rate": 3.331786613087466e-06, |
| "logits/chosen": -1.8490474224090576, |
| "logits/rejected": -1.7181682586669922, |
| "logps/chosen": -73.18424224853516, |
| "logps/rejected": -204.5380859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.719615936279297, |
| "rewards/margins": 23.40756607055664, |
| "rewards/rejected": -12.687950134277344, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.674698795180723, |
| "grad_norm": 1.4408194147108588e-05, |
| "learning_rate": 3.3088461534728977e-06, |
| "logits/chosen": -1.8690017461776733, |
| "logits/rejected": -1.7061388492584229, |
| "logps/chosen": -63.67757034301758, |
| "logps/rejected": -206.1077117919922, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.205856323242188, |
| "rewards/margins": 23.052112579345703, |
| "rewards/rejected": -12.846254348754883, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.674698795180723, |
| "eval_logits/chosen": -1.924623727798462, |
| "eval_logits/rejected": -1.679319143295288, |
| "eval_logps/chosen": -63.07221984863281, |
| "eval_logps/rejected": -205.45054626464844, |
| "eval_loss": 4.907800743580992e-08, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 10.049363136291504, |
| "eval_rewards/margins": 22.663414001464844, |
| "eval_rewards/rejected": -12.614049911499023, |
| "eval_runtime": 29.3063, |
| "eval_samples_per_second": 6.824, |
| "eval_steps_per_second": 6.824, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.6880856760374834, |
| "grad_norm": 9.276622586185113e-05, |
| "learning_rate": 3.2868047422480172e-06, |
| "logits/chosen": -1.8449962139129639, |
| "logits/rejected": -1.6728605031967163, |
| "logps/chosen": -72.02220916748047, |
| "logps/rejected": -206.3667449951172, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.057626724243164, |
| "rewards/margins": 23.6590576171875, |
| "rewards/rejected": -12.601430892944336, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.7014725568942435, |
| "grad_norm": 4.395175346871838e-05, |
| "learning_rate": 3.26566700762209e-06, |
| "logits/chosen": -1.878538727760315, |
| "logits/rejected": -1.684623122215271, |
| "logps/chosen": -60.65617752075195, |
| "logps/rejected": -218.4495391845703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.014341354370117, |
| "rewards/margins": 23.78032684326172, |
| "rewards/rejected": -13.765981674194336, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.714859437751004, |
| "grad_norm": 4.3231425479461905e-06, |
| "learning_rate": 3.2454373880522666e-06, |
| "logits/chosen": -1.8722236156463623, |
| "logits/rejected": -1.7218602895736694, |
| "logps/chosen": -65.2080307006836, |
| "logps/rejected": -204.86962890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.908885955810547, |
| "rewards/margins": 22.567001342773438, |
| "rewards/rejected": -12.658113479614258, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.7282463186077646, |
| "grad_norm": 4.5356948248809204e-05, |
| "learning_rate": 3.2261201313116e-06, |
| "logits/chosen": -1.844891905784607, |
| "logits/rejected": -1.7258058786392212, |
| "logps/chosen": -59.008201599121094, |
| "logps/rejected": -207.6454315185547, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.589787483215332, |
| "rewards/margins": 23.398906707763672, |
| "rewards/rejected": -12.809122085571289, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.7416331994645247, |
| "grad_norm": 6.669775302725611e-06, |
| "learning_rate": 3.2077192935971174e-06, |
| "logits/chosen": -1.841321587562561, |
| "logits/rejected": -1.6863746643066406, |
| "logps/chosen": -58.30268478393555, |
| "logps/rejected": -206.64907836914062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.965548515319824, |
| "rewards/margins": 22.85080337524414, |
| "rewards/rejected": -12.885255813598633, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.755020080321285, |
| "grad_norm": 6.417185431928374e-06, |
| "learning_rate": 3.1902387386780987e-06, |
| "logits/chosen": -1.875192642211914, |
| "logits/rejected": -1.6304317712783813, |
| "logps/chosen": -57.858924865722656, |
| "logps/rejected": -208.5902557373047, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.979833602905273, |
| "rewards/margins": 22.941097259521484, |
| "rewards/rejected": -12.961263656616211, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.7684069611780453, |
| "grad_norm": 7.029405969660729e-05, |
| "learning_rate": 3.1736821370847745e-06, |
| "logits/chosen": -1.8532873392105103, |
| "logits/rejected": -1.7104461193084717, |
| "logps/chosen": -66.86785888671875, |
| "logps/rejected": -206.0425567626953, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.42613697052002, |
| "rewards/margins": 23.20665740966797, |
| "rewards/rejected": -12.780519485473633, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.781793842034806, |
| "grad_norm": 3.107989687123336e-05, |
| "learning_rate": 3.158052965337594e-06, |
| "logits/chosen": -1.857616662979126, |
| "logits/rejected": -1.6626615524291992, |
| "logps/chosen": -62.6285514831543, |
| "logps/rejected": -208.3471221923828, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.993135452270508, |
| "rewards/margins": 23.01392364501953, |
| "rewards/rejected": -13.020787239074707, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.7951807228915664, |
| "grad_norm": 1.721379931041156e-06, |
| "learning_rate": 3.1433545052172393e-06, |
| "logits/chosen": -1.863524079322815, |
| "logits/rejected": -1.6777336597442627, |
| "logps/chosen": -58.976402282714844, |
| "logps/rejected": -217.3468017578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.048527717590332, |
| "rewards/margins": 23.601850509643555, |
| "rewards/rejected": -13.553324699401855, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.8085676037483265, |
| "grad_norm": 0.00015129183884710073, |
| "learning_rate": 3.129589843075512e-06, |
| "logits/chosen": -1.8648707866668701, |
| "logits/rejected": -1.698838233947754, |
| "logps/chosen": -56.40584182739258, |
| "logps/rejected": -201.5301055908203, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.857680320739746, |
| "rewards/margins": 22.250385284423828, |
| "rewards/rejected": -12.392705917358398, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.821954484605087, |
| "grad_norm": 1.1143507435917854e-05, |
| "learning_rate": 3.116761869187279e-06, |
| "logits/chosen": -1.8589363098144531, |
| "logits/rejected": -1.7270303964614868, |
| "logps/chosen": -66.82688903808594, |
| "logps/rejected": -206.1546173095703, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.576805114746094, |
| "rewards/margins": 23.24510955810547, |
| "rewards/rejected": -12.668303489685059, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.835341365461847, |
| "grad_norm": 1.0546603334660176e-05, |
| "learning_rate": 3.1048732771435713e-06, |
| "logits/chosen": -1.8395435810089111, |
| "logits/rejected": -1.6673234701156616, |
| "logps/chosen": -82.46403503417969, |
| "logps/rejected": -201.4765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.403456687927246, |
| "rewards/margins": 23.960718154907227, |
| "rewards/rejected": -12.557262420654297, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.8487282463186077, |
| "grad_norm": 1.641225753701292e-05, |
| "learning_rate": 3.093926563285992e-06, |
| "logits/chosen": -1.8687680959701538, |
| "logits/rejected": -1.6740798950195312, |
| "logps/chosen": -59.4666748046875, |
| "logps/rejected": -203.675537109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.573221206665039, |
| "rewards/margins": 22.31708335876465, |
| "rewards/rejected": -12.743863105773926, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.862115127175368, |
| "grad_norm": 6.346489681163803e-05, |
| "learning_rate": 3.0839240261825406e-06, |
| "logits/chosen": -1.862137794494629, |
| "logits/rejected": -1.7026790380477905, |
| "logps/chosen": -74.59800720214844, |
| "logps/rejected": -208.82510375976562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.843328475952148, |
| "rewards/margins": 23.61244010925293, |
| "rewards/rejected": -12.769109725952148, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.8755020080321287, |
| "grad_norm": 7.015174560365267e-06, |
| "learning_rate": 3.0748677661449626e-06, |
| "logits/chosen": -1.8922617435455322, |
| "logits/rejected": -1.663762092590332, |
| "logps/chosen": -53.6298942565918, |
| "logps/rejected": -209.82418823242188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.294633865356445, |
| "rewards/margins": 22.25174331665039, |
| "rewards/rejected": -12.95710563659668, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 9.642781151342206e-06, |
| "learning_rate": 3.0667596847877337e-06, |
| "logits/chosen": -1.8729660511016846, |
| "logits/rejected": -1.631821870803833, |
| "logps/chosen": -64.28204345703125, |
| "logps/rejected": -207.61941528320312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.652636528015137, |
| "rewards/margins": 22.619272232055664, |
| "rewards/rejected": -12.966634750366211, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.9022757697456494, |
| "grad_norm": 4.731034096039366e-06, |
| "learning_rate": 3.059601484628756e-06, |
| "logits/chosen": -1.864485740661621, |
| "logits/rejected": -1.7230615615844727, |
| "logps/chosen": -57.4099235534668, |
| "logps/rejected": -213.2287139892578, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.917817115783691, |
| "rewards/margins": 22.95882225036621, |
| "rewards/rejected": -13.041006088256836, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.9156626506024095, |
| "grad_norm": 2.3282762413145974e-05, |
| "learning_rate": 3.053394668731877e-06, |
| "logits/chosen": -1.8527206182479858, |
| "logits/rejected": -1.655686616897583, |
| "logps/chosen": -64.55480194091797, |
| "logps/rejected": -214.9347381591797, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.030226707458496, |
| "rewards/margins": 23.382362365722656, |
| "rewards/rejected": -13.352136611938477, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.92904953145917, |
| "grad_norm": 0.00011880494275828823, |
| "learning_rate": 3.0481405403912697e-06, |
| "logits/chosen": -1.8731340169906616, |
| "logits/rejected": -1.7239511013031006, |
| "logps/chosen": -63.4110107421875, |
| "logps/rejected": -210.80111694335938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.36390495300293, |
| "rewards/margins": 23.33377456665039, |
| "rewards/rejected": -12.969869613647461, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.9424364123159306, |
| "grad_norm": 2.658485209394712e-05, |
| "learning_rate": 3.043840202857774e-06, |
| "logits/chosen": -1.851257562637329, |
| "logits/rejected": -1.6927006244659424, |
| "logps/chosen": -73.62875366210938, |
| "logps/rejected": -207.9374237060547, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.74313735961914, |
| "rewards/margins": 23.574596405029297, |
| "rewards/rejected": -12.831457138061523, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.9558232931726907, |
| "grad_norm": 2.298586650795187e-06, |
| "learning_rate": 3.0404945591072405e-06, |
| "logits/chosen": -1.852618932723999, |
| "logits/rejected": -1.7044318914413452, |
| "logps/chosen": -67.87403106689453, |
| "logps/rejected": -205.13784790039062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.148408889770508, |
| "rewards/margins": 22.653907775878906, |
| "rewards/rejected": -12.505498886108398, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.969210174029451, |
| "grad_norm": 1.3922598327553715e-06, |
| "learning_rate": 3.0381043116509197e-06, |
| "logits/chosen": -1.8768529891967773, |
| "logits/rejected": -1.7174484729766846, |
| "logps/chosen": -71.07394409179688, |
| "logps/rejected": -204.34207153320312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.052443504333496, |
| "rewards/margins": 22.617359161376953, |
| "rewards/rejected": -12.564915657043457, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.9825970548862113, |
| "grad_norm": 3.243790729356988e-07, |
| "learning_rate": 3.0366699623879565e-06, |
| "logits/chosen": -1.8698358535766602, |
| "logits/rejected": -1.6645195484161377, |
| "logps/chosen": -53.6240348815918, |
| "logps/rejected": -214.02001953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.68481159210205, |
| "rewards/margins": 23.085359573364258, |
| "rewards/rejected": -13.400548934936523, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.9933065595716197, |
| "eval_logits/chosen": -1.9241392612457275, |
| "eval_logits/rejected": -1.6729750633239746, |
| "eval_logps/chosen": -63.028377532958984, |
| "eval_logps/rejected": -206.25421142578125, |
| "eval_loss": 4.4528654541409196e-08, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 10.053747177124023, |
| "eval_rewards/margins": 22.7481632232666, |
| "eval_rewards/rejected": -12.694416046142578, |
| "eval_runtime": 29.0568, |
| "eval_samples_per_second": 6.883, |
| "eval_steps_per_second": 6.883, |
| "step": 1119 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1119, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|